---
title: "Vulkan GPU Backend"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Vulkan GPU Backend}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
```

## Introduction

ggmlR supports GPU acceleration through the Vulkan backend. Vulkan is a cross-platform graphics and compute API that works on Windows, Linux, and macOS (via MoltenVK). This vignette shows how to use the Vulkan backend for accelerated tensor operations.

## Checking Vulkan Availability

First, check whether Vulkan is available on your system:

```{r check-vulkan}
library(ggmlR)

# Check if the Vulkan backend is available
if (ggml_vulkan_available()) {
  cat("Vulkan is available!\n")
  cat("Number of Vulkan devices:", ggml_vulkan_device_count(), "\n")
} else {
  cat("Vulkan is not available. Using CPU backend.\n")
}
```

## Listing Available Devices

You can list all available Vulkan devices:

```{r list-devices}
if (ggml_vulkan_available()) {
  devices <- ggml_vulkan_list_devices()
  print(devices)

  # Get detailed info for each device (device indices are 0-based)
  for (i in seq_len(ggml_vulkan_device_count())) {
    cat("\nDevice", i - 1, ":\n")
    cat("  Name:", ggml_vulkan_device_description(i - 1), "\n")
    mem <- ggml_vulkan_device_memory(i - 1)
    cat("  Memory:", round(mem / 1024^3, 2), "GB\n")
  }
}
```

## Initializing the Vulkan Backend

To use Vulkan for computations:

```{r init-vulkan}
if (ggml_vulkan_available()) {
  # Initialize the Vulkan backend (device 0)
  vk <- ggml_vulkan_init(device = 0)

  # Check the backend name
  cat("Backend:", ggml_vulkan_backend_name(vk), "\n")

  # Verify it is a Vulkan backend
  stopifnot(ggml_vulkan_is_backend(vk))
}
```

## Basic Computation on the GPU

Here is how to perform tensor operations on the GPU:

```{r gpu-compute}
if (ggml_vulkan_available()) {
  # Initialize the Vulkan backend
  vk <- ggml_vulkan_init(0)

  # Create a scheduler with the Vulkan backend
  sched <- ggml_backend_sched_new(list(vk))

  # Create a context for tensors
  ctx <- ggml_init(64 * 1024 * 1024)

  # Create tensors
  n <- 10000
  a <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n)
  b <- ggml_new_tensor_1d(ctx, GGML_TYPE_F32, n)

  # Build the computation graph
  c <- ggml_add(ctx, a, b)
  d <- ggml_mul(ctx, c, c)
  result <- ggml_sum(ctx, d)

  graph <- ggml_build_forward_expand(ctx, result)

  # Reserve memory and allocate
  ggml_backend_sched_reserve(sched, graph)
  ggml_backend_sched_alloc_graph(sched, graph)

  # Set data
  ggml_set_f32(a, rnorm(n))
  ggml_set_f32(b, rnorm(n))

  # Compute on the GPU
  ggml_backend_sched_graph_compute(sched, graph)

  # Get the result
  cat("Result:", ggml_get_f32(result), "\n")

  # Cleanup
  ggml_backend_sched_free(sched)
  ggml_vulkan_free(vk)
  ggml_free(ctx)
}
```

## Matrix Multiplication on the GPU

Matrix multiplication benefits significantly from GPU acceleration:

```{r matmul-gpu}
if (ggml_vulkan_available()) {
  vk <- ggml_vulkan_init(0)
  sched <- ggml_backend_sched_new(list(vk))
  ctx <- ggml_init(128 * 1024 * 1024)

  # Create matrices
  m <- 1024
  n <- 1024
  k <- 1024

  A <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, m)
  B <- ggml_new_tensor_2d(ctx, GGML_TYPE_F32, k, n)

  # Matrix multiplication: C = A * B^T (both inputs share the inner dimension k)
  C <- ggml_mul_mat(ctx, A, B)

  graph <- ggml_build_forward_expand(ctx, C)
  ggml_backend_sched_reserve(sched, graph)
  ggml_backend_sched_alloc_graph(sched, graph)

  # Initialize with random data
  ggml_set_f32(A, rnorm(m * k))
  ggml_set_f32(B, rnorm(n * k))

  # Time the GPU computation
  start <- Sys.time()
  ggml_backend_sched_graph_compute(sched, graph)
  gpu_time <- Sys.time() - start

  cat("GPU matmul time:", round(as.numeric(gpu_time) * 1000, 2), "ms\n")
  cat("Result shape:", ggml_tensor_shape(C), "\n")

  # Cleanup
  ggml_backend_sched_free(sched)
  ggml_vulkan_free(vk)
  ggml_free(ctx)
}
```

## Troubleshooting

### Vulkan Not Available

If `ggml_vulkan_available()` returns `FALSE`:

1. **Check drivers**: Ensure you have up-to-date GPU drivers.
2. **Verify the Vulkan installation**: Run `vulkaninfo` in a terminal.
3. **Check device support**: Some integrated GPUs may not support Vulkan.

### Performance Tips

1.
   **Batch operations**: Group multiple operations into a single graph.
2. **Use appropriate types**: F16 can be faster on some GPUs.
3. **Minimize data transfers**: Keep data on the GPU for as long as possible.
4. **Choose the right device**: Use `ggml_vulkan_device_memory()` to find the most capable GPU.

## See Also

- `vignette("multi-gpu")` for multi-GPU inference
- `vignette("quantization")` for using quantized models
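## Appendix: Choosing a Device by Memory

The device-selection tip in the troubleshooting section can be sketched as a small helper that queries each device's memory and picks the largest. `pick_largest_device()` is a hypothetical name introduced here for illustration, not part of ggmlR; only the `ggml_vulkan_*` calls come from this vignette, and the `exists()` guard simply keeps the sketch safe to run when ggmlR is not loaded.

```{r pick-device}
# Hypothetical helper (not part of ggmlR): return the 0-based index of the
# Vulkan device with the most memory.
pick_largest_device <- function(mem_bytes) {
  which.max(mem_bytes) - 1L  # ggml device indices are 0-based
}

if (exists("ggml_vulkan_available") && ggml_vulkan_available()) {
  n_dev <- ggml_vulkan_device_count()

  # Query per-device memory (in bytes) for devices 0 .. n_dev - 1
  mems <- vapply(seq_len(n_dev) - 1L, ggml_vulkan_device_memory, numeric(1))

  # Initialize the backend on the device with the most memory
  vk <- ggml_vulkan_init(device = pick_largest_device(mems))
}
```

On a machine with, say, an 8 GB discrete GPU at index 1 and a smaller integrated GPU at index 0, this selects index 1.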