---
title: "Keras-like API in ggmlR"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Keras-like API in ggmlR}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(eval = TRUE)
```

ggmlR provides a high-level Keras-like interface for building and training
neural networks on top of the ggml C tensor library. This vignette walks
through the main APIs using the built-in `iris` and `mtcars` datasets.

## Installation

```{r}
# install.packages("ggmlR")  # once on CRAN
library(ggmlR)
```

---

## 1. Sequential API

The sequential API mirrors `keras.Sequential`: stack layers with `|>`,
compile, fit, evaluate, predict.

### 1.1 Prepare data — iris (3-class classification)

```{r}
data(iris)
set.seed(42)

x <- as.matrix(iris[, 1:4])
x <- scale(x)  # standardise

# one-hot encode species
y <- model.matrix(~ Species - 1, iris)  # [150 x 3]

idx <- sample(nrow(x))
n_tr <- 120L
x_tr <- x[idx[1:n_tr], ]
y_tr <- y[idx[1:n_tr], ]
x_val <- x[idx[(n_tr + 1):150], ]
y_val <- y[idx[(n_tr + 1):150], ]
```

### 1.2 Build and compile

```{r}
model <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_batch_norm() |>
  ggml_layer_dropout(0.3, stochastic = TRUE) |>
  ggml_layer_dense(16L, activation = "relu") |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = c("accuracy")
  )
print(model)
```

### 1.3 Train

```{r}
model <- ggml_fit(
  model, x_tr, y_tr,
  epochs = 100L,
  batch_size = 32L,
  validation_split = 0.0,  # we supply our own val set below
  verbose = 0L
)
```

### 1.4 Evaluate and predict

```{r}
score <- ggml_evaluate(model, x_val, y_val, batch_size = 16L)
cat(sprintf("Val loss: %.4f Val accuracy: %.4f\n", score$loss, score$accuracy))

probs <- ggml_predict(model, x_val, batch_size = 16L)
classes <- apply(probs, 1, which.max)
true <- apply(y_val, 1, which.max)
cat("Confusion matrix:\n")
print(table(true = true, predicted = classes))
```

### 1.5 Save and reload

```{r}
path <- tempfile(fileext = ".rds")
ggml_save_model(model, path)
cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024))

model2 <- ggml_load_model(path)
score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 16L)
cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy))
```

---

## 2. Functional API

The functional API lets you build DAGs with multiple inputs/branches/outputs.
Use `ggml_input()` to declare inputs, pipe through layers, then call
`ggml_model()`.

### 2.1 Single-input DAG — residual-style network on iris

The trunk is projected through a second 32-unit layer and added back onto
itself with `ggml_layer_add()`, giving a genuine skip connection.

```{r}
inp <- ggml_input(shape = 4L)

h <- inp |>
  ggml_layer_dense(32L, activation = "relu") |>
  ggml_layer_batch_norm()

# residual branch: same width as the trunk so the element-wise add is valid
h2 <- h |>
  ggml_layer_dense(32L, activation = "relu")
h <- ggml_layer_add(list(h, h2))  # skip connection

h <- h |> ggml_layer_dense(16L, activation = "relu")
out <- h |> ggml_layer_dense(3L, activation = "softmax")

model_fn <- ggml_model(inputs = inp, outputs = out) |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               metrics = c("accuracy"))

model_fn <- ggml_fit(model_fn, x_tr, y_tr, epochs = 100L, batch_size = 32L,
                     verbose = 0L)
score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 16L)
cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy))
```

### 2.2 Multi-input model — mtcars regression

Predict `mpg` from two input groups: engine features and transmission
features.
```{r}
data(mtcars)
set.seed(7)

# Input group 1: engine (disp, hp, wt)
# Input group 2: transmission / gearbox (cyl, gear, carb, am)
engine <- as.matrix(scale(mtcars[, c("disp", "hp", "wt")]))
trans <- as.matrix(scale(mtcars[, c("cyl", "gear", "carb", "am")]))
y_mpg <- matrix(scale(mtcars$mpg), ncol = 1L)  # [32 x 1]

# small dataset — use all for training, evaluate on same data for demo
x1 <- engine
x2 <- trans

# one ggml_input() per feature group; names label the graph inputs
inp1 <- ggml_input(shape = 3L, name = "engine")
inp2 <- ggml_input(shape = 4L, name = "transmission")

# each branch embeds its group into a common 16-unit space
branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu")
branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu")
merged <- ggml_layer_add(list(branch1, branch2))  # element-wise add

# regression head: single linear output unit
out_reg <- merged |>
  ggml_layer_dense(8L, activation = "relu") |>
  ggml_layer_dense(1L)

model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |>
  ggml_compile(optimizer = "adam", loss = "mse")

# multi-input fit/predict take a list of matrices, one per declared input
model_reg <- ggml_fit(model_reg, x = list(x1, x2), y = y_mpg,
                      epochs = 200L, batch_size = 16L, verbose = 0L)

preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L)
cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg)))
```

---

## 3. Callbacks

Callbacks plug into `ggml_fit()` via the `callbacks` argument.
### 3.1 Early stopping

```{r}
# stop when val_loss has not improved by at least min_delta for `patience` epochs
cb_stop <- ggml_callback_early_stopping(
  monitor = "val_loss",
  patience = 15L,
  min_delta = 1e-4
)

model_cb <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")

model_cb <- ggml_fit(model_cb, x_tr, y_tr,
                     epochs = 300L, batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_stop), verbose = 0L)
```

### 3.2 Learning-rate schedulers

```{r}
# Cosine annealing
cb_cosine <- ggml_schedule_cosine_decay(T_max = 100L, eta_min = 1e-5)

# Step decay: halve LR every 30 epochs
cb_step <- ggml_schedule_step_decay(step_size = 30L, gamma = 0.5)

# Reduce on plateau
cb_plateau <- ggml_schedule_reduce_on_plateau(
  monitor = "val_loss",
  factor = 0.5,
  patience = 10L,
  min_lr = 1e-6
)

model_lr <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")

# only the cosine scheduler is attached here; cb_step / cb_plateau above are
# constructed purely to illustrate their signatures
model_lr <- ggml_fit(model_lr, x_tr, y_tr,
                     epochs = 150L, batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_cosine), verbose = 0L)
```

---

## 4. Autograd Engine (`ag_*`)

The autograd engine provides PyTorch-style dynamic computation graphs.
Tensors are **column-major**: shape is `[features, batch]`.
### 4.1 Prepare data (iris, col-major)

```{r}
# transpose: rows = features, cols = samples
x_tr_ag <- t(x_tr)    # [4, 120]
y_tr_ag <- t(y_tr)    # [3, 120]
x_val_ag <- t(x_val)  # [4, 30]
y_val_ag <- t(y_val)
```

### 4.2 Build a model with `ag_sequential`

```{r}
ag_mod <- ag_sequential(
  ag_linear(4L, 32L, activation = "relu"),
  ag_batch_norm(32L),
  ag_dropout(0.3),
  ag_linear(32L, 16L, activation = "relu"),
  ag_linear(16L, 3L)
)
params <- ag_mod$parameters()
opt <- optimizer_adam(params, lr = 1e-3)
```

### 4.3 Training loop

```{r}
BS <- 32L
n <- ncol(x_tr_ag)
ag_train(ag_mod)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
  }
  if (ep %% 50L == 0L) cat(sprintf("epoch %d loss %.4f\n", ep, loss$data[1]))
}
```

### 4.4 Inference and accuracy

```{r}
ag_eval(ag_mod)

# Forward in chunks and apply a column-wise softmax to the raw logits.
# `x_cm` is [features, samples]; the result is [classes, samples].
ag_predict_cm <- function(model, x_cm, chunk = 64L) {
  n <- ncol(x_cm)
  # probe one column to learn the output dimensionality
  n_out <- nrow(model$forward(ag_tensor(x_cm[, 1, drop = FALSE]))$data)
  out <- matrix(0.0, n_out, n)
  for (s in seq(1L, n, by = chunk)) {
    e <- min(s + chunk - 1L, n)
    lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data
    # sweep() is required: `lg - apply(lg, 2, max)` would recycle the
    # length-ncol vector DOWN the columns and corrupt every column but the
    # first; same for dividing by colSums().
    ev <- exp(sweep(lg, 2L, apply(lg, 2L, max)))
    out[, s:e] <- sweep(ev, 2L, colSums(ev), "/")
  }
  out
}

probs_ag <- ag_predict_cm(ag_mod, x_val_ag)  # [3, 30]
pred_ag <- apply(probs_ag, 2, which.max)
# col-major: rows = classes, columns = samples, so take the argmax per COLUMN
true_ag <- apply(y_val_ag, 2, which.max)
acc_ag <- mean(pred_ag == true_ag)
cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag))
```

### 4.5 LR scheduler + gradient clipping

```{r}
ag_mod2 <- ag_sequential(
  ag_linear(4L, 64L, activation = "relu"),
  ag_batch_norm(64L),
  ag_dropout(0.3),
  ag_linear(64L, 32L, activation = "relu"),
  ag_linear(32L, 3L)
)
params2 <- ag_mod2$parameters()
opt2 <- optimizer_adam(params2, lr = 1e-3)
sch2 <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5)

ag_train(ag_mod2)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb)
    })
    grads2 <- backward(loss2)
    clip_grad_norm(params2, grads2, max_norm = 5.0)
    opt2$step(grads2)
    opt2$zero_grad()
  }
  sch2$step()
}

ag_eval(ag_mod2)
probs2 <- ag_predict_cm(ag_mod2, x_val_ag)
acc2 <- mean(apply(probs2, 2, which.max) == true_ag)
cat(sprintf("ag + cosine + clip val accuracy: %.4f\n", acc2))
```

### 4.6 Manual model from raw `ag_param`

For full control, build a network directly from parameters:

```{r}
# Two-layer MLP from raw parameters; He-style init scaled by sqrt(2 / fan_in)
make_net <- function(n_in, n_hidden, n_out) {
  W1 <- ag_param(matrix(rnorm(n_hidden * n_in) * sqrt(2 / n_in), n_hidden, n_in))
  b1 <- ag_param(matrix(0.0, n_hidden, 1L))
  W2 <- ag_param(matrix(rnorm(n_out * n_hidden) * sqrt(2 / n_hidden), n_out, n_hidden))
  b2 <- ag_param(matrix(0.0, n_out, 1L))
  list(
    forward = function(x) {
      ag_add(ag_matmul(W2, ag_relu(ag_add(ag_matmul(W1, x), b1))), b2)
    },
    parameters = function() list(W1 = W1, b1 = b1, W2 = W2, b2 = b2)
  )
}

set.seed(1)
net <- make_net(4L, 32L, 3L)
opt_r <- optimizer_adam(net$parameters(), lr = 1e-3)

for (ep in seq_len(200L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b - 1L) * BS + 1L, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({
      loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb)
    })
    gr <- backward(loss_r)
    opt_r$step(gr)
    opt_r$zero_grad()
  }
}

probs_r <- ag_predict_cm(net, x_val_ag)
acc_r <- mean(apply(probs_r, 2, which.max) == true_ag)
cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r))
```

---

## 5. GPU / device selection

```{r}
# Use Vulkan GPU if available, fall back to CPU
device <- tryCatch({
  ag_device("gpu")
  "gpu"
}, error = function(e) "cpu")
cat("Running on:", device, "\n")

# Mixed precision (f16 on GPU, f32 on CPU)
ag_dtype(if (device == "gpu") "f16" else "f32")
```

After calling `ag_device()` and `ag_dtype()`, all subsequent `ag_param` and
`ag_tensor` calls use the selected device and dtype. The sequential API
(`ggml_fit`) picks up the backend automatically.

---

## API summary

| Task | Sequential / Functional | Autograd |
|------|------------------------|----------|
| Build model | `ggml_model_sequential()` + `ggml_layer_*` | `ag_sequential()` or raw `ag_param` |
| Functional DAG | `ggml_input()` → `ggml_model()` | — |
| Compile | `ggml_compile()` | manual optimizer |
| Train | `ggml_fit()` | `with_grad_tape` + `backward` + `opt$step` |
| Evaluate | `ggml_evaluate()` | manual loop |
| Predict | `ggml_predict()` | manual forward |
| Save / load | `ggml_save_model()` / `ggml_load_model()` | — |
| Callbacks | `ggml_callback_early_stopping()`, `ggml_schedule_*` | `lr_scheduler_*`, `clip_grad_norm` |
| Device | automatic | `ag_device()` / `ag_dtype()` |