## ----echo=FALSE, results="hide"-----------------------------------------------
# Limit printed digits and fix the random seed; the previous option
# values are stored in `opt`.
opt <- options(digits = 3)
set.seed(1)

## ----eval=FALSE---------------------------------------------------------------
# # Install the package from a remote repository.
# install.packages("Allspice")

## -----------------------------------------------------------------------------
# Activate the library and show its version and attached objects.
library("Allspice")
packageVersion("Allspice")
ls("package:Allspice")

## ----eval=FALSE---------------------------------------------------------------
# # Access function documentation (not shown in vignette).
# ? Allspice::classifier

## ----eval=FALSE---------------------------------------------------------------
# # Run all code examples (not shown in vignette).
# fn <- system.file("examples.R", package = "Allspice")
# source(fn)

## -----------------------------------------------------------------------------
# Generate gene RNA read counts and preview both parts of the result.
simu <- bcellALL(300)
print(simu$counts[1:5, 1:6])
print(simu$metadata[1:6, ])

## -----------------------------------------------------------------------------
# Set up a classifier for genetic B-cell ALL subtypes.
cls <- classifier()

## -----------------------------------------------------------------------------
# List covariates.
info <- information(cls)
print(info$covariates[, c("ASSET", "TITLE", "COVAR")])

## -----------------------------------------------------------------------------
# Set covariates.
covariates(cls) <- simu$metadata

## -----------------------------------------------------------------------------
# Load RNA-seq profiles.
profiles(cls) <- simu$counts

## -----------------------------------------------------------------------------
# Prediction results: keep the first element of the returned list and
# preview its first six rows.
pred <- predictions(cls)
primary <- pred[[1]]
print(primary[1:6, c("LABEL", "FREQ", "PROX", "EXCL")])

## -----------------------------------------------------------------------------
# Prediction results.
# Show the first five samples plus, when present, the first ambiguous
# and the first unclassified sample. head(x, 1L) is used instead of x[1]
# because indexing an empty which() result with [1] yields NA, which
# would add an all-NA row to the printed table.
ambig <- which(primary$CATEG == "Ambiguous")
uncla <- which(primary$CATEG == "Unclassified")
rows <- unique(c(1:5, head(ambig, 1L), head(uncla, 1L)))
print(primary[rows, c("LABEL", "MATCH", "FREQ", "PROX", "EXCL")])

## -----------------------------------------------------------------------------
# Access subtype labelling information.
info <- information(cls)
print(info$categories[1:5, c("ASSET", "TITLE", "CATEG", "LABEL")])

## -----------------------------------------------------------------------------
# Select successful classification: not ambiguous and FREQ above 0.9.
rows <- which((primary$CATEG != "Ambiguous") & (primary$FREQ > 0.9))
print(primary[rows[1], c("LABEL", "MATCH", "FREQ", "PROX", "EXCL")])

## ----results="hide", fig.width=8, fig.height=6, fig.align="center", fig.cap="Figure: Classification results. The predicted subtype label is written on the top left corner. The expected frequency of the subtype given the data profile is written as a percentage after the predicted label. RNA biomarker scores were also calculated for the presence of alterations in specific genes (center bar chart), note that a patient may have multiple driver genes in parallel. The right-most chart shows RNA biomarker scores for the predicted source tissue of the sample (in this simulated dataset, all samples were generated from B-cell ALL profiles). "----
# Show patient report for the sample printed above.
report(cls, name = rows[1], file = NULL)

## -----------------------------------------------------------------------------
# Select ambiguous classification with PROX above 0.9.
rows <- which((primary$CATEG == "Ambiguous") & (primary$PROX > 0.9))
print(primary[rows[1], c("LABEL", "MATCH", "FREQ", "PROX", "EXCL")])

## ----results="hide", fig.width=8, fig.height=6, fig.align="center", fig.cap="Figure: Classification results for a case with mixed transcriptional characteristics."----
# Show patient report for the sample printed above.
report(cls, name = rows[1], file = NULL)

## -----------------------------------------------------------------------------
# Select poor quality samples.
rows <- which(primary$CATEG == "Unclassified")
print(primary[rows[1], c("LABEL", "MATCH", "FREQ", "PROX", "EXCL")])

## ----results="hide", fig.width=8, fig.height=6, fig.align="center", fig.cap="Figure: Classification results for a case with atypical data."----
# Show patient report for the sample printed above.
report(cls, name = rows[1], file = NULL)

## -----------------------------------------------------------------------------
# Create a new empty asset and display its configuration.
bALL <- asset()
print(configuration(bALL))

## -----------------------------------------------------------------------------
# Re-configure asset and display the two changed settings.
configuration(bALL) <- c(ninput.max = 30, nonzero.min = 90)
print(configuration(bALL)[c("ninput.max", "nonzero.min")])

## -----------------------------------------------------------------------------
# Prepare asset title.
materials <- list(title = "Simutypes")

## -----------------------------------------------------------------------------
# Prepare RNA-seq read counts.
materials$dat <- simu$counts

## -----------------------------------------------------------------------------
# Prepare covariate data.
materials$covariates <- simu$metadata[, c("MALE", "AGE")]

## -----------------------------------------------------------------------------
# Prepare subtype information; rows labelled "Contaminated" are left out.
categ <- simu$metadata[, "SUBTYPE", drop = FALSE]
rows <- which(categ != "Contaminated")
materials$bits <- categ[rows, , drop = FALSE]

## -----------------------------------------------------------------------------
# Assemble the classification asset.
# NOTE(review): `bALL` is re-created here, so the re-configured asset from
# the earlier chunk is no longer referenced - confirm this is intended.
bALL <- asset()
assemble(bALL) <- materials

## -----------------------------------------------------------------------------
# Save asset to disk under a temporary path.
tpath <- tempfile()
export(bALL, folder = tpath)

## -----------------------------------------------------------------------------
# Create a classifier from the exported asset folder.
clstest <- classifier(tpath)

## -----------------------------------------------------------------------------
# Classify samples.
simutest <- bcellALL(5)
covariates(clstest) <- simutest$metadata
profiles(clstest) <- simutest$counts
primtest <- predictions(clstest)[[1]]
print(primtest[, c("LABEL", "MATCH", "FREQ", "PROX", "EXCL")])

## -----------------------------------------------------------------------------
# Show correct subtypes.
print(simutest$metadata)

## -----------------------------------------------------------------------------
# Iris flower dataset.
print(head(iris))

## -----------------------------------------------------------------------------
# Set row names. seq_len() is used rather than 1:nrow() so that the
# sequence is empty (instead of c(1, 0)) if the table has no rows.
flowers <- iris
rownames(flowers) <- paste0("flower", seq_len(nrow(flowers)))
print(flowers[c(1, 80, 150), ])

## -----------------------------------------------------------------------------
# Prepare training set from the first four columns and the species column.
materials <- list(title = "Iris species")
materials$dat <- t(flowers[, 1:4]) # vars on rows, samples on columns
materials$bits <- flowers[, "Species", drop = FALSE]

## -----------------------------------------------------------------------------
# Set human-readable category labels, named by the original category.
model <- asset()
labels <- c("Iris Setosa", "Iris Virginica", "Iris Versicolor")
names(labels) <- c("setosa", "virginica", "versicolor")
visuals(model) <- labels

## -----------------------------------------------------------------------------
# Configure a new asset (two separate assignments, as numeric and
# logical settings are kept in separate vectors here).
configuration(model) <- c(norm = FALSE, logarithm = FALSE)
configuration(model) <- c(nonzero.min = 0, nonzero.ratio = 0)
print(configuration(model))

## -----------------------------------------------------------------------------
# Assemble the classification asset and save it under a temporary path.
assemble(model) <- materials
tpath <- tempfile()
export(model, folder = tpath)

## -----------------------------------------------------------------------------
# Classify samples.
clsiris <- classifier(tpath)
profiles(clsiris) <- t(flowers[, 1:4])
iristest <- predictions(clsiris)[[1]]
print(iristest[c(1, 80, 150), c("LABEL", "MATCH", "PROX", "EXCL")])

## -----------------------------------------------------------------------------
# Summary of results: predicted label against the species column.
print(table(iristest$LABEL, flowers$Species))

## ----results="hide"-----------------------------------------------------------
# Default ALL classifier.
cls <- classifier()

## -----------------------------------------------------------------------------
# Predict source tissue; the third element of the predictions is used.
simu <- bcellALL(5)
covariates(cls) <- simu$metadata
profiles(cls) <- simu$counts
tissues <- predictions(cls)[[3]]
print(tissues[, c("LABEL", "CATEG", "MATCH", "MATCH.2nd")])

## -----------------------------------------------------------------------------
# Show asset contents: list the "subtypes" folder of the installed package.
base <- system.file(package = "Allspice")
folder <- file.path(base, "subtypes")
print(dir(folder))

## -----------------------------------------------------------------------------
# Category information.
dat <- read.delim(file.path(folder, "categories.txt"))
print(dat)

## -----------------------------------------------------------------------------
# Standardized subtype profiles.
dat <- read.delim(file.path(folder, "centroids.txt"))
print(dat[1:5, 1:6])
cat(nrow(dat), " genes, ", ncol(dat), " subtypes\n", sep = "")

## -----------------------------------------------------------------------------
# Regression coefficients.
dat <- read.delim(file.path(folder, "coefficients.txt"))
print(dat[1:5, ])

## -----------------------------------------------------------------------------
# Asset settings.
dat <- read.delim(file.path(folder, "configuration.txt"))
print(dat)

## -----------------------------------------------------------------------------
# Covariate statistics.
dat <- read.delim(file.path(folder, "covariates.txt"))
print(dat)

## -----------------------------------------------------------------------------
# Gene names.
dat <- read.delim(file.path(folder, "nomenclature.txt"))
print(dat[1:5, ])

## -----------------------------------------------------------------------------
# RNA reference profile.
dat <- read.delim(file.path(folder, "reference.txt"))
print(dat[1:5, ])
cat(nrow(dat), " genes\n", sep = "")

## ----echo=FALSE---------------------------------------------------------------
# Record the session details and the current time.
sessionInfo()
Sys.time()

## ----echo=FALSE, results="hide"-----------------------------------------------
# Restore the option values saved in `opt`.
options(opt)