## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) # get original graphic parameters to be able to # revert back at the end of the vignette original_mfrow <- par()$mfrow original_xpd <- par()$xpd original_mar <- par()$mar original_scipen <- options()$scipen if(exists(".Random.seed", .GlobalEnv) == F) { runif(1) } oldseed <- get(".Random.seed", .GlobalEnv) oldRNGkind <- RNGkind() # set some new parameters for viewing and reproducibility options(scipen = 999) set.seed(123) ## ----setup-------------------------------------------------------------------- library("TDApplied") ## ----echo = F,fig.height = 3,fig.width = 7,fig.align = 'center',eval = requireNamespace("TDAstats")---- par(mfrow = c(1,4)) circ <- TDAstats::circle2d[sample(1:100,50),] plot(x = circ[,1],y = circ[,2],main = "Approximation 1:\nindividual data points",xlab = "",ylab = "",las = 1) plot(x = circ[,1],y = circ[,2],main = "Approximation 2:\nnot a loop",xlab = "",ylab = "",las = 1) for(i in 1:(nrow(circ)-1)) { for(j in (i+1):nrow(circ)) { if(sqrt((circ[i,1]-circ[j,1])^2+(circ[i,2] - circ[j,2])^2) <= 0.2) { lines(c(circ[i,1],circ[j,1]),c(circ[i,2],circ[j,2])) } } } plot(x = circ[,1],y = circ[,2],main = "Approximation 3:\nloop",xlab = "",ylab = "",las = 1) for(i in 1:(nrow(circ)-1)) { for(j in (i+1):nrow(circ)) { if(sqrt((circ[i,1]-circ[j,1])^2+(circ[i,2] - circ[j,2])^2) <= 1) { lines(c(circ[i,1],circ[j,1]),c(circ[i,2],circ[j,2])) } } } plot(x = circ[,1],y = circ[,2],main = "Approximation 4:\nnot a loop",xlab = "",ylab = "",las = 1) for(i in 1:(nrow(circ)-1)) { for(j in (i+1):nrow(circ)) { if(sqrt((circ[i,1]-circ[j,1])^2+(circ[i,2] - circ[j,2])^2) <= 2) { lines(c(circ[i,1],circ[j,1]),c(circ[i,2],circ[j,2])) } } } par(mfrow = c(1,1)) ## ----echo = F,eval = T-------------------------------------------------------- # create the data circ <- data.frame(x = c(-0.815663170207959,-0.595099344842628,0.864889292249816,0.20793951765139,-0.954866650195679,-0.18706988463915,-0.798725432243614,0.830591026518797,-0.661799977235305,0.999878714877245,0.245504222689744,-0.431326750789015,0.666897829439335,0.30720650008886,0.661328840033743,-0.76039885584628,0.239379052558954,-0.351042298907497,-0.829639564028719,0.71216384463492,-0.940714425455924,-0.949907158729059,0.101402707416082,0.986585898475086,0.33772426530395,0.734256674456601,0.771307272532461,0.980540492964891,-0.750935667779769,0.63057972598311,0.75993549565125,-0.997652044265574,-0.199144715289604,-0.996513284779077,0.997484507122883,0.918374523107964,0.54816769740782,0.22028039764542,-0.965072372227865,-0.86565030108487,0.999975356226944,-0.0356376975101777,-0.998775901784022,0.879402540916111,0.389455305917558,-0.73827286958174,0.322030655401438,0.321397994754972,0.943823546436826,-0.565572997267532),y = c(0.578527089051413,-0.803652144754107,-0.501962660116878,0.978141685544025,-0.297034813353725,-0.982346608006102,0.601695673814639,0.55688288415649,-0.749680458683131,0.0155741945354593,0.969395521261319,-0.902195784768357,-0.745149169689602,-0.951642877503506,0.750096104069088,0.649456372690012,-0.970926191425475,-0.936359602064153,-0.558299376498163,0.702013289329204,-0.339199601619947,0.312532222011246,-0.994845460827303,0.163242962880818,0.941245090624598,-0.678871958484023,0.63646295362616,-0.196316941847027,0.660375365119076,-0.776124480466288,-0.649998494190016,0.0684864845989492,-0.979970092590699,0.0834342451204145,-0.0708848224221418,0.395712313816768,0.836368444836729,-0.975436592717935,-0.261983427648545,0.500649134855612,-0.00702046571072753,0.999364775503006,-0.0494641083566077,0.476078954618127,-0.921045365165398,0.674502164592185,-0.946729241642889,0.946944205836586,-0.330449864868202,0.824698238607201)) ## ----echo = F,warning=F,fig.height = 4,fig.width = 4,fig.align = 'center'----- plot(x = circ$x,y = circ$y,xlab = "x",ylab = "y",main = "circ") ## ----echo = T,eval = F-------------------------------------------------------- # # import the ripser module # ripser <- import_ripser() # # # calculate the persistence diagram # diag <- PyH(X = circ,maxdim = 1,thresh = 2,ripser = ripser) # # # view last five rows of the diagram # diag[47:51,] ## ----echo = F,eval = T-------------------------------------------------------- diag <- data.frame(dimension = c(rep(0,50),1),birth = c(rep(0,50),0.5579783),death = c(0.0123064760118723,0.0144490944221616,0.0149910748004913,0.0156172784045339,0.0172923970967531,0.0189713705331087,0.0196240246295929,0.0225948672741652,0.0286996569484472,0.0365069359540939,0.038569450378418,0.0386403761804104,0.0444764532148838,0.0477333702147007,0.0612373314797878,0.0639129132032394,0.0699725300073624,0.0705153122544289,0.0721508488059044,0.0791449695825577,0.0858016163110733,0.0872511267662048,0.0882881134748459,0.0893174782395363,0.0925402045249939,0.0944025367498398,0.0944980531930923,0.099234826862812,0.117955945432186,0.120451688766479,0.126571387052536,0.139067515730858,0.142296731472015,0.148265853524208,0.158034011721611,0.181465998291969,0.188804805278778,0.19113427400589,0.20612421631813,0.21517525613308,0.228875741362572,0.233790531754494,0.235128790140152,0.242270082235336,0.244500055909157,0.245646774768829,0.254552245140076,0.281323730945587,0.288743227720261,2,1.73859250545502)) diag[47:51,] ## ----echo=FALSE,fig.height = 5,fig.width = 5,fig.align = 'center'------------- theta <- stats::runif(n = 100,min = 0,max = 2*pi) x <- cos(theta) y <- sin(theta) origin <- data.frame(x = 0,y = 0) new_data <- rbind(data.frame(x = x,y = y), origin) layout <- as.matrix(new_data) rownames(layout) <- as.character(1:nrow(new_data)) vrs <- vr_graphs(new_data, eps = c(0.0001,1)) plot_vr_graph(vrs, eps = (0.0001), layout = layout, plot_isolated_vertices = TRUE,vertex_labels = FALSE) ## ----------------------------------------------------------------------------- enc_rad <- enclosing_radius(new_data, distance_mat = FALSE) print(enc_rad) ## ----echo = FALSE,fig.height = 5,fig.width = 5,fig.align = 'center'----------- plot_vr_graph(vrs, eps = enc_rad, layout = layout,vertex_labels = F) ## ----echo = T,eval = F-------------------------------------------------------- # # convert TDA diagram into data frame # diag1 <- TDA::ripsDiag(circ,maxdimension = 1,maxscale = 2,library = "dionysus") # diag1_df <- diagram_to_df(diag1) # class(diag1_df) ## ----echo = F----------------------------------------------------------------- c("data.frame") ## ----echo = T,eval = F-------------------------------------------------------- # # convert TDAstats diagram into data frame # diag2 <- TDAstats::calculate_homology(circ,dim = 1,threshold = 2) # diag2_df <- diagram_to_df(diag1) # class(diag2_df) ## ----echo = F----------------------------------------------------------------- c("data.frame") ## ----echo = F,fig.height = 3,fig.width = 7,fig.align = 'center'--------------- D1 = data.frame(dimension = c(0),birth = c(2),death = c(3)) D2 = data.frame(dimension = c(0),birth = c(2,0),death = c(3.3,0.5)) D3 = data.frame(dimension = c(0),birth = c(0),death = c(0.5)) par(mfrow = c(1,3)) plot_diagram(D1,title = "D1",max_radius = 4,legend = F) plot_diagram(D2,title = "D2",max_radius = 4,legend = F) plot_diagram(D3,title = "D3",max_radius = 4,legend = F) ## ----echo = F,fig.height = 3,fig.width = 5,fig.align = 'center'--------------- par(mfrow = c(1,2)) plot(x = c(D1$birth,D2$birth),y = c(D1$death,D2$death),main = "Best matching D1,D2",xlab = "",ylab = "",xlim = c(0,4),ylim = c(0,4)) abline(a = 0,b = 1) lines(c(2,2),c(3,3.3)) lines(c(0,0.25),c(0.5,0.25)) plot(x = c(D1$birth,D3$birth),y = c(D1$death,D3$death),main = "Best matching D1,D3",xlab = "",ylab = "",xlim = c(0,4),ylim = c(0,4)) abline(a = 0,b = 1) lines(c(2,2.5),c(3,2.5)) lines(c(0,0.25),c(0.5,0.25)) par(mfrow = c(1,1)) ## ----echo = F,fig.height = 3,fig.width = 7,fig.align = 'center'--------------- par(mfrow = c(1,3)) x = seq(-4,4,0.01) y = seq(-4,4,0.01) D1_with_diag = rbind(D1,data.frame(dimension = c(0),birth = c(0.25),death = c(0.25))) z1 = outer(x,y,FUN = function(x,y){ # sigma = 1 return((exp(-((x-D1_with_diag[1,2])^2+(y-D1_with_diag[1,3])^2)/(2*1^2)))/sqrt(2*pi*1^2) + (exp(-((x-D1_with_diag[2,2])^2+(y-D1_with_diag[2,3])^2)/(2*1^2)))/sqrt(2*pi*1^2)) }) z1 = z1/sum(z1) image(x = x,y = y,z1,main = "Distribution for D1",xlab = "",xlim = c(-4,4),ylim = c(-4,4),ylab = "") abline(a = 0,b = 1) D3_with_diag = rbind(D3,data.frame(dimension = c(0),birth = c(2.5),death = c(2.5))) z3 = outer(x,y,FUN = function(x,y){ # sigma = 1 return((exp(-((x-D3_with_diag[1,2])^2+(y-D3_with_diag[1,3])^2)/(2*1^2)))/sqrt(2*pi*1^2) + (exp(-((x-D3_with_diag[2,2])^2+(y-D3_with_diag[2,3])^2)/(2*1^2)))/sqrt(2*pi*1^2)) }) z3 = z3/sum(z3) image(x = x,y = y,z3,main = "Distribution for D3",xlab = "",xlim = c(-4,4),ylim = c(-4,4),ylab = "") abline(a = 0,b = 1) image(x = x,y = y,z1-z3,main = "Difference of distributions",xlab = "",xlim = c(-4,4),ylim = c(-4,4),ylab = "") abline(a = 0,b = 1) par(mfrow = c(1,1)) ## ----echo = T----------------------------------------------------------------- # calculate 2-wasserstein distance between D1 and D2 diagram_distance(D1,D2,dim = 0,p = 2,distance = "wasserstein") # calculate 2-wasserstein distance between D1 and D3 diagram_distance(D1,D3,dim = 0,p = 2,distance = "wasserstein") # calculate bottleneck distance between D1 and D2 diagram_distance(D1,D2,dim = 0,p = Inf,distance = "wasserstein") # calculate bottleneck distance between D1 and D3 diagram_distance(D1,D3,dim = 0,p = Inf,distance = "wasserstein") ## ----echo = T----------------------------------------------------------------- # Fisher information metric calculation between D1 and D2 for sigma = 1 diagram_distance(D1,D2,dim = 0,distance = "fisher",sigma = 1) # Fisher information metric calculation between D1 and D3 for sigma = 1 diagram_distance(D1,D3,dim = 0,distance = "fisher",sigma = 1) ## ----echo = T,eval = F-------------------------------------------------------- # # Fisher information metric calculation between D1 and D2 for sigma = 1 # diagram_distance(D1,D2,dim = 0,distance = "fisher",sigma = 1) ## ----echo = F----------------------------------------------------------------- 0.02354779 ## ----echo = T,eval = F-------------------------------------------------------- # # fast approximate Fisher information metric calculation between D1 and D3 for sigma = 1 # diagram_distance(D1,D2,dim = 0,distance = "fisher",sigma = 1,rho = 0.001) ## ----echo = F----------------------------------------------------------------- 0.02354779 ## ----echo = T----------------------------------------------------------------- # calculate the kernel value between D1 and D2 with sigma = 2, t = 2 diagram_kernel(D1,D2,dim = 0,sigma = 2,t = 2) # calculate the kernel value between D1 and D3 with sigma = 2, t = 2 diagram_kernel(D1,D3,dim = 0,sigma = 2,t = 2) ## ----echo = T,fig.height = 5,fig.width = 5,fig.align = 'center'--------------- plot_diagram(diag,title = "Circle diagram") ## ----------------------------------------------------------------------------- # determine significant features circ_result <- universal_null(circ, thresh = enclosing_radius(circ)) circ_result$subsetted_diag ## ----echo = T,fig.height = 5,fig.width = 5,fig.align = 'center',eval = requireNamespace("TDA")---- # create circle with noise dataset and plot circ_with_noise <- circ x_noise <- stats::rnorm(n = 50,sd = 0.1) y_noise <- stats::rnorm(n = 50,sd = 0.1) circ_with_noise$x <- circ_with_noise$x + x_noise circ_with_noise$y <- circ_with_noise$y + y_noise plot(circ_with_noise) # rerun the inference procedure library(TDA) noisy_circ_result <- universal_null(circ_with_noise, FUN_diag = "ripsDiag", thresh = enclosing_radius(circ_with_noise), return_pvals = TRUE) noisy_circ_result$subsetted_diag noisy_circ_result$pvals ## ----eval = requireNamespace("TDA")------------------------------------------- # inference without infinite cycle inference res_non_inf_small_thresh <- universal_null(circ_with_noise, FUN_diag = "ripsDiag", thresh = 0.9) res_non_inf_small_thresh$subsetted_diag # inference with infinite cycle inference res_inf_small_thresh <- universal_null(circ_with_noise, FUN_diag = "ripsDiag", thresh = 0.9, infinite_cycle_inference = TRUE) res_inf_small_thresh$subsetted_diag ## ----echo = F,fig.height = 5,fig.width = 5,fig.align = 'center',eval = requireNamespace("TDAstats") & requireNamespace("TDA")---- par(mfrow = c(1,1)) pt <- as.numeric(diag[which(diag[,1L] == 1),])[2:3] plot_diagram(D = diag,title = "Circ diagram with confidence interval",legend = T,max_radius = 2*0.3953059 + pt[[2]]) graphics::rect(xleft = pt[[1]]-0.3953059,xright = pt[[1]]+0.3953059,ybottom = pt[[2]]-0.3953059,ytop = pt[[2]]+0.3953059) graphics::lines(x = c(pt[[1]],pt[[1]] + 0.3953059),y = c(pt[[2]],pt[[2]])) graphics::lines(x = c(pt[[1]],pt[[1]]),y = c(pt[[2]],pt[[2]] - 0.3953059)) graphics::text(x = c(pt[[1]] + 0.3953059/2,0.4),y = c(1.83,1.55),c("t","t")) ## ----fig.height = 4,fig.width = 8,fig.align = 'center',eval = F--------------- # # calculate the bootstrapped persistence thresholds using 2 cores # # and 30 iterations. We'll use the distance matrix of circ to # # make representative cycles more comprehensible # library("TDA") # thresh <- bootstrap_persistence_thresholds(X = as.matrix(dist(circ)), # FUN_diag = 'ripsDiag', # FUN_boot = 'ripsDiag', # distance_mat = T, # maxdim = 1,thresh = 2,num_workers = 2, # alpha = 0.05,num_samples = 30, # return_subsetted = T,return_pvals = T, # calculate_representatives = T) # diag <- thresh$diag # # # plot original diagram and thresholded diagram side-by-side, including # # p-values. These p-values are the smallest possible (1/31) when there # # are 30 bootstrap iterations # par(mfrow = c(1,2)) # # plot_diagram(diag,title = "Circ diagram") # # plot_diagram(diag,title = "Circ diagram with thresholds", # thresholds = thresh$thresholds) # text(x = c(0.2,0.5),y = c(2,1.8), # paste("p = ",round(thresh$pvals,digits = 3)), # cex = 0.5) ## ----fig.height = 4,fig.width = 8,fig.align = 'center',eval = T,echo = F------ thresh <- readRDS("thresh.rds") diag <- thresh$diag par(mfrow = c(1,2)) plot_diagram(diag,title = "Circ diagram") plot_diagram(diag,title = "Circ diagram with thresholds", thresholds = thresh$thresholds) text(x = c(0.2,0.5),y = c(2,1.8), paste("p = ",round(thresh$pvals,digits = 3)), cex = 0.5) ## ----echo = F----------------------------------------------------------------- par(mfrow = c(1,1)) ## ----echo = T,eval = F-------------------------------------------------------- # # ripser has already been imported, so calculate diagram with representatives # diag_rep <- PyH(circ,maxdim = 1,thresh = 2,ripser = ripser,calculate_representatives = T) # # # identify the loops in the diagram # diag_rep$diagram[which(diag_rep$diagram$dimension == 1),] ## ----echo = F----------------------------------------------------------------- data.frame(dimension = c(1),birth = c(0.5579783),death = c(1.738593),row.names = c("50")) ## ----echo = T,eval = F-------------------------------------------------------- # # show the representative for the loop, just the first five rows # diag_rep$representatives[[2]][[1]][1:5,] ## ----echo = F----------------------------------------------------------------- representative = matrix(data = c(50,42,46,42,50,4,42,29,42,16,50,11,42,7,42,1,50,48,50,25,42,40,46,4,29,4,16,4,46,11,29,11,16,11,7,4,48,46,4,1,11,7,46,25,48,29,50,37,48,16,29,25,11,1,25,16,42,22,48,7,40,4,25,7,48,1,40,11,25,1,50,15,48,40,40,25,50,20,46,37,37,29,37,16,42,34,22,4,42,32,50,27,22,11,37,7,37,1,46,15,29,15,48,22,50,8,43,42,16,15,25,22,46,20,40,37,29,20,15,7,20,16,50,44,15,1,34,4,46,27,32,4,20,7,29,27,34,11,27,16,20,1,32,11,50,36,40,15,42,39,27,7,46,8,48,34,48,32,29,8,43,4,34,25,37,22,27,1,42,5,40,20,16,8,32,25,43,11,42,21,46,44,8,7,44,29,40,27,8,1,44,16,48,43,43,25,22,15,46,36,44,7,50,24,36,29,40,8,36,16,44,1,39,4,22,20,37,34,5,4,37,32,39,11,36,7),nrow = 113,ncol = 2,byrow = T) representative[1:5,] ## ----echo = T,eval = F-------------------------------------------------------- # unique(c(diag_rep$representatives[[2]][[1]][,1],diag_rep$representatives[[2]][[1]][,2])) ## ----echo = F----------------------------------------------------------------- unique(c(representative[,1],representative[,2])) ## ----echo = T,eval = F-------------------------------------------------------- # plot(x = circ$x,y = circ$y,xlab = "x",ylab = "y",main = "circ with representative") # for(i in 1:nrow(circ)) # { # for(j in 1:nrow(circ)) # { # pt1 <- circ[i,] # pt2 <- circ[j,] # if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) <= 1.7) # { # graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]])) # } # } # } # # for(i in 1:nrow(diag_rep$representatives[[2]][[1]])) # { # pt1 <- circ[diag_rep$representatives[[2]][[1]][i,1],] # pt2 <- circ[diag_rep$representatives[[2]][[1]][i,2],] # if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) <= 1.7) # { # graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]]),col = "red") # } # } ## ----echo = F,warning=F,fig.height = 4,fig.width = 4,fig.align = 'center'----- plot(x = circ$x,y = circ$y,xlab = "x",ylab = "y",main = "circ with representative") for(i in 1:nrow(circ)) { for(j in 1:nrow(circ)) { pt1 <- circ[i,] pt2 <- circ[j,] if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) < 1.7) { graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]])) } } } for(i in 1:nrow(representative)) { pt1 <- circ[representative[i,1],] pt2 <- circ[representative[i,2],] if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) <= 1.7) { graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]]),col = "red") } } ## ----echo = F,warning=F,fig.height = 4,fig.width = 4,fig.align = 'center'----- plot(x = circ$x,y = circ$y,xlab = "x",ylab = "y",main = "circ with representative") for(i in 1:nrow(circ)) { for(j in 1:nrow(circ)) { pt1 <- circ[i,] pt2 <- circ[j,] if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) <= 0.6009) { graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]])) } } } for(i in 1:nrow(representative)) { pt1 <- circ[representative[i,1],] pt2 <- circ[representative[i,2],] if(sqrt((pt1[[1]] - pt2[[1]])^2 + (pt1[[2]] - pt2[[2]])^2) <= 0.6009) { graphics::lines(x = c(pt1[[1]],pt2[[1]]),y = c(pt1[[2]],pt2[[2]]),col = "red") } } ## ----echo = T,eval = T-------------------------------------------------------- # get half of loop's birth radius eps_1 <- diag[nrow(diag),2L]/2 # get mean of loop's birth and death radii eps_2 <- (diag[nrow(diag),3L] + diag[nrow(diag),2L])/2 # compute two VR graphs gs <- vr_graphs(X = circ,eps = c(eps_1,eps_2)) ## ----eval = requireNamespace("igraph"),fig.height = 5,fig.width = 5,fig.align = 'center'---- # plot first graph plot_vr_graph(gs,eps_1) # plot second graph layout <- plot_vr_graph(gs,eps_2,return_layout = TRUE) layout <- apply(layout,MARGIN = 2,FUN = function(X){ return(-1 + 2*(X - min(X))/(max(X) - min(X))) }) ## ----eval = requireNamespace('igraph'),echo = F,fig.width = 5,fig.align = 'center'---- # get the stimuli in the loop stimuli_in_loop <- unique(as.numeric(thresh$subsetted_representatives[[2]])) # create colors for the data points, light blue if not in the loop # and red if in the loop colors <- rep("lightblue",50) colors[stimuli_in_loop] <- "red" # plot only component containing the loop stimuli with vertex colors plot_vr_graph(gs,eps_2,cols = colors,component_of = stimuli_in_loop[[1]],layout = layout) ## ----eval = requireNamespace('igraph'),fig.width = 5,fig.align = 'center'----- # plot only component containing the loop stimuli with vertex colors plot_vr_graph(gs,eps_2,cols = colors, component_of = stimuli_in_loop[[1]], vertex_labels = FALSE, layout = layout) # get indices of vertices in loop # not necessary in this case but necessary when we have # removed some vertices from the graph vertex_inds <- match(stimuli_in_loop,as.numeric(rownames(layout))) # add volcano image over loop nodes # image could be anything like rasters read from # png files! this is just an example.. utils::data("volcano") volcano <- (volcano - min(volcano)) / diff(range(volcano)) for(i in vertex_inds) { graphics::rasterImage(volcano,xleft = layout[i,1L] - 0.05, xright = layout[i,1L] + 0.05, ybottom = layout[i,2L] - 0.05, ytop = layout[i,2L] + 0.05) } ## ----echo = F,fig.height = 3,fig.width = 7,fig.align = 'center'--------------- par(mfrow = c(1,3)) plot_diagram(D1,title = "D1",max_radius = 4,legend = F) plot_diagram(D2,title = "D2",max_radius = 4,legend = F) plot_diagram(D3,title = "D3",max_radius = 4,legend = F) ## ----echo = F----------------------------------------------------------------- par(mfrow = c(1,1)) ## ----echo = F----------------------------------------------------------------- generate_TDApplied_vignette_data <- function(num_D1,num_D2,num_D3){ # num_D1 is the number of desired copies of D1, and likewise # for num_D2 and num_D3 # create data D1 = data.frame(dimension = c(0),birth = c(2),death = c(3)) D2 = data.frame(dimension = c(0),birth = c(2,0),death = c(3.3,0.5)) D3 = data.frame(dimension = c(0),birth = c(0),death = c(0.5)) # make noisy copies noisy_copies <- lapply(X = 1:(num_D1 + num_D2 + num_D3),FUN = function(X){ # i stores the number of the data frame to make copies of: # i = 1 is for D1, i = 2 is for D2 and i = 3 is for D3 i <- 1 if(X > num_D1 & X <= num_D1 + num_D2) { i <- 2 } if(X > num_D1 + num_D2) { i <- 3 } # store correct data in noisy_copy noisy_copy <- get(paste0("D",i)) # add Gaussian noise to birth and death values n <- nrow(noisy_copy) noisy_copy$dimension <- as.numeric(as.character(noisy_copy$dimension)) noisy_copy$birth <- noisy_copy$birth + stats::rnorm(n = n,mean = 0,sd = 0.05) noisy_copy$death <- noisy_copy$death + stats::rnorm(n = n,mean = 0,sd = 0.05) # make any birth values which are less than 0 equal 0 noisy_copy[which(noisy_copy$birth < 0),2] <- 0 # make any birth values which are greater than their death values equal their death values noisy_copy[which(noisy_copy$birth > noisy_copy$death),2] <- noisy_copy[which(noisy_copy$birth > noisy_copy$death),3] return(noisy_copy) }) # return list containing num_D1 noisy copies of D1, then # num_D2 noisy copies of D2, and finally num_D3 noisy copies # of D3 return(noisy_copies) } ## ----echo = F,fig.height = 5,fig.width = 5,fig.align = 'center'--------------- par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) noisy_copies_D1 = generate_TDApplied_vignette_data(2,0,0) cols = factor(c("D1","D1\'","D2\'\'"),levels = c("D1","D1\'","D1\'\'")) plot(x = c(D1$birth,noisy_copies_D1[[1]]$birth,noisy_copies_D1[[2]]$birth),y = c(D1$death,noisy_copies_D1[[1]]$death,noisy_copies_D1[[2]]$death),main = "D1 and noisy copies",xlab = "",ylab = "",xlim = c(0,4),ylim = c(0,4),col = c("black","red","blue"),bty = "L") lines(x = c(-0.15,4.15),y = c(-0.15,4.15)) legend("topright", inset=c(-0.2,0), legend=levels(cols), pch=16, col=c("black","red","blue")) ## ----echo = T----------------------------------------------------------------- # permutation test between three diagrams g1 <- generate_TDApplied_vignette_data(3,0,0) g2 <- generate_TDApplied_vignette_data(0,3,0) g3 <- generate_TDApplied_vignette_data(0,0,3) perm_test <- permutation_test(g1,g2,g3, num_workers = 2, dims = c(0)) perm_test$p_values ## ----echo = T----------------------------------------------------------------- # create 10 noisy copies of D1 and D2 g1 <- generate_TDApplied_vignette_data(10,0,0) g2 <- generate_TDApplied_vignette_data(0,10,0) # do independence test with sigma = t = 1 indep_test <- independence_test(g1,g2,dims = c(0),num_workers = 2) indep_test$p_values ## ----echo = F,fig.height = 3,fig.width = 6,fig.align = 'center'--------------- # plot par(mfrow = c(1,2)) plot(x = circ[,1],y = circ[,2],main = "circ",xlab = "",ylab = "",las = 1) theta <- stats::runif(n = 50,min = 0,max = 2*pi) circ2 <- data.frame(x = cos(theta),y = sin(theta)) plot(x = circ2[,1],y = circ2[,2],main = "circ2",xlab = "",ylab = "",las = 1) ## ----------------------------------------------------------------------------- mod_comp <- permutation_model_inference(circ, circ2, iterations = 20, thresh = 2, num_samples = 3, num_workers = 2L) mod_comp$p_values ## ----echo = F,fig.height = 3,fig.width = 6,fig.align = 'center'--------------- # plot par(mfrow = c(1,2)) plot(x = circ[,1],y = circ[,2],main = "circ",xlab = "",ylab = "",las = 1) circ_noise <- data.frame(x = circ[,1] + rnorm(50,sd = 0.01),y = circ[,2] + rnorm(50,sd = 0.01)) plot(x = circ_noise[,1],y = circ_noise[,2],main = "circ_noise",xlab = "",ylab = "",las = 1) ## ----------------------------------------------------------------------------- mod_comp_paired <- permutation_model_inference(circ, circ_noise, iterations = 20, thresh = 2, num_samples = 3, paired = TRUE, num_workers = 2L) mod_comp_paired$p_values ## ----eval = F----------------------------------------------------------------- # # in this case we are creating 3 samples of the rows of circ # # (and equivalently the other two datasets) # boot_samples <- lapply(X = 1:3, # FUN = function(X){return(unique(sample(1:nrow(circ),size = nrow(circ),replace = TRUE)))}) # # # carry out three model comparisons between circ, circ2 and circ3 # mod_comp_1_2 <- permutation_model_inference(circ, circ2, iterations = 20, # thresh = 2, num_samples = 3, # paired = TRUE, num_workers = 2L, # samp = boot_samples) # mod_comp_1_3 <- permutation_model_inference(circ, circ3, iterations = 20, # thresh = 2, num_samples = 3, # paired = TRUE, num_workers = 2L, # samp = boot_samples) # mod_comp_2_3 <- permutation_model_inference(circ2, circ3, iterations = 20, # thresh = 2, num_samples = 3, # paired = TRUE, num_workers = 2L, # samp = boot_samples) ## ----echo = T----------------------------------------------------------------- # create noisy copies of D1, D2 and D3 g <- generate_TDApplied_vignette_data(3,3,3) # calculate kmeans clusters with centers = 3, and sigma = t = 0.1 clust <- diagram_kkmeans(diagrams = g,centers = 3,dim = 0,t = 0.1,sigma = 0.1,num_workers = 2) # display cluster labels clust$clustering@.Data ## ----echo = T----------------------------------------------------------------- # create nine new diagrams g_new <- generate_TDApplied_vignette_data(3,3,3) # predict cluster labels predict_diagram_kkmeans(new_diagrams = g_new,clustering = clust,num_workers = 2) ## ----echo = T,fig.height = 3,fig.width = 6,fig.align = 'center'--------------- # create 9 diagrams based on D1, D2 and D3 g <- generate_TDApplied_vignette_data(3,3,3) # calculate their 2D MDS embedding in dimension 0 with the bottleneck distance mds <- diagram_mds(diagrams = g,dim = 0,p = Inf,k = 2,num_workers = 2) # plot par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(mds[,1],mds[,2],xlab = "Embedding coordinate 1",ylab = "Embedding coordinate 2", main = "MDS plot",col = as.factor(rep(c("D1","D2","D3"),each = 3)),bty = "L") legend("topright", inset=c(-0.2,0), legend=levels(as.factor(c("D1","D2","D3"))), pch=16, col=unique(as.factor(c("D1","D2","D3")))) ## ----echo = T,fig.height = 3,fig.width = 6,fig.align = 'center'--------------- # create noisy copies of D1, D2 and D3 g <- generate_TDApplied_vignette_data(3,3,3) # calculate their 2D PCA embedding with sigma = t = 2 pca <- diagram_kpca(diagrams = g,dim = 0,t = 2,sigma = 2,features = 2,num_workers = 2) # plot par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(pca$pca@rotated[,1],pca$pca@rotated[,2],xlab = "Embedding coordinate 1", ylab = "Embedding coordinate 2",main = "PCA plot", col = as.factor(rep(c("D1","D2","D3"),each = 3))) legend("topright",inset = c(-0.2,0), legend=levels(as.factor(c("D1","D2","D3"))), pch=16, col=unique(as.factor(c("D1","D2","D3")))) ## ----echo = T,fig.height = 3,fig.width = 6,fig.align = 'center'--------------- # create nine new diagrams g_new <- generate_TDApplied_vignette_data(3,3,3) # project new diagrams onto old model new_pca <- predict_diagram_kpca(new_diagrams = g_new,embedding = pca,num_workers = 2) # plot par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(new_pca[,1],new_pca[,2],xlab = "Embedding coordinate 1", ylab = "Embedding coordinate 2",main = "PCA prediction plot", col = as.factor(rep(c("D1","D2","D3"),each = 3))) legend("topright",inset = c(-0.2,0), legend=levels(as.factor(c("D1","D2","D3"))), pch=16, col=unique(as.factor(c("D1","D2","D3")))) ## ----echo = T----------------------------------------------------------------- # create thirty noisy copies of D1, D2 and D3 g <- generate_TDApplied_vignette_data(10,10,10) # create response vector y <- as.factor(rep(c("D1","D2","D3"),each = 10)) # fit model with cross validation model_svm <- diagram_ksvm(diagrams = g,cv = 2,dim = c(0), y = y,sigma = c(1,0.1),t = c(1,2), num_workers = 2) ## ----echo = T----------------------------------------------------------------- # create nine new diagrams g_new <- generate_TDApplied_vignette_data(3,3,3) # predict predict_diagram_ksvm(new_diagrams = g_new,model = model_svm,num_workers = 2) ## ----echo = F----------------------------------------------------------------- # reset parameters par(mfrow = original_mfrow,xpd = original_xpd,mar = original_mar) options(scipen = original_scipen) do.call("RNGkind",as.list(oldRNGkind)) assign(".Random.seed", oldseed, .GlobalEnv)