## ----------------------------------------------------------------------------- library(nTARP) ## ----------------------------------------------------------------------------- data(iris) head(iris) ## ----------------------------------------------------------------------------- table(iris$Species) ## ----------------------------------------------------------------------------- set.seed(123532) #random seed for reproducibility cluster_solution <- nTARP(data = iris[,1:4], number_of_projections = 1000, withinss_threshold = 0.36 ) ## ----------------------------------------------------------------------------- cluster_solution$OptimalSolution paste("The optimal within sum of squares was", round(cluster_solution$OptimalWithinss,2)) ## ----------------------------------------------------------------------------- print("This is the random vector associated with the best cluster.") cluster_solution$OptimalProjection print("This is the boundary where the clusters transition.") cluster_solution$Threshold print("The *direction* tells us which cluster has the larger mean, so in this case, points with values larger than the threshold should be assigned to cluster 2.") cluster_solution$Direction ## ----------------------------------------------------------------------------- observation_to_classify <- iris[1,1:4] observation_after_projection <- sum(observation_to_classify * cluster_solution$OptimalProjection) print("The resulting 1-D projection is...") observation_after_projection paste("Our direction, ", cluster_solution$Direction,", tells us that cluster 2 has a mean bigger than the threshold, so since the projected value ", round(observation_after_projection,2)," is larger than the threshold ",round(cluster_solution$Threshold,2)," we should assign it to cluster 2.", sep="") ## ----------------------------------------------------------------------------- hist(cluster_solution$AllWithinss) ## ----------------------------------------------------------------------------- one_feasible_solution <- list(cluster_solution$Clusterings[[1]],cluster_solution$Clusterings[[2]]) one_feasible_solution ## ----------------------------------------------------------------------------- length(cluster_solution$Clusterings)/2 ## ----------------------------------------------------------------------------- set.seed(123532) #random seed for reproducibility cluster_solution_bisecting <- nTARP_bisecting(data = iris[,1:4], number_of_projections = 1000, withinss_threshold = 0.36, minimum_cluster_size_percent = 20 #default value ) ## ----------------------------------------------------------------------------- cluster_solution_bisecting$BestClusters ## ----------------------------------------------------------------------------- labeled_clusters <- build_solution_from_labeled_clusters(nTARP_best_clusters= cluster_solution_bisecting$BestClusters, ids = 1:nrow(iris)) labeled_clusters[sample(1:150,10,replace = FALSE),] #We'll take a sampling of the observations ## ----------------------------------------------------------------------------- table(labeled_clusters$FinalClusterID,iris$Species) ## ----------------------------------------------------------------------------- labeled_clusters <- consolidate_clusters(cluster_path_matrix = labeled_clusters, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters$FinalClusterID,iris$Species) ## ----------------------------------------------------------------------------- #Merge clusters two more times labeled_clusters <- consolidate_clusters(cluster_path_matrix = labeled_clusters, first_cluster_to_combine = 1, second_cluster_to_combine = 2) labeled_clusters <- consolidate_clusters(cluster_path_matrix = labeled_clusters, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters$FinalClusterID,iris$Species) ## ----------------------------------------------------------------------------- set.seed(123532) #random seed for reproducibility cluster_solution_bisecting_contextual <- nTARP_bisecting(data = iris[,1:4], number_of_projections = 1000, withinss_threshold = 0.36, minimum_cluster_size_percent = 20, contextual_variable = iris$Species ) labeled_clusters <- build_solution_from_labeled_clusters(nTARP_best_clusters= cluster_solution_bisecting_contextual$BestClusters, ids = 1:nrow(iris)) ## ----------------------------------------------------------------------------- labeled_clusters_with_context <- build_solution_from_labeled_clusters(nTARP_best_clusters= cluster_solution_bisecting_contextual$BestClusters, ids = 1:nrow(iris), contextual_variables_df = iris) head(labeled_clusters_with_context) ## ----------------------------------------------------------------------------- table(labeled_clusters$FinalClusterID,iris$Species) ## ----------------------------------------------------------------------------- labeled_clusters <- consolidate_clusters(cluster_path_matrix = labeled_clusters, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters$FinalClusterID,iris$Species) ## ----------------------------------------------------------------------------- library(HDclassif) data(wine) head(wine) wine[,2:14] <- scale(wine[,2:14]) ## ----------------------------------------------------------------------------- table(wine$class) ## ----------------------------------------------------------------------------- set.seed(123532) #random seed for reproducibility cluster_solution_bisecting_wine <- nTARP_bisecting(data = wine[,2:14], number_of_projections = 100^2, withinss_threshold = 0.36, minimum_cluster_size_percent = 20 #default value ) labeled_clusters_wine <- build_solution_from_labeled_clusters(nTARP_best_clusters= cluster_solution_bisecting_wine$BestClusters, ids = 1:nrow(wine)) table(labeled_clusters_wine$FinalClusterID,wine$class) ## ----------------------------------------------------------------------------- #Merge clusters two more times print("This is the result of combining clusters 1 and 2, during which 3-6 were relabled to be 1-4 with the merged cluster being labeled 5.") labeled_clusters_wine <- consolidate_clusters(cluster_path_matrix = labeled_clusters_wine, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters_wine$FinalClusterID,wine$class) #Merge 1 and 2 again print("This is the result of combining clusters 3 and 4 (that were relabeled 1 and 2 in the last step). Now there are 4 clusters left.") labeled_clusters_wine <- consolidate_clusters(cluster_path_matrix = labeled_clusters_wine, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters_wine$FinalClusterID,wine$class) #Merge 1 and 2 again print("We combine that last two clusters, giving us 3 clusters at the end.") labeled_clusters_wine <- consolidate_clusters(cluster_path_matrix = labeled_clusters_wine, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters_wine$FinalClusterID,wine$class) ## ----------------------------------------------------------------------------- set.seed(123532) #random seed for reproducibility cluster_solution_bisecting_wine <- nTARP_bisecting(data = wine[,2:14], number_of_projections = 100^2, withinss_threshold = 0.36, minimum_cluster_size_percent = 20, contextual_variable = wine$class ) labeled_clusters_wine <- build_solution_from_labeled_clusters(nTARP_best_clusters= cluster_solution_bisecting_wine$BestClusters, ids = 1:nrow(wine)) table(labeled_clusters_wine$FinalClusterID,wine$class) ## ----------------------------------------------------------------------------- #Merge clusters two more times print("This is the result of combining clusters 1 and 2, during which 3-5 were relabled to be 1-3 with the merged cluster being labeled 4.") labeled_clusters_wine <- consolidate_clusters(cluster_path_matrix = labeled_clusters_wine, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters_wine$FinalClusterID,wine$class) #Merge 1 and 2 again print("We combine that last two clusters, giving us 3 clusters at the end.") labeled_clusters_wine <- consolidate_clusters(cluster_path_matrix = labeled_clusters_wine, first_cluster_to_combine = 1, second_cluster_to_combine = 2) table(labeled_clusters_wine$FinalClusterID,wine$class)