--- title: "ARLClustering - Testing WordAdjacency dataset" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{ARLClustering - Testing WordAdjacency dataset} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ``` ```{r setup} library(arlclustering) #library(igraph) ``` ## Dataset description The Word Adjacency network dataset is provided as a gml file, containing 77 nodes and 254 edges. ## Loading network dataset - Graph Label : Word Adjacency Network - Total Nodes : 425 - Total Edges : 112 - Average Degree : 7.589286 ```{r} # Start the timer t1 <- system.time({ dataset_path <- system.file("extdata", "word_adjacencies.gml", package = "arlclustering") if (dataset_path == "") { stop("word_adjacencies.gml file not found") } g <- arlc_get_network_dataset(dataset_path, "Word Adjacency") g$graphLabel g$totalEdges g$totalNodes g$averageDegree }) # Display the total processing time message("Graph loading Processing Time: ", t1["elapsed"], " seconds\n") ``` ## Generate Transactions Next, we generate transactions from the graph, with a total rows of 102 ```{r} # Start the timer t2 <- system.time({ transactions <- arlc_gen_transactions(g$graph) transactions }) # Display the total processing time message("Transaction dataset Processing Time: ", t2["elapsed"], " seconds\n") ``` ## Get Apriori Thresholds We obtain the apriori thresholds for the generated transactions. The following are the thresholds for the apriori execution: - The Minimum Support : 0.03 - The Minimum Confidence : 0.5 - The Lift : 20.4 - The Gross Rules length : 649 - The selection Ratio : 6 ```{r} # Start the timer t3 <- system.time({ params <- arlc_get_apriori_thresholds(transactions, supportRange = seq(0.03, 0.04, by = 0.01), Conf = 0.5) params$minSupp params$minConf params$bestLift params$lenRules params$ratio }) # Display the total processing time message("Graph loading Processing Time: ", t3["elapsed"], " seconds\n") ``` ## Generate Gross Rules We use the obtained parameters to generate gross rules, where we obtain 649 rules. ```{r} # Start the timer t4 <- system.time({ minLenRules <- 1 maxLenRules <- params$lenRules if (!is.finite(maxLenRules) || maxLenRules > 5*length(transactions)) { maxLenRules <- 5*length(transactions) } grossRules <- arlc_gen_gross_rules(transactions, minSupp = params$minSupp, minConf = params$minConf, minLenRules = minLenRules+1, maxLenRules = maxLenRules) #grossRules$TotalRulesWithLengthFilter }) # Display the total number of clusters and the total processing time message("Gross rules generation Time: ", t4["elapsed"], " seconds\n") ``` ## Filter Significant and Non-Redundant Rules We filter out redundant rules from the generated gross rules. Next, we filter out non-significant rules from the non-redundant rules, and we obtain the 528 rule items. ```{r} t5 <- system.time({ NonRedRules <- arlc_get_NonR_rules(grossRules$GrossRules) NonRSigRules <- arlc_get_significant_rules(transactions, NonRedRules$FiltredRules) #NonRSigRules$TotFiltredRules }) # Display the total number of clusters and the total processing time message("\nClearing rules Processing Time: ", t5["elapsed"], " seconds\n") ``` ## Clean and genarate final Rules We clean the final set of rules to prepare for clustering. Then, we generate clusters based on the cleaned rules. The total identified clusters is 20 clusters. 
## Clean and Generate Final Rules

We clean the final set of rules to prepare for clustering, then generate clusters based on the cleaned rules. The process identifies a total of 20 clusters.

```{r}
t6 <- system.time({
  cleanedRules <- arlc_clean_final_rules(NonRSigRules$FiltredRules)
  clusters <- arlc_generate_clusters(cleanedRules)
  #clusters$TotClusters
})
# Display the total processing time
message("Cleaning final rules Processing Time: ", t6["elapsed"], " seconds\n")
# Display the cumulative processing time across all steps
message("The total consumed time is: ",
        t1["elapsed"] + t2["elapsed"] + t3["elapsed"] +
        t4["elapsed"] + t5["elapsed"] + t6["elapsed"], " seconds\n")
```

## Plot Clusters

Finally, we visualize the identified clusters.

```{r}
arlc_clusters_plot(g$graph,
                   g$graphLabel,
                   clusters$Clusters)
```
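As a final sanity check, the raw cluster list can also be summarized without plotting. This sketch assumes `clusters$Clusters` is a list of per-cluster node vectors, i.e. the structure passed to `arlc_clusters_plot` above.

```{r, eval = FALSE}
# Optional (not run): distribution of cluster sizes,
# assuming clusters$Clusters is a list of node vectors
sizes <- lengths(clusters$Clusters)
summary(sizes)  # smallest, median, and largest cluster sizes
table(sizes)    # number of clusters at each size
```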