The Facebook Friends social network dataset is distributed as a GML file containing 362 nodes and 1988 edges.
# Time the dataset lookup and the graph construction
t1 <- system.time({
  # Locate the GML file shipped in the package's extdata directory
  dataset_path <- system.file(
    "extdata", "Facebook_fiends.gml",
    package = "arlclustering"
  )
  # An empty path means the file could not be found
  if (!nzchar(dataset_path)) {
    stop("Facebook_fiends.gml file not found")
  }
  g <- arlc_get_network_dataset(dataset_path, "Facebook Friends")
  # Summary fields of the loaded graph object
  g$graphLabel
  g$totalNodes
  g$totalEdges
  g$averageDegree
})
# Report the elapsed time of the loading step
message("Graph loading Processing Time: ", t1[["elapsed"]], " seconds\n")
#> Graph loading Processing Time: 0.0379999999999994 seconds
Next, we generate transactions from the graph, yielding a total of 314 rows.
We obtain the Apriori thresholds for the generated transactions. The thresholds used for the Apriori execution are:

- Minimum support: 0.04
- Minimum confidence: 0.5
- Lift: 22.42857
- Gross rules length: 74748
- Selection ratio: 238
# Time the search for the Apriori thresholds
t3 <- system.time({
  # Supports from 0.04 to 0.05 in steps of 0.01, confidence fixed at 0.5
  params <- arlc_get_apriori_thresholds(
    transactions,
    supportRange = seq(0.04, 0.05, by = 0.01),
    Conf = 0.5
  )
  # Fields of the returned threshold set
  params$minSupp
  params$minConf
  params$bestLift
  params$lenRules
  params$ratio
})
# Report the elapsed time of the threshold step; the label names this step
# rather than repeating the graph-loading label used for t1
message("Apriori thresholds Processing Time: ", t3["elapsed"], " seconds\n")
#> Apriori thresholds Processing Time: 0.0979999999999999 seconds
Using the obtained parameters, we generate the gross rules, which yields 74748 rules.
# Time the generation of the gross rule set
t4 <- system.time({
  minLenRules <- 1
  maxLenRules <- params$lenRules
  # Keep maxLenRules only when it is finite and within 5x the number of
  # transactions; otherwise fall back to that cap
  if (!(is.finite(maxLenRules) && maxLenRules <= 5 * length(transactions))) {
    maxLenRules <- 5 * length(transactions)
  }
  # The minimum rule length passed on is minLenRules + 1
  grossRules <- arlc_gen_gross_rules(
    transactions,
    minSupp = params$minSupp,
    minConf = params$minConf,
    minLenRules = minLenRules + 1,
    maxLenRules = maxLenRules
  )
  grossRules$TotalRulesWithLengthFilter
})
#> Apriori
#>
#> Parameter specification:
#> confidence minval smax arem aval originalSupport maxtime support minlen
#> 0.5 0.1 1 none FALSE TRUE 5 0.04 2
#> maxlen target ext
#> 1570 rules TRUE
#>
#> Algorithmic control:
#> filter tree heap memopt load sort verbose
#> 0.1 TRUE TRUE FALSE TRUE 2 TRUE
#>
#> Absolute minimum support count: 12
#>
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[342 item(s), 314 transaction(s)] done [0.00s].
#> sorting and recoding items ... [122 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 5 6 7 8 done [0.01s].
#> writing ... [74748 rule(s)] done [0.01s].
#> creating S4 object ... done [0.02s].
We filter out redundant rules from the generated gross rules. Next, we filter out non-significant rules from the non-redundant rules, which leaves 10678 rule items.
# Time the two rule-filtering passes
t5 <- system.time({
  # Pass 1: filter the gross rules with arlc_get_NonR_rules()
  NonRedRules <- arlc_get_NonR_rules(grossRules$GrossRules)
  # Pass 2: filter the result of pass 1 with arlc_get_significant_rules()
  NonRSigRules <- arlc_get_significant_rules(
    transactions,
    NonRedRules$FiltredRules
  )
  NonRSigRules$TotFiltredRules
})
# Report the elapsed time of the rule-filtering step
message("\nClearing rules Processing Time: ", t5[["elapsed"]], " seconds\n")
#>
#> Clearing rules Processing Time: 0.789999999999999 seconds
We clean the final set of rules to prepare for clustering. Then, we generate clusters based on the cleaned rules. In total, 20 clusters are identified.
# Time the final rule cleaning and the cluster generation
t6 <- system.time({
  # Clean the filtered rules, then build clusters from the cleaned set
  cleanedRules <- arlc_clean_final_rules(NonRSigRules$FiltredRules)
  clusters <- arlc_generate_clusters(cleanedRules)
  clusters$TotClusters
})
# Report the elapsed time of the cleaning and clustering step
message(
  "Cleaning final rules Processing Time: ", t6[["elapsed"]], " seconds\n"
)
#> Cleaning final rules Processing Time: 0.729000000000001 seconds
Finally, we visualize the identified clusters.
# Plot the clusters on the loaded graph, labelled with the graph's label
arlc_clusters_plot(g$graph, g$graphLabel, clusters$Clusters)
#>
#> Total Identified Clusters: 20
#> =========================
#> Community 01:1 5 26 59 61 92 95 97 112 120 137 147 164 178 204 214 247 253 254 302 312 321 329 355
#> Community 02:10 16 39 45 70 74 84 98 127 134 135 138 140 149 193 216 227 230 234 255 351 360
#> Community 03:24 54 91 100 102 129 131 132 152 165 176 182 215 222 289 295 320 324 341 347
#> Community 04:30 131 132 165 182 289
#> Community 05:34 70 127 140 149 192 227 230 298
#> Community 06:36 39 135 140 149 351 360
#> Community 07:45 70 84 127 134 135 138 140 149 193 227 255 283 351 360
#> Community 08:54 66 69 72 76 100 102 129 131 132 165 169 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#> Community 09:66 69 72 76 77 99 129 131 132 165 169 182 189 203 211 215 222 263 289 295 311 320 324 341 347
#> Community 10:70 80 84 98 127 134 135 138 140 149 192 193 216 227 230 234 255 256 298 337 351 360
#> Community 11:76 99 100 129 131 132 165 169 177 182 189 203 211 215 222 263 289 295 311 320 324 328 341 347
#> Community 12:77 100 129 131 132 165 182 215 222 289 324 341 347
#> Community 13:107 167 187 231 335
#> Community 14:129 131 132 165 169 177 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#> Community 15:131 132 152 165 169 176 177 182 189 203 211 215 222 233 263 289 295 311 320 324 328 341 347
#> Community 16:135 138 140 149 192 193 216 227 230 234 255 267 351 360
#> Community 17:149 192 193 216 227 230 234 255 256 298 337 351 356 360
#> Community 18:160 187 335
#> Community 19:165 169 176 177 182 189 203 211 215 222 233 246 263 289 295 311 320 324 328 341 347
#> Community 20:187 231 244 335
#> =========================