## Code extracted from a knitr document (the `## ----` lines are the original
## chunk headers, including their chunk options). The script walks through
## dispersion measures from the `tlda` package: first for single items
## (`disp()`, `disp_R()`, `disp_DP()`, `disp_DA()`, `disp_DKL()`), then for a
## whole term-document matrix (`disp_tdm()`), and finally looks at how DP
## relates to item frequency.
##
## Plotting chunks save the graphical parameters they change and restore them
## afterwards, so running this file as a plain script leaves `par()` untouched.

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
library(tlda)

## ----echo=FALSE---------------------------------------------------------------
# Fixed seed so the randomly drawn excerpt of the TDM is reproducible.
set.seed(8)

# 10 random rows (row 1 is skipped: it holds the part sizes, see below) and
# 8 random columns out of the first 500.
tdm_excerpt <- biber150_ice_gb[sample(2:151, size = 10), sample(1:500, 8)]
tdm_excerpt

## ----eval=FALSE---------------------------------------------------------------
# addmargins(tdm, margin = 1)

## -----------------------------------------------------------------------------
biber150_ice_gb[1:10, 1:8]

## -----------------------------------------------------------------------------
disp(
  subfreq = biber150_ice_gb[2, ],  # row 2 in the TDM represents "a"
  partsize = biber150_ice_gb[1, ]  # row 1 in the TDM contains the part sizes
)

## -----------------------------------------------------------------------------
# Extract the two rows used in all single-item examples below.
speaker_word_count <- biber150_spokenBNC2014["word_count", ]
subfreq_actually <- biber150_spokenBNC2014["actually", ]

## -----------------------------------------------------------------------------
disp(
  subfreq = subfreq_actually,
  partsize = speaker_word_count
)

## -----------------------------------------------------------------------------
disp(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  directionality = "gries"
)

## -----------------------------------------------------------------------------
disp(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  freq_adjust = TRUE
)

## -----------------------------------------------------------------------------
disp_R(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  type = "relative_withsize"
)

## -----------------------------------------------------------------------------
disp_DP(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  formula = "gries_2008"
)

## -----------------------------------------------------------------------------
# Same item, three values of `formula`, stacked row-wise for comparison.
# Printing is switched off so only the combined table is shown.
compare_DPs <- rbind(
  disp_DP(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    formula = "gries_2008",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DP(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    formula = "lijffijt_gries_2012",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DP(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    formula = "egbert_etal_2020",
    verbose = FALSE,
    print_score = FALSE
  )
)

rownames(compare_DPs) <- c(
  "Gries (2008)",
  "Lijffijt & Gries (2012)",
  "Egbert et al. (2020)"
)

compare_DPs

## -----------------------------------------------------------------------------
disp_DA(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  procedure = "shortcut"
)

## -----------------------------------------------------------------------------
# Same item, three values of `procedure`, stacked row-wise for comparison.
compare_DAs <- rbind(
  disp_DA(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    procedure = "basic",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DA(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    procedure = "shortcut",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DA(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    procedure = "shortcut_mod",
    verbose = FALSE,
    print_score = FALSE
  )
)

rownames(compare_DAs) <- c(
  "Basic procedure",
  "Shortcut",
  "Shortcut (modified)"
)

compare_DAs

## -----------------------------------------------------------------------------
disp_DKL(
  subfreq = subfreq_actually,
  partsize = speaker_word_count,
  standardization = "base_e"
)

## -----------------------------------------------------------------------------
# Same item, three values of `standardization`, stacked row-wise.
compare_DKLs <- rbind(
  disp_DKL(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    standardization = "o2p",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DKL(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    standardization = "base_e",
    verbose = FALSE,
    print_score = FALSE
  ),
  disp_DKL(
    subfreq = subfreq_actually,
    partsize = speaker_word_count,
    standardization = "base_2",
    verbose = FALSE,
    print_score = FALSE
  )
)

rownames(compare_DKLs) <- c(
  "Odds-to-probability",
  "Base e",
  "Base 2"
)

compare_DKLs

## -----------------------------------------------------------------------------
# Dispersion scores for every item in the TDM; the part sizes are taken from
# the first row of the matrix (`row_partsize = "first"`).
DM_ice_gb <- disp_tdm(
  tdm = biber150_ice_gb,
  row_partsize = "first",
  print_score = FALSE,
  verbose = FALSE
)

## -----------------------------------------------------------------------------
# Convert to a data frame so columns can be accessed with `$`.
DM_ice_gb <- data.frame(DM_ice_gb)

## -----------------------------------------------------------------------------
round(DM_ice_gb[1:10, ], 2)

## ----fig.width=3.5, fig.height=2.5, warning=FALSE, message=FALSE, fig.align='center', out.width="35%"----
# par() returns the previous values of the parameters it sets; keep them so
# they can be restored after the plot.
oldpar <- par(mar = c(4, 4, 1, 0.3), xpd = TRUE)

hist(
  DM_ice_gb$DP,
  main = NULL,
  xlab = "DP",
  xlim = c(0, 1),
  breaks = seq(0, 1, .05),
  col = "grey60"
)

par(oldpar)

## ----fig.width=3.5, fig.height=2.5, warning=FALSE, message=FALSE, fig.align='center', out.width="35%"----
oldpar <- par(mar = c(4, 4, 1, 0.3), xpd = TRUE)

hist(
  DM_ice_gb$D,
  main = NULL,
  xlab = "D",  # axis label matches the plotted column
  xlim = c(0, 1),
  breaks = seq(0, 1, .05),
  col = "grey60"
)

par(oldpar)

## ----fig.width=3.5, fig.height=2.5, warning=FALSE, message=FALSE, fig.align='center', out.width="35%"----
oldpar <- par(mar = c(4, 4, 1, 0.3), xpd = TRUE)

hist(
  DM_ice_gb$D2,
  main = NULL,
  xlab = "D2",  # axis label matches the plotted column
  xlim = c(0, 1),
  breaks = seq(0, 1, .05),
  col = "grey60"
)

par(oldpar)

## ----fig.width=5, fig.height=5, fig.align='center'----------------------------
# Scatterplot matrix of all score columns (drawn before `frequency` is added).
pairs(DM_ice_gb, gap = 0, cex = .5, cex.labels = 1)

## -----------------------------------------------------------------------------
# Item frequency = row total across all parts, excluding row 1 (part sizes).
# NOTE(review): assumes the rows of the disp_tdm() result are in the same
# order as rows 2..n of the TDM -- confirm against the package documentation.
DM_ice_gb$frequency <- rowSums(biber150_ice_gb[-1, ])

## ----fig.width=3, fig.height=2.5, warning=FALSE, message=FALSE, fig.align='center', out.width="35%"----
oldpar <- par(mar = c(4, 4, 1, 0.3), xpd = TRUE)

plot(
  DM_ice_gb$DP ~ log(DM_ice_gb$frequency),
  xlab = "Log frequency",
  ylab = "DP",
  ylim = c(0, 1)
)

par(oldpar)

## -----------------------------------------------------------------------------
# Rank correlation between DP and log frequency; incomplete pairs are dropped.
cor(
  DM_ice_gb$DP,
  log(DM_ice_gb$frequency),
  method = "spearman",
  use = "complete.obs"
)

## -----------------------------------------------------------------------------
# Same TDM-wide computation, now with frequency adjustment switched on.
DM_ice_gb_nofreq <- disp_tdm(
  tdm = biber150_ice_gb,
  row_partsize = "first",
  freq_adjust = TRUE,
  freq_adjust_method = "even",
  print_score = FALSE,
  verbose = FALSE
)

DM_ice_gb_nofreq <- data.frame(DM_ice_gb_nofreq)

DM_ice_gb_nofreq$frequency <- rowSums(biber150_ice_gb[-1, ])

str(DM_ice_gb_nofreq)

## ----fig.width=3, fig.height=2.5, warning=FALSE, message=FALSE, fig.align='center', out.width="35%"----
# Save exactly the parameters that are changed (mar and xpd) so that
# par(oldpar) restores both of them.
oldpar <- par(mar = c(4, 4, 1, 0.3), xpd = TRUE)

plot(
  DM_ice_gb_nofreq$DP_nofreq ~ log(DM_ice_gb_nofreq$frequency),
  xlab = "Log frequency",
  ylab = "DP",
  ylim = c(0, 1)
)

par(oldpar)

## -----------------------------------------------------------------------------
cor(
  DM_ice_gb_nofreq$DP_nofreq,
  log(DM_ice_gb_nofreq$frequency),
  method = "spearman",
  use = "complete.obs"
)