The conversim package provides tools for analyzing similarity between conversations, with a focus on calculating topic, lexical, semantic, stylistic, and sentiment similarities. The package can handle comparisons between two long speeches as well as sequences of conversational exchanges within one or multiple dyads. Utility functions are also provided to help researchers explore and visualize conversational patterns.
You can install conversim from CRAN:
install.packages("conversim")
library(conversim)
load(system.file("extdata", "dyad_example_data.Rdata", package = "conversim"))
load(system.file("extdata", "speeches_data.RData", package = "conversim"))
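Before running any comparisons, you may want to inspect the example objects loaded above using base R:

# Optional: look at the structure of the bundled example data
str(speeches_data)
head(dyad_example_data)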
Below are examples of how to use the main functions in the conversim package.
# preprocess_text function
preprocessed_A <- preprocess_text(speeches_data$text[1])
preprocessed_B <- preprocess_text(speeches_data$text[2])
# topic_similarity function
lda_similarity <- topic_similarity(speeches_data$text[1], speeches_data$text[2], method = "lda", num_topics = 5)
lsa_similarity <- topic_similarity(speeches_data$text[1], speeches_data$text[2], method = "lsa", num_topics = 5)
# lexical_similarity function
lex_similarity <- lexical_similarity(preprocessed_A, preprocessed_B)
# semantic_similarity function
tfidf_similarity <- semantic_similarity(speeches_data$text[1], speeches_data$text[2], method = "tfidf")
word2vec_similarity <- semantic_similarity(speeches_data$text[1], speeches_data$text[2], method = "word2vec")
# structural_similarity function
struct_similarity <- structural_similarity(strsplit(speeches_data$text[1], "\n")[[1]], strsplit(speeches_data$text[2], "\n")[[1]])
# stylistic_similarity function
style_similarity <- stylistic_similarity(speeches_data$text[1], speeches_data$text[2])
# sentiment_similarity function
sent_similarity <- sentiment_similarity(speeches_data$text[1], speeches_data$text[2])
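The objects computed above can be gathered into a single list for a quick overview; calling str() on the list shows whether a given function returns a single score or a more detailed object. This collection step is only an illustration, not part of the conversim API:

# Gather the pairwise results computed above (illustrative only)
pairwise_results <- list(
  topic_lda = lda_similarity,
  topic_lsa = lsa_similarity,
  lexical = lex_similarity,
  semantic_tfidf = tfidf_similarity,
  semantic_word2vec = word2vec_similarity,
  structural = struct_similarity,
  stylistic = style_similarity,
  sentiment = sent_similarity
)
str(pairwise_results)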
# Preprocess the conversations from multiple dyads
preprocessed_data <- preprocess_dyads(dyad_example_data)
# Select one dyad for comparison (filter() and select() require dplyr)
library(dplyr)
conversation <- preprocessed_data %>% filter(dyad_id == 1) %>% select(speaker_id, processed_text)
# Calculate topic similarity sequence
topic_sim <- topic_sim_seq(conversation, method = "lda", num_topics = 2, window_size = 3)
# Calculate lexical similarity sequence
lexical_sim <- lex_sim_seq(conversation, window_size = 3)
# Calculate semantic similarity sequence
semantic_sim <- sem_sim_seq(conversation, method = "tfidf", window_size = 3)
# Calculate stylistic similarity sequence
stylistic_sim <- style_sim_seq(conversation, window_size = 3)
# Calculate sentiment similarity sequence
sentiment_sim <- sent_sim_seq(conversation, window_size = 3)
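To visualize one of these sequences, a minimal sketch is shown below. It assumes the object returned by topic_sim_seq() contains a numeric vector of windowed similarity values; the $similarities element name is an assumption, so check str(topic_sim) for the actual structure before plotting:

# Inspect the returned object first; the element name below is an assumption
str(topic_sim)
# Hypothetical sketch: plot windowed topic similarity across the conversation
plot(topic_sim$similarities, type = "b", xlab = "Window", ylab = "Topic similarity")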
# Preprocess the conversations from multiple dyads
preprocessed_data <- preprocess_dyads(dyad_example_data)
# Calculate topic similarity for multiple dyads
topic_sim_results <- topic_sim_dyads(preprocessed_data, method = "lda", num_topics = 3, window_size = 2)
# Calculate lexical similarity for multiple dyads
lexical_sim_results <- lexical_sim_dyads(preprocessed_data, window_size = 2)
# Calculate semantic similarity for multiple dyads
semantic_sim_results <- semantic_sim_dyads(preprocessed_data, method = "tfidf", window_size = 2)
# Calculate structural similarity for multiple dyads
structural_sim_results <- structural_sim_dyads(preprocessed_data)
# Calculate stylistic similarity for multiple dyads
stylistic_sim_results <- stylistic_sim_dyads(preprocessed_data)
# Calculate sentiment similarity for multiple dyads
sentiment_sim_results <- sentiment_sim_dyads(preprocessed_data)
# Calculate participant similarity for multiple dyads
participant_sim_results <- participant_sim_dyads(preprocessed_data)
# Calculate timing similarity for multiple dyads
timing_sim_results <- timing_sim_dyads(preprocessed_data)
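As with the single-dyad sequences, it is worth inspecting the *_sim_dyads() results before summarizing or plotting them; their exact structure is described in the package help pages, and the inspection below uses only base R:

# Inspect the multi-dyad results before further analysis
str(topic_sim_results)
str(participant_sim_results)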
For more tutorials, please visit liu-chao.site/conversim.