## ----include = FALSE----------------------------------------------------------
library(quickSentiment)

## ----setup--------------------------------------------------------------------
library(doParallel)

# CRAN limits the number of cores used during package checks
cores <- min(2, parallel::detectCores())
registerDoParallel(cores = cores)

## -----------------------------------------------------------------------------
# Look for the file in the installed package first
csv_path <- system.file("extdata", "tweets.csv", package = "quickSentiment")

# Fallback for when you are building the package locally
if (csv_path == "") {
  csv_path <- "../inst/extdata/tweets.csv"
}

tweets <- read.csv(csv_path)

# Fixed seed so the train/test split inside pipeline() is reproducible
set.seed(123)

## -----------------------------------------------------------------------------
# Clean the raw tweet text, then derive a binary sentiment label:
# "P" (positive) when the average score is above zero, "N" otherwise.
tweets$cleaned_text <- pre_process(tweets$Tweet)
tweets$sentiment <- ifelse(tweets$Avg > 0, "P", "N")

## -----------------------------------------------------------------------------
result <- pipeline(
  # --- Define the vectorization method ---
  # Options: "bow" (raw counts), "tf" (term frequency), "tfidf"
  vect_method = "tf",

  # --- Define the model to train ---
  # Options: "logit", "rf", "xgb"
  model_name = "rf",

  # --- Specify the data and column names ---
  df = tweets,
  text_column_name = "cleaned_text",    # The column with our preprocessed text
  sentiment_column_name = "sentiment",  # The column with the target variable

  # --- Set vectorization options ---
  # Use n_gram = 2 for unigrams + bigrams, or 1 for just unigrams
  n_gram = 1
)

## -----------------------------------------------------------------------------
# Score the same data with the fitted pipeline and store the predicted labels
tweets$sentimentPredict <- prediction(
  pipeline_object = result,
  df = tweets,
  text_column = "cleaned_text"
)