## ---- include = FALSE---------------------------------------------------------
library(knitr)
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
options(rmarkdown.html_vignette.check_title = FALSE) # to supress R-CMD check

## to fold/hook the code
hook_output <- knit_hooks$get("output")
knit_hooks$set(output = function(x, options) {
  lines <- options$output.lines
  if (is.null(lines)) {
    return(hook_output(x, options))  # pass to default hook
  }
  x <- unlist(strsplit(x, "\n"))
  more <- "..."
  if (length(lines) == 1) {
    if (length(x) > lines) {
      # truncate the output, but add ....
      x <- c(head(x, lines), more)
    }
  } else {
    x <- c(if (abs(lines[1]) > 1) more else NULL,
           x[lines],
           if (length(x) > lines[abs(length(lines))]) more else NULL
    )
  }
  # paste these lines together
  x <- paste(c(x, ""), collapse = "\n")
  hook_output(x, options)
})

modern_r <- getRversion() >= "4.1.0"

## ----setup, message = F, warning = F------------------------------------------
library(injurytools)
library(dplyr)
library(stringr)
library(tidyr)
library(lme4)
library(pscl)
# library(glmmTMB)
library(MASS)
library(ggplot2)
library(gridExtra)
library(knitr)

## ----setup2, echo = F---------------------------------------------------------
# set the global theme of all plots
theme_set(theme_bw())

## ----datasetup1, warning = F--------------------------------------------------
## 17/18
df_exposures1718 <- prepare_exp(df_exposures0 = 
                                  raw_df_exposures |> filter(season == "17/18"),
                                player        = "player_name",
                                date          = "year",
                                time_expo     = "minutes_played") |> 
  mutate(seasonb = date2season(date))
df_injuries1718 <- prepare_inj(df_injuries0   =
                                 raw_df_injuries |> filter(season == "17/18"),
                               player         = "player_name",
                               date_injured   = "from",
                               date_recovered = "until")
injd1718 <- prepare_all(data_exposures = df_exposures1718,
                        data_injuries  = df_injuries1718,
                        exp_unit = "matches_minutes")

## ----datasetup2, warning = F--------------------------------------------------
## 18/19
df_exposures1819 <- prepare_exp(df_exposures0 = 
                                  raw_df_exposures |> filter(season == "18/19"),
                                player        = "player_name",
                                date          = "year",
                                time_expo     = "minutes_played") |> 
  mutate(seasonb = date2season(date))
df_injuries1819 <- prepare_inj(df_injuries0   = 
                                 raw_df_injuries |> filter(season == "18/19"),
                               player         = "player_name",
                               date_injured   = "from",
                               date_recovered = "until")
injd1819 <- prepare_all(data_exposures = df_exposures1819,
                        data_injuries  = df_injuries1819,
                        exp_unit = "matches_minutes")

## ---- warning = F-------------------------------------------------------------
## calculate injury summary statistics
injds1718 <- injsummary(injd1718, quiet = T)
injds1819 <- injsummary(injd1819, quiet = T)

injds1718p <- injds1718$playerwise
injds1819p <- injds1819$playerwise

injdsp <- bind_rows("Season 17-18" = injds1718p,
                    "Season 18-19" = injds1819p,
                    .id = "season")

## ---- eval = F----------------------------------------------------------------
#  ## plot
#  p1 <- ggplot(data = injdsp) +
#    geom_histogram(aes(x = injincidence, fill = season)) +
#    facet_wrap(~season) +
#    scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
#    ylab("Number of players") +
#    xlab("Incidence (number of injuries per 100 player-match)") +
#    ggtitle("Histogram of injury incidence in each season") +
#    theme(legend.position = "none")
#  
#  p2 <- ggplot(data = injdsp) +
#    geom_histogram(aes(x = injburden, fill = season)) +
#    facet_wrap(~season) +
#    scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
#    ylab("Number of players") +
#    xlab("Burden (number of days lost due to injury per 100 player-match)") +
#    ggtitle("Histogram of injury burden in each season") +
#    theme(legend.position = "none")
#  
#  grid.arrange(p1, p2, ncol = 1)

## ---- echo = F, warning = F---------------------------------------------------
p1 <- ggplot(data = injdsp) + 
  geom_histogram(aes(x = injincidence, fill = season)) + 
  facet_wrap(~season) +
  scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
  ylab("Number of players") + 
  xlab("Incidence (number of injuries per 100 player-match)") +
  ggtitle(bquote("Histogram of injury" ~ bold("incidence") ~ "in each season")) + 
  theme(legend.position = "none")

p2 <- ggplot(data = injdsp) + 
  geom_histogram(aes(x = injburden, fill = season)) + 
  facet_wrap(~season) +
  scale_fill_manual(name = "", values = c("#E7B800", "#2E9FDF")) +
  ylab("Number of players") + 
  xlab("Burden (number of days lost due to injury per 100 player-match)") +
  ggtitle(bquote("Histogram of injury" ~ bold("burden") ~ "in each season")) + 
  theme(legend.position = "none")

## ---- eval = F----------------------------------------------------------------
#  theme_counts <- theme(axis.text = element_text(size = rel(1.2)),
#                        axis.title = element_text(size = rel(1.3)),
#                        strip.text = element_text(size = rel(1.4)),
#                        plot.title = element_text(size = rel(1.4)),
#                        legend.text = element_text(size =  rel(1.3)),
#                        legend.title = element_text(size = rel(1.3)))
#  p1 <- p1 + theme_counts
#  p2 <- p2 + theme_counts

## ---- warning = F, message = F, echo = F, fig.width = 10, fig.height = 6.8----
theme_counts <- theme(axis.text = element_text(size = rel(1.2)),
                      axis.title = element_text(size = rel(1.3)),
                      strip.text = element_text(size = rel(1.4)),
                      plot.title = element_text(size = rel(1.4)),
                      legend.text = element_text(size =  rel(1.3)),
                      legend.title = element_text(size = rel(1.3)))
p1 <- p1 + theme_counts
p2 <- p2 + theme_counts

grid.arrange(p1, p2, ncol = 1)

## -----------------------------------------------------------------------------
## 17/18
df_exposures1718 <- prepare_exp(df_exposures0 = 
                                  raw_df_exposures |> filter(season == "17/18"),
                                player        = "player_name",
                                date          = "year",
                                time_expo     = "minutes_played") |> 
  mutate(seasonb = date2season(date))

injds1718p <- injds1718p |> 
  mutate(seasonb = "2017/2018") |> 
  ## join to have info, such as position, age, citizenship etc.
  left_join(df_exposures1718, by = c("player" = "player", 
                                     "seasonb" = "seasonb")) |> 
  ## create positionb column 
  ## (so that the categories are: Attack, Defender, Goalkeeper and Midfield)
  mutate(positionb = factor(str_split_i(position, "_", 1)))

## -----------------------------------------------------------------------------
## quit Goalkeepers
injds1718p <- dplyr::filter(injds1718p, positionb != "Goalkeeper") |> 
  droplevels()

## -----------------------------------------------------------------------------
incidence_glm_pois <- glm(ninjuries ~ positionb, # + offset(log(totalexpo))
                          offset = log(totalexpo),
                          data = injds1718p,
                          family = poisson)

## ---- eval = F----------------------------------------------------------------
#  # incidence_glm_pois2 <- glmmTMB(formula = ninjuries ~ foot,
#  #                                offset = log(totalexpo),
#  #                                family = poisson(), data = injds1718p)
#  # summary(incidence_glm_pois2)

## -----------------------------------------------------------------------------
df_exposures <- prepare_exp(df_exposures0 = raw_df_exposures,
                                player        = "player_name",
                                date          = "year",
                                time_expo     = "minutes_played") |> 
  mutate(seasonb = date2season(date))

injdsp <- injdsp |> 
  mutate(seasonb = if_else(season == "Season 17-18", "2017/2018", "2018/2019")) |> 
  ## join to have info, such as position, age, citizenship etc.
  left_join(df_exposures, by = c("player" = "player",
                                 "seasonb" = "seasonb")) |> 
  ## create positionb column 
  ## (so that the categories are: Attack, Defender, Goalkeeper and Midfield)
  mutate(positionb = factor(str_split_i(position, "_", 1))) |> 
  droplevels()

## ---- eval = F----------------------------------------------------------------
#  incidence_glmm_pois <- glmer(formula = ninjuries ~ positionb + (1 | player),
#                               offset = log(totalexpo),
#                               data = injdsp,
#                               family = poisson)
#  # incidence_glmm_pois2 <- glmmTMB::glmmTMB(formula = ninjuries ~ positionb + (1 | player),
#  #                                          offset = log(totalexpo),
#  #                                          data = injdsp,
#  #                                          family = poisson)

## -----------------------------------------------------------------------------
burden_glm_pois <- glm(ndayslost ~ positionb, offset = log(totalexpo), ## or ~ foot
                       data = injds1718p,
                       family = poisson)

## -----------------------------------------------------------------------------
summary(burden_glm_pois)

## -----------------------------------------------------------------------------
cbind(estimate = exp(coef(burden_glm_pois)) * c(90*100, 1, 1), 
      exp(confint(burden_glm_pois)) * c(90*100, 1, 1)) |> # (to report per 100 player-matches)
  kable()

## ---- warning = F-------------------------------------------------------------
burden_glm_nb <- glm.nb(ndayslost ~ positionb + offset(log(totalexpo)),
                           data = injds1718p)

## -----------------------------------------------------------------------------
summary(burden_glm_nb)

## -----------------------------------------------------------------------------
burden_zinfpois <- zeroinfl(ndayslost ~ positionb | positionb,
                             offset = log(totalexpo),
                             data = injds1718p,
                             link = "logit",
                             dist = "poisson",
                             trace = FALSE, EM = FALSE)
## Or:
# burden_zinfpois <- glmmTMB::glmmTMB(formula = ndayslost ~ 1 +  positionb,
#                                     offset = log(totalexpo),
#                                     ziformula = ~ 1 + positionb,
#                                     data = injds1718p,
#                                     family = poisson)

## -----------------------------------------------------------------------------
summary(burden_zinfpois)

## -----------------------------------------------------------------------------
burden_zinfnb <- zeroinfl(ndayslost ~ positionb | positionb,
                             offset = log(totalexpo),
                             data = injds1718p,
                             link = "logit",
                             dist = "negbin",
                             trace = FALSE, EM = FALSE)
## Or:
# burden_zinfnb <- glmmTMB::glmmTMB(ndayslost ~ 1 + positionb, offset = log(totalexpo),
#                                      ziformula = ~ 1 + positionb,
#                                      data = injds1718p,
#                                      family = nbinom2)

## -----------------------------------------------------------------------------
summary(burden_zinfnb)

## -----------------------------------------------------------------------------
## pois
## predprob: for each subj, prob of observing each value
phat_pois <- predprob(burden_glm_pois) 
phat_pois_mn <- apply(phat_pois, 2, mean) 
## nb
phat_nb <- predprob(burden_glm_nb)           
phat_nb_mn <- apply(phat_nb, 2, mean) 
## zinfpois
phat_zinfpois <- predprob(burden_zinfpois)            
phat_zinfpois_mn <- apply(phat_zinfpois, 2, mean) 
## zinfnb
phat_zinfnb <- predprob(burden_zinfnb)           
phat_zinfnb_mn <- apply(phat_zinfnb, 2, mean) 

## put in a data frame
idx <- seq(1, 62, length.out = 30)
df_probs <- data.frame(phat_pois_mn     = phat_pois_mn[idx],
                       phat_nb_mn       = phat_nb_mn[idx],
                       phat_zinfpois_mn = phat_zinfpois_mn[idx],
                       phat_zinfnb_mn   = phat_zinfnb_mn[idx], 
                       x= idx) |> 
  tidyr::gather(key = "prob_type", value = "value", -x) |> 
  mutate(prob_type = factor(prob_type))

## ---- fig.width = 9, fig.height = 4.8-----------------------------------------
ggplot(data = injds1718p) + 
  geom_histogram(aes(x = injburden/100, after_stat(density)), 
                 breaks = seq(-0.5, 62, length.out = 30),
                 col = "black", alpha = 0.5) +
  geom_point(data = df_probs, aes(x = x, y = value, 
                                  group = prob_type, col = prob_type)) + 
  geom_line(data = df_probs, aes(x = x, y = value, 
                                 group = prob_type, col = prob_type)) + 
  ylim(c(0, 0.3)) + xlab("Injury burden") + ylab("Density") +
  scale_color_manual(name = "Model:",
                     labels = c("Negative Binomial", "Poisson",
                                "Zero-Inflated Negative Binomial",
                                "Zero-Inflated Poisson"),
                     values = c("darkblue", "chocolate", "purple", "red")) +
  ggtitle("Histogram of injury burden in 2017/2018\nwith conditional Poisson, NB, ZIP and ZINB Densities") +
  theme_counts +
  theme(legend.position = c(0.7, 0.7))

## ---- echo = F, eval = F------------------------------------------------------
#  ## using base R
#  with(injds1718p, {
#    hist(ndayslost, prob = TRUE, breaks = seq(-0.5, 316.5, length.out = 30),
#         xlab = "Injury burden (number of injuries per player-season)",
#         main = "Histogram of overall injury burden\nwith conditional Poisson, NB, ZIP and ZINB Densities")
#    lines(x = idx, y = phat_pois_mn[idx], type = "b", lwd = 2, col = "black")
#    lines(x = idx, y = phat_nb_mn[idx], type = "b", lwd = 2, col = "purple")
#    lines(x = idx, y = phat_zinfpois_mn[idx], type = "b", lwd = 2, col = "red")
#    lines(x = idx, y = phat_zinfnb_mn[idx], type = "b", lwd = 2, col = "blue")
#  })
#  legend(250, 0.026, c("Poisson", "NB", "ZIP","ZINB"), lty = 1,
#         col = c("black", "purple", "red","blue"), lwd = 2)
#  

## -----------------------------------------------------------------------------
models <- list("Poisson model" = burden_glm_pois,
               "Negative binomial model" = burden_glm_nb,
               "Zero-inflated Poisson model" = burden_zinfpois,
               "Zero-inflated Negative Binomial model" = burden_zinfnb)

res_gof <- lapply(models, function(model) {
  aic      <- AIC(model)
  bic      <- BIC(model)
  if (class(model)[[1]] == "zeroinfl") {
    deviance <- -2*logLik(model)[[1]]
    null_model <- update(model, .~ -positionb)
    null_deviance <- -2*logLik(null_model)[[1]]
  } else {
    deviance <- model$deviance
    null_deviance <- model$null.deviance
  }
  dev_expl <- (null_deviance - deviance)/null_deviance * 100
  return(res = data.frame(aic = aic, bic = bic, dev_expl = dev_expl))
})

## -----------------------------------------------------------------------------
res_gof |>   
  bind_rows(.id = "model") |>  
  ## arrange them according to dev_expl.
  arrange(desc(dev_expl)) |> 
  knitr::kable(digits = 2,
               col.names = c("Model", "AIC", "BIC", "Deviance Explained"))