medicalrisk: Advanced comorbidity analysis

Calculating Mortality Risk

First, use the medicalrisk package to create a single dataframe with information on each patient:

library(medicalrisk)
library(plyr)
data(vt_inp_sample)
cm_df <- generate_comorbidity_df(vt_inp_sample, icd9mapfn=icd9cm_charlson_quan)
cci_df <- generate_charlson_index_df(cm_df)
rsi_df <- ddply(vt_inp_sample, .(id), function(x) { icd9cm_sessler_rsi(x$icd9cm) } )
num_icd9_df <- count(vt_inp_sample, c('id'))
num_icd9_df <- rename(num_icd9_df, c("freq" = "num_icd9"))
wide_df <- merge(merge(merge(merge(
  rsi_df, cci_df), 
    cm_df), 
      unique(vt_inp_sample[,c('id','scu_days','drg','mdc')])),
        num_icd9_df)

id	rsi_1yrpod	rsi_30dlos	rsi_30dpod	rsi_inhosp	index	mi	chf	perivasc	cvd	dementia	chrnlung	rheum
1	-2.019	0.156	-1.699	-1.848	2	FALSE	TRUE	FALSE	FALSE	FALSE	FALSE	FALSE
2	-4.142	0.893	-3.802	-3.543	0	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE
3	-2.627	0.831	-2.911	-2.861	0	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE
4	-0.798	0.336	-1.551	-0.267	4	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE
5	2.580	-1.790	2.455	1.762	2	FALSE	FALSE	FALSE	FALSE	FALSE	TRUE	FALSE

id	ulcer	liver	dm	dmcx	para	renal	tumor	modliver	mets	aids	drg	mdc	num_icd9
1	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	470	8	12
2	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	470	8	15
3	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	462	8	7
4	FALSE	FALSE	TRUE	FALSE	FALSE	TRUE	FALSE	FALSE	FALSE	FALSE	470	8	10
5	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	FALSE	689	11	24

Let’s explore the data here with some graphs. First, a histogram:

library(reshape2)
library(ggplot2)
# generate a 100 pt x 17 comorbidity table (1700 rows)
cm_melted <- melt(cm_df, id.vars=c('id'), variable.name='cm')
# get rid of all the false entries
cm_melted <- cm_melted[cm_melted$value,]
## count only flags that are true
ggplot(cm_melted, aes(cm, fill=cm)) + 
  geom_bar() + 
  scale_fill_discrete()

The chrnlung comorbidity seems well represented. Let’s create a histogram breaking down which ICD-9-CM codes are mapping to chrnlung in this dataset:

# make a histogram dataframe for all the icd-9 codes
icd9cm_df <- count(vt_inp_sample, vars='icd9cm')
# create a charlson comorbidity map for all icd-9 codes
icd9cm_charlson_df <- icd9cm_charlson_quan(icd9cm_df$icd9cm)
# isolate just the chrnlung icd_9_cm codes
icd9cm_chrnlung <- row.names(icd9cm_charlson_df[icd9cm_charlson_df$chrnlung,])
# create a hist df
icd9cm_chrnlung_hist <- icd9cm_df[icd9cm_df$icd9cm %in% icd9cm_chrnlung,]
# plot it
ggplot(icd9cm_chrnlung_hist, aes(icd9cm, freq)) + 
  geom_bar(stat="identity")

Let’s see how often ICD-9-CM codes used for chrnlung coincide within patients:

# create base dataset
pairs <- unique(
  vt_inp_sample[vt_inp_sample$icd9cm %in% icd9cm_chrnlung, c('id','icd9cm')])

# create coincidence matrix
t <- table(
    ddply(pairs, c('id'), function(x) { if (length(x$icd9cm) > 1) {
      data.frame(t(combn(as.character(x$icd9cm),2))) } })[c('X1','X2')])

	D4168	D49390
D49122	1	0
D4168	0	1

How often do comorbidities coincide?

# create coincidence matrix
t <- table(
    ddply(cm_melted, c('id'), function(x) { if (length(x$cm) > 1) {
      data.frame(t(combn(as.character(x$cm),2))) } })[c('X1','X2')])
# sort it
t <- t[order(rownames(t)),order(colnames(t))]

	chf	chrnlung	cvd	dementia	dm	liver	mets	para	perivasc	renal	rheum	tumor	ulcer
chf	0	8	0	0	3	0	1	0	4	3	3	1	1
chrnlung	0	0	0	0	7	1	0	1	0	5	3	1	0
cvd	0	1	0	2	1	0	0	1	0	1	1	0	0
dementia	0	2	0	0	0	0	0	0	0	1	1	0	0
dm	0	0	0	0	0	0	0	1	0	5	0	2	0
liver	0	0	0	0	1	0	0	1	0	0	0	0	0
mi	4	6	1	1	4	0	0	0	0	1	3	1	1
perivasc	0	3	1	1	2	0	0	0	0	3	1	1	0
rheum	0	0	0	0	1	0	1	0	0	0	0	0	0
ulcer	0	0	0	0	1	0	0	0	0	1	0	0	0

Plot the above table:

m <- melt(t)
ggplot(m[m$value>0,], aes(X1,X2)) + stat_sum(aes(group=value))

This is a scatterplot of the Charlson Comorbidity Index versus each RSI mortality estimate. A linear regression line is superimposed:

library(grid)
library(gridExtra)

## Warning: package 'gridExtra' was built under R version 3.6.2

p.inhosp <- ggplot(wide_df, aes(rsi_inhosp, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.30dpod <- ggplot(wide_df, aes(rsi_30dpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
p.1yrpod <- ggplot(wide_df, aes(rsi_1yrpod, index)) + geom_point() + geom_smooth(method=lm) +
  scale_y_continuous(limits=c(-3,10))
grid.arrange(p.inhosp, p.30dpod, p.1yrpod, nrow=1)

As expected, the Risk Stratification Index is correlated with an increased Charlson Comorbidity Index.

medicalrisk: Advanced comorbidity analysis

Patrick McCormick patrick.mccormick@alum.mit.edu

2020-02-28

Introduction

Calculating Mortality Risk