library(readxl)
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## -- Attaching packages ------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 3.2.1 <U+221A> readr 1.3.1
## <U+221A> tibble 2.1.3 <U+221A> purrr 0.3.2
## <U+221A> tidyr 1.0.0 <U+221A> stringr 1.4.0
## <U+221A> ggplot2 3.2.1 <U+221A> forcats 0.4.0
## -- Conflicts ---------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(Amelia)
## Loading required package: Rcpp
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.7.5, built: 2018-05-07)
## ## Copyright (C) 2005-2020 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:purrr':
##
## transpose
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(psycho)
## Registered S3 method overwritten by 'MuMIn':
## method from
## predict.merMod lme4
## Registered S3 methods overwritten by 'huge':
## method from
## plot.sim BDgraph
## print.sim BDgraph
## Registered S3 method overwritten by 'xts':
## method from
## as.zoo.xts zoo
## message: psycho's `analyze()` is deprecated in favour of the report package. Check it out at https://github.com/easystats/report
library(ggplot2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(DescTools)
##
## Attaching package: 'DescTools'
## The following object is masked from 'package:data.table':
##
## %like%
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(car)
## Loading required package: carData
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
##
## Attaching package: 'car'
## The following object is masked from 'package:DescTools':
##
## Recode
## The following object is masked from 'package:purrr':
##
## some
## The following object is masked from 'package:dplyr':
##
## recode
library(tibble)
library(nnet)
resp <- read_excel("C:/Users/HP/Desktop/resp.xlsx")
summary(resp)
## gender SITEID SUBJID TRTPN
## Length:582 Min. : 1.00 Min. : 1.0 Min. :1.000
## Class :character 1st Qu.: 23.00 1st Qu.:197.2 1st Qu.:1.000
## Mode :character Median : 52.00 Median :408.0 Median :1.000
## Mean : 59.42 Mean :402.4 Mean :1.498
## 3rd Qu.: 99.00 3rd Qu.:599.8 3rd Qu.:2.000
## Max. :141.00 Max. :797.0 Max. :2.000
## responseCategory
## Length:582
## Class :character
## Mode :character
##
##
##
resp$gender <- as.factor(resp$gender)
resp$responseCategory<- as.factor(resp$responseCategory)
resp$TRTPN <- as.factor(resp$TRTPN)
str(resp)
## Classes 'tbl_df', 'tbl' and 'data.frame': 582 obs. of 5 variables:
## $ gender : Factor w/ 2 levels "FEMALE","MALE": 2 1 2 2 1 1 2 1 2 2 ...
## $ SITEID : num 1 1 1 1 1 1 1 1 1 1 ...
## $ SUBJID : num 27 39 126 154 161 198 221 280 540 557 ...
## $ TRTPN : Factor w/ 2 levels "1","2": 2 1 2 1 1 1 1 1 1 1 ...
## $ responseCategory: Factor w/ 5 levels "CR","NE","PD",..: 5 3 3 5 3 5 5 5 5 5 ...
#Recoding target variable into binary outcome ('responder (1, event)-nonresponder(0)')
response <- function(res){
res <- as.character(res)
if (res=='CR'| res=='PR'){
return(1)
}else{
return(0)
}
}
resp$responseCategory <- sapply(resp$responseCategory,response)
table(resp$responseCategory)
##
## 0 1
## 490 92
resp$responseCategory<- as.factor(resp$responseCategory)
#creating training and test subset
library(caret)
## Loading required package: lattice
## Registered S3 methods overwritten by 'lava':
## method from
## plot.sim huge
## print.sim huge
##
## Attaching package: 'caret'
## The following objects are masked from 'package:DescTools':
##
## MAE, RMSE
## The following object is masked from 'package:purrr':
##
## lift
library(caTools)
set.seed(101)
split = sample.split(resp$responseCategory, SplitRatio = 0.70)
final.train = subset(resp, split == TRUE)
final.test = subset(resp, split == FALSE)
summary(final.train)
## gender SITEID SUBJID TRTPN responseCategory
## FEMALE:188 Min. : 1.00 Min. : 4.0 1:207 0:343
## MALE :219 1st Qu.: 23.00 1st Qu.:190.0 2:200 1: 64
## Median : 51.00 Median :415.0
## Mean : 59.25 Mean :398.6
## 3rd Qu.: 97.50 3rd Qu.:591.5
## Max. :141.00 Max. :797.0
summary(final.test)
## gender SITEID SUBJID TRTPN responseCategory
## FEMALE: 75 Min. : 1.00 Min. : 1.0 1:85 0:147
## MALE :100 1st Qu.: 21.00 1st Qu.:222.5 2:90 1: 28
## Median : 55.00 Median :406.0
## Mean : 59.83 Mean :411.4
## 3rd Qu.:102.00 3rd Qu.:616.5
## Max. :141.00 Max. :795.0
#logit model with TRTPN as predictor
logit <- glm(formula = responseCategory ~ TRTPN, family = binomial(logit), data = resp)
summary(logit)
##
## Call:
## glm(formula = responseCategory ~ TRTPN, family = binomial(logit),
## data = resp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6526 -0.6526 -0.5149 -0.5149 2.0427
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.4385 0.1486 -9.677 <2e-16 ***
## TRTPN2 -0.5153 0.2320 -2.222 0.0263 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 508.05 on 581 degrees of freedom
## Residual deviance: 503.01 on 580 degrees of freedom
## AIC: 507.01
##
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(logit), confint(logit)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.2372881 0.1756712 0.3149589
## TRTPN2 0.5973003 0.3765543 0.9371668
#Odds ratio is exponented reggresion coefficient in logistic regression that says to us
#how much times is event more probable when predictor changes for one unit (OR=OR(x+1)/OR(x)).
#For categorical predictors, odd ratio is saying to us how much times is an event more likely to occur
#for respondetns in one category of the predictor. In our example, how much time the respondets from trial 1 will
#more likely to have good response to the treatment.probability OR=p/(1-p); for probability of 0,5(50%), odd ratio
#is 1, meaning that both events have the same probability of occurance.
#In our case, TRTPN treatmant has 0,59 time less chances to give response to the treatment than treatment 1.
#Confidence interval (can be arbitrary selected) is an interval that tell us with which possibility
#our result is in that range. For our results, for this sample, we are sure with 95% that our odds ratio for TRTPN
#goes from 0.37 to 0.93. Larger samples decrease the standard error which improves confidence interval (making it more narrow).
logit1 <- glm(formula = responseCategory ~ TRTPN + gender + factor(gender):factor(TRTPN), family = binomial(logit), data = resp)
#P value for the interaction term is 0.415 which is high. We can accept the null hypothesis
# There is no impact of the interaction on the response (target variable) so we could
#drop interaction from the logit model.
summary(logit1)
##
## Call:
## glm(formula = responseCategory ~ TRTPN + gender + factor(gender):factor(TRTPN),
## family = binomial(logit), data = resp)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6902 -0.6109 -0.5302 -0.5035 2.0631
##
## Coefficients: (3 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.2028 0.4103 -2.931 0.00337
## TRTPN2 -0.6881 0.3105 -2.216 0.02669
## genderMALE -0.1106 0.3587 -0.308 0.75779
## factor(gender)FEMALE:factor(TRTPN)1 -0.3814 0.4675 -0.816 0.41464
## factor(gender)MALE:factor(TRTPN)1 NA NA NA NA
## factor(gender)FEMALE:factor(TRTPN)2 NA NA NA NA
## factor(gender)MALE:factor(TRTPN)2 NA NA NA NA
##
## (Intercept) **
## TRTPN2 *
## genderMALE
## factor(gender)FEMALE:factor(TRTPN)1
## factor(gender)MALE:factor(TRTPN)1
## factor(gender)FEMALE:factor(TRTPN)2
## factor(gender)MALE:factor(TRTPN)2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 508.05 on 581 degrees of freedom
## Residual deviance: 502.09 on 578 degrees of freedom
## AIC: 510.09
##
## Number of Fisher Scoring iterations: 4
exp(cbind(OR = coef(logit1), confint(logit1)))
## Waiting for profiling to be done...
## OR 2.5 % 97.5 %
## (Intercept) 0.3003647 0.1327742 0.6678051
## TRTPN2 0.5025338 0.2698128 0.9166996
## genderMALE 0.8952703 0.4440637 1.8300400
## factor(gender)FEMALE:factor(TRTPN)1 0.6829305 0.2728769 1.7155189
## factor(gender)MALE:factor(TRTPN)1 NA NA NA
## factor(gender)FEMALE:factor(TRTPN)2 NA NA NA
## factor(gender)MALE:factor(TRTPN)2 NA NA NA
table(resp$gender, resp$TRTPN)
##
## 1 2
## FEMALE 141 122
## MALE 151 168
#odds ratio for treatment 1 vs treatment 2 | Male, Wald method for confidence intervals
or <- (151/319)/(168/319)
or
## [1] 0.8988095
#standard eror for interaction term is 0.46
#confidence interval (95%) for particular odd ratio is
#upper cri
exp(log(or) + (0.46 * 1.96))
## [1] 2.214255
#lower cri
exp(log(or) - (0.46 * 1.96))
## [1] 0.3648445
#odd ratio for treatment 1 vs treatment 2 | Female
or1 <- (141/263)/(122/263)
or1
## [1] 1.155738
#standard eror for interaction term is 0.46
#confidence interval (95%) for particular odd ratio is
#upper cri
exp(log(or1) + (0.46 * 1.96))
## [1] 2.847208
#lower cri
exp(log(or1) - (0.46 * 1.96))
## [1] 0.4691367
#Odd ratio for Female vs Male | treatment 1
or2 <- (141/292)/(151/292)
or2
## [1] 0.9337748
#standard eror for interaction term is 0.46
#confidence interval (95%) for particular odd ratio is
#upper cri
exp(log(or2) + (0.46 * 1.96))
## [1] 2.300393
#lower cri
exp(log(or2) - (0.46 * 1.96))
## [1] 0.3790376
#Odds ratio Female vs Male | treatment 2.
or3 <- (122/290)/(168/290)
or3
## [1] 0.7261905
#standard eror for interaction term is 0.46
#confidence interval (95%) for particular odd ratio is
#upper cri
exp(log(or3) + (0.46 * 1.96))
## [1] 1.789
#lower cri
exp(log(or3) - (0.46 * 1.96))
## [1] 0.294775