library(readxl)
library(tableone)
library(table1)
##
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
##
## units, units<-
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:table1':
##
## label, label<-, units
## The following objects are masked from 'package:base':
##
## format.pval, units
library(naniar)
library(tableone)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
data <-read_excel("/Users/carolinaferreiraatuesta/Documents/MS Vaccine/data.xlsx")
newdata <- subset(data, data$vaccine_2017_2018 & data$age >= 40)
summary(newdata)
## age sex ethnicity education_level
## Min. :40.00 Min. :0.0000 Min. :1.000 Min. :1.000
## 1st Qu.:46.00 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:1.000
## Median :53.00 Median :1.0000 Median :3.000 Median :1.000
## Mean :52.87 Mean :0.6724 Mean :2.802 Mean :1.802
## 3rd Qu.:58.00 3rd Qu.:1.0000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :77.00 Max. :1.0000 Max. :5.000 Max. :4.000
##
## income_decile duration_ms type_ms dmt
## Min. : 1.000 Min. : 4.0 Min. :1.000 Min. : 1.000
## 1st Qu.: 4.000 1st Qu.: 8.5 1st Qu.:1.000 1st Qu.: 5.000
## Median : 5.500 Median :13.0 Median :1.000 Median : 8.000
## Mean : 5.658 Mean :15.0 Mean :1.724 Mean : 7.643
## 3rd Qu.: 8.000 3rd Qu.:19.0 3rd Qu.:3.000 3rd Qu.:10.000
## Max. :10.000 Max. :67.0 Max. :4.000 Max. :13.000
## NA's :2 NA's :1 NA's :1
## edss vaccine_flu_2018_2019 source_hc source_sm
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:2.500 1st Qu.:1.0000 1st Qu.:1.0000 1st Qu.:0.00000
## Median :4.000 Median :1.0000 Median :1.0000 Median :0.00000
## Mean :4.061 Mean :0.9565 Mean :0.8522 Mean :0.06957
## 3rd Qu.:6.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :8.000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## NA's :1 NA's :1 NA's :1 NA's :1
## source_ff source_other recomendation_for lack_for
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.07826 Mean :0.1043 Mean :0.8707 Mean :0.1034
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## NA's :1 NA's :1
## for_hcms for_hcnoms recommendation_against lack_against
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:1.0000 1st Qu.:0.00000 1st Qu.:1.0000
## Median :0.0000 Median :1.0000 Median :0.00000 Median :1.0000
## Mean :0.1944 Mean :0.8148 Mean :0.08621 Mean :0.9052
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
## NA's :8 NA's :8
## against_hcms against_nonms novaccine_fear_effects novaccine_fear_ms
## Min. :0.00000 Min. :0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.0000 Median :0.000000 Median :0.000000
## Mean :0.09524 Mean :0.4286 Mean :0.008621 Mean :0.008621
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.0000 Max. :1.000000 Max. :1.000000
## NA's :95 NA's :95
## novaccine_nomedicalissue novaccine_negative_experience novaccine_dmtissue
## Min. :0.00000 Min. :0.000000 Min. :0
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0
## Median :0.00000 Median :0.000000 Median :0
## Mean :0.03448 Mean :0.008621 Mean :0
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0
## Max. :1.00000 Max. :1.000000 Max. :0
##
## novaccine_antivaccine novaccine_flu_nosevere vaccine_2017_2018
## Min. :0 Min. :0 Min. :1
## 1st Qu.:0 1st Qu.:0 1st Qu.:1
## Median :0 Median :0 Median :1
## Mean :0 Mean :0 Mean :1
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:1
## Max. :0 Max. :0 Max. :1
##
listvars <- c("age", "sex", "ethnicity")
table1 <- CreateTableOne(vars= listvars,
strata= "sex",
data = data)
table1
## Stratified by sex
## 0 1 p test
## n 61 144
## age (mean (SD)) 46.50 (11.53) 47.35 (11.22) 0.625
## sex (mean (SD)) 0.00 (0.00) 1.00 (0.00) <0.001
## ethnicity (mean (SD)) 2.85 (0.79) 2.85 (0.69) 0.988
qqplot(data$age, data$duration_ms)
hist(data$age)
#Linearkdnekne ###
pre <- data[c(1:28)]
misplot <- gg_miss_var(pre, show_pct = TRUE)
misplot$data
## # A tibble: 28 x 3
## variable n_miss pct_miss
## <chr> <int> <dbl>
## 1 against_hcms 173 84.4
## 2 against_nonms 173 84.4
## 3 for_hcms 26 12.7
## 4 for_hcnoms 26 12.7
## 5 edss 11 5.37
## 6 age 3 1.46
## 7 source_hc 3 1.46
## 8 source_sm 3 1.46
## 9 source_ff 3 1.46
## 10 source_other 3 1.46
## # … with 18 more rows
#pdf("misplot2.pdf")
misplot +
theme_bw() +
theme(
plot.title = element_text(face = "bold", size = 12),
axis.text.y = element_text(size=4))
misplot
#dev.off()
data$sex[data$sex == "1"] <- "Female"
library(gmodels)
#data$sex[data$sex == '0'] <- 'Male'
mytable <-table(data$dmt, data$sex)
mytable2 <- CrossTable(data$dmt, data$sex)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 204
##
##
## | data$sex
## data$dmt | 0 | Female | Row Total |
## -------------|-----------|-----------|-----------|
## 1 | 3 | 6 | 9 |
## | 0.035 | 0.015 | |
## | 0.333 | 0.667 | 0.044 |
## | 0.049 | 0.042 | |
## | 0.015 | 0.029 | |
## -------------|-----------|-----------|-----------|
## 2 | 3 | 4 | 7 |
## | 0.393 | 0.168 | |
## | 0.429 | 0.571 | 0.034 |
## | 0.049 | 0.028 | |
## | 0.015 | 0.020 | |
## -------------|-----------|-----------|-----------|
## 4 | 8 | 12 | 20 |
## | 0.682 | 0.291 | |
## | 0.400 | 0.600 | 0.098 |
## | 0.131 | 0.084 | |
## | 0.039 | 0.059 | |
## -------------|-----------|-----------|-----------|
## 5 | 10 | 18 | 28 |
## | 0.316 | 0.135 | |
## | 0.357 | 0.643 | 0.137 |
## | 0.164 | 0.126 | |
## | 0.049 | 0.088 | |
## -------------|-----------|-----------|-----------|
## 6 | 1 | 22 | 23 |
## | 5.023 | 2.143 | |
## | 0.043 | 0.957 | 0.113 |
## | 0.016 | 0.154 | |
## | 0.005 | 0.108 | |
## -------------|-----------|-----------|-----------|
## 7 | 10 | 21 | 31 |
## | 0.058 | 0.025 | |
## | 0.323 | 0.677 | 0.152 |
## | 0.164 | 0.147 | |
## | 0.049 | 0.103 | |
## -------------|-----------|-----------|-----------|
## 8 | 3 | 6 | 9 |
## | 0.035 | 0.015 | |
## | 0.333 | 0.667 | 0.044 |
## | 0.049 | 0.042 | |
## | 0.015 | 0.029 | |
## -------------|-----------|-----------|-----------|
## 9 | 0 | 3 | 3 |
## | 0.897 | 0.383 | |
## | 0.000 | 1.000 | 0.015 |
## | 0.000 | 0.021 | |
## | 0.000 | 0.015 | |
## -------------|-----------|-----------|-----------|
## 10 | 15 | 37 | 52 |
## | 0.019 | 0.008 | |
## | 0.288 | 0.712 | 0.255 |
## | 0.246 | 0.259 | |
## | 0.074 | 0.181 | |
## -------------|-----------|-----------|-----------|
## 12 | 0 | 1 | 1 |
## | 0.299 | 0.128 | |
## | 0.000 | 1.000 | 0.005 |
## | 0.000 | 0.007 | |
## | 0.000 | 0.005 | |
## -------------|-----------|-----------|-----------|
## 13 | 8 | 13 | 21 |
## | 0.471 | 0.201 | |
## | 0.381 | 0.619 | 0.103 |
## | 0.131 | 0.091 | |
## | 0.039 | 0.064 | |
## -------------|-----------|-----------|-----------|
## Column Total | 61 | 143 | 204 |
## | 0.299 | 0.701 | |
## -------------|-----------|-----------|-----------|
##
##
margin.table(mytable, 1) # A frequencies (summed over B)
##
## 1 2 4 5 6 7 8 9 10 12 13
## 9 7 20 28 23 31 9 3 52 1 21
margin.table(mytable, 2) # B frequencies (summed over A)
##
## 0 Female
## 61 143
prop.table(mytable) # cell percentages
##
## 0 Female
## 1 0.014705882 0.029411765
## 2 0.014705882 0.019607843
## 4 0.039215686 0.058823529
## 5 0.049019608 0.088235294
## 6 0.004901961 0.107843137
## 7 0.049019608 0.102941176
## 8 0.014705882 0.029411765
## 9 0.000000000 0.014705882
## 10 0.073529412 0.181372549
## 12 0.000000000 0.004901961
## 13 0.039215686 0.063725490
prop.table(mytable, 1) # row percentages
##
## 0 Female
## 1 0.33333333 0.66666667
## 2 0.42857143 0.57142857
## 4 0.40000000 0.60000000
## 5 0.35714286 0.64285714
## 6 0.04347826 0.95652174
## 7 0.32258065 0.67741935
## 8 0.33333333 0.66666667
## 9 0.00000000 1.00000000
## 10 0.28846154 0.71153846
## 12 0.00000000 1.00000000
## 13 0.38095238 0.61904762
prop.table(mytable, 2) # column percentages
##
## 0 Female
## 1 0.049180328 0.041958042
## 2 0.049180328 0.027972028
## 4 0.131147541 0.083916084
## 5 0.163934426 0.125874126
## 6 0.016393443 0.153846154
## 7 0.163934426 0.146853147
## 8 0.049180328 0.041958042
## 9 0.000000000 0.020979021
## 10 0.245901639 0.258741259
## 12 0.000000000 0.006993007
## 13 0.131147541 0.090909091
summary(mytable)
## Number of cases in table: 204
## Number of factors: 2
## Test for independence of all factors:
## Chisq = 11.74, df = 10, p-value = 0.3028
## Chi-squared approximation may be incorrect
hist(data)
data$sex[data$sex == "1"] <- "Female"
listvars <- c(
"dmt",
"age",
"edss",
"type_ms",
"sex")
tab3 <- CreateTableOne(vars = listvars,
strata = "ethnicity",
data = data,
test = TRUE,
testApprox = chisq.test,
smd= TRUE,
addOverall= TRUE)
tab3
## Stratified by ethnicity
## Overall 1 2 3
## n 205 18 12 161
## dmt (mean (SD)) 7.36 (3.18) 7.50 (3.00) 7.18 (3.46) 7.43 (3.20)
## age (mean (SD)) 47.10 (11.29) 43.22 (8.19) 46.75 (11.16) 48.13 (11.49)
## edss (mean (SD)) 3.63 (2.28) 3.62 (2.14) 4.62 (1.69) 3.67 (2.31)
## type_ms (mean (SD)) 1.55 (0.85) 1.56 (0.92) 1.58 (0.90) 1.57 (0.86)
## sex = Female (%) 144 (70.2) 11 (61.1) 10 (83.3) 115 (71.4)
## Stratified by ethnicity
## 4 5 p test
## n 10 4
## dmt (mean (SD)) 7.30 (3.37) 4.50 (2.08) 0.498
## age (mean (SD)) 39.50 (10.52) 44.00 (10.42) 0.080
## edss (mean (SD)) 2.75 (2.47) 1.50 (1.22) 0.118
## type_ms (mean (SD)) 1.40 (0.70) 1.00 (0.00) 0.728
## sex = Female (%) 5 (50.0) 3 (75.0) 0.432
data$age <- as.numeric(data$age)
data$dmt<- as.numeric(data$dmt)
myvars <- c("age", "dmt", "ethnicity", "education_level", "duration_ms", "novaccine_antivaccine", "edss")
newdata <- data[myvars]
cor( newdata,use="complete.obs", method="pearson")
## age dmt ethnicity education_level
## age 1.000000000 0.19320477 0.021808659 0.002223026
## dmt 0.193204768 1.00000000 -0.056740515 0.063049518
## ethnicity 0.021808659 -0.05674051 1.000000000 0.050932734
## education_level 0.002223026 0.06304952 0.050932734 1.000000000
## duration_ms 0.437399338 0.06616759 -0.008271633 0.038555352
## novaccine_antivaccine -0.169791688 0.08306735 -0.035516066 -0.014735117
## edss 0.394568226 0.03184122 -0.126199702 -0.036780911
## duration_ms novaccine_antivaccine edss
## age 0.437399338 -0.169791688 0.394568226
## dmt 0.066167588 0.083067353 0.031841221
## ethnicity -0.008271633 -0.035516066 -0.126199702
## education_level 0.038555352 -0.014735117 -0.036780911
## duration_ms 1.000000000 -0.072284761 0.261524584
## novaccine_antivaccine -0.072284761 1.000000000 0.002197616
## edss 0.261524584 0.002197616 1.000000000
library(corrgram)
##
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
##
## panel.fill
corrgram(newdata, order=NULL, lower.panel=panel.shade,
upper.panel=NULL, text.panel=panel.txt,
main="Corr table")
library(survminer)
## Loading required package: ggpubr
data <-read_excel("/Users/carolinaferreiraatuesta/Documents/MS Vaccine/data.xlsx")
linreg2 <- glm(data$sex ~ data$age, data=data, family ="binomial") #logistic
summary(linreg2)
##
## Call:
## glm(formula = data$sex ~ data$age, family = "binomial", data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6375 -1.5150 0.8228 0.8514 0.8905
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.544862 0.660443 0.825 0.409
## data$age 0.006747 0.013737 0.491 0.623
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 245.76 on 201 degrees of freedom
## Residual deviance: 245.52 on 200 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 249.52
##
## Number of Fisher Scoring iterations: 4
linreg <- lm(duration_ms ~ age +sex +data$income_decile +edss, data=data) #linear
summary(linreg)
##
## Call:
## lm(formula = duration_ms ~ age + sex + data$income_decile + edss,
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12.728 -4.848 -0.999 3.435 47.090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2273 2.5812 -0.088 0.930
## age 0.3094 0.0535 5.784 3.11e-08 ***
## sex -1.3449 1.1859 -1.134 0.258
## data$income_decile -0.2976 0.2153 -1.382 0.169
## edss 0.3394 0.2607 1.302 0.195
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.449 on 183 degrees of freedom
## (17 observations deleted due to missingness)
## Multiple R-squared: 0.2111, Adjusted R-squared: 0.1938
## F-statistic: 12.24 on 4 and 183 DF, p-value: 7.709e-09
confint(linreg)
## 2.5 % 97.5 %
## (Intercept) -5.3199850 4.8653490
## age 0.2038743 0.4149964
## sex -3.6846596 0.9948382
## data$income_decile -0.7223681 0.1271597
## edss -0.1749120 0.8538071
exp(linreg$coefficients)
## (Intercept) age sex data$income_decile
## 0.7966674 1.3626555 0.2605630 0.7425952
## edss
## 1.4041716
exp(confint(linreg))
## 2.5 % 97.5 %
## (Intercept) 0.004892827 129.716208
## age 1.226144000 1.514365
## sex 0.025105719 2.704287
## data$income_decile 0.485600949 1.135598
## edss 0.839530909 2.348571
vif(linreg)
## age sex data$income_decile edss
## 1.218241 1.006664 1.030482 1.185031
data <- na.omit(data) #ignore NAs
linreg <- lm(data$duration_ms ~ data$age, data=data)
#residual plot
data$predicted <- predict(linreg) # Save the predicted values
data$residuals <- residuals(linreg)
# Save the residual values
ggplot(data, aes(x = age, y = duration_ms)) +
geom_smooth(method = "lm", se = FALSE, color = "lightgrey") + # regression line
geom_segment(aes(xend = age, yend = predicted), alpha = .2) + # draw line from point to line
geom_point(aes(color = abs(residuals), size = abs(residuals))) + # size of the points
scale_color_continuous(low = "green", high = "red") + # colour of the points mapped to residual size - green smaller, red larger
guides(color = FALSE, size = FALSE) + # Size legend removed
geom_point(aes(y = predicted), shape = 1) +
theme_bw()
## `geom_smooth()` using formula 'y ~ x'
fit <- lapply(
listvars,function(x)
lm(formula(paste("data$vaccine_flu_2018_2019 ~", x)),
data = data))
lapply(fit, summary)
## [[1]]
##
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8132 0.1723 0.1950 0.2157 0.2447
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.862884 0.253140 3.409 0.002 **
## dmt -0.008274 0.031792 -0.260 0.797
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4135 on 28 degrees of freedom
## Multiple R-squared: 0.002413, Adjusted R-squared: -0.03321
## F-statistic: 0.06774 on 1 and 28 DF, p-value: 0.7966
##
##
## [[2]]
##
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.85567 0.00405 0.08561 0.24545 0.48359
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.190199 0.300933 0.632 0.5325
## age 0.013049 0.006261 2.084 0.0464 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3852 on 28 degrees of freedom
## Multiple R-squared: 0.1343, Adjusted R-squared: 0.1034
## F-statistic: 4.343 on 1 and 28 DF, p-value: 0.04639
##
##
## [[3]]
##
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.01270 0.06946 0.09685 0.25432 0.42548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.57452 0.14704 3.907 0.000539 ***
## edss 0.05477 0.03118 1.757 0.089901 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.393 on 28 degrees of freedom
## Multiple R-squared: 0.09927, Adjusted R-squared: 0.0671
## F-statistic: 3.086 on 1 and 28 DF, p-value: 0.0899
##
##
## [[4]]
##
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.82682 0.07263 0.17318 0.27374 0.27374
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.62570 0.16100 3.886 0.00057 ***
## type_ms 0.10056 0.08259 1.218 0.23357
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4035 on 28 degrees of freedom
## Multiple R-squared: 0.05028, Adjusted R-squared: 0.01636
## F-statistic: 1.482 on 1 and 28 DF, p-value: 0.2336
##
##
## [[5]]
##
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)),
## data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8 0.2 0.2 0.2 0.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.000e-01 1.309e-01 6.11 1.36e-06 ***
## sex 1.720e-16 1.604e-01 0.00 1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.414 on 28 degrees of freedom
## Multiple R-squared: 9.784e-31, Adjusted R-squared: -0.03571
## F-statistic: 2.739e-29 on 1 and 28 DF, p-value: 1
newdata <- data %>%
filter(sex == "Female") %>%
group_by(dmt) %>%
summarise(age_mean = mean(age, na.rm = TRUE))
newdata
## # A tibble: 0 x 2
## # … with 2 variables: dmt <dbl>, age_mean <dbl>
data$age_cat <- findInterval(data$age, c(0,10,20,40), rightmost.closed = TRUE)
ggplot(data,
aes(x = age_cat,
fill = as.factor(dmt))) +
geom_bar(position = "stack")