library(readxl)
library(tableone)
library(table1)
## 
## Attaching package: 'table1'
## The following objects are masked from 'package:base':
## 
##     units, units<-
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:table1':
## 
##     label, label<-, units
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(naniar)
library(tableone)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
data <-read_excel("/Users/carolinaferreiraatuesta/Documents/MS Vaccine/data.xlsx")

newdata <- subset(data, data$vaccine_2017_2018 &  data$age >= 40)
summary(newdata)
##       age             sex           ethnicity     education_level
##  Min.   :40.00   Min.   :0.0000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:46.00   1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:1.000  
##  Median :53.00   Median :1.0000   Median :3.000   Median :1.000  
##  Mean   :52.87   Mean   :0.6724   Mean   :2.802   Mean   :1.802  
##  3rd Qu.:58.00   3rd Qu.:1.0000   3rd Qu.:3.000   3rd Qu.:3.000  
##  Max.   :77.00   Max.   :1.0000   Max.   :5.000   Max.   :4.000  
##                                                                  
##  income_decile     duration_ms      type_ms           dmt        
##  Min.   : 1.000   Min.   : 4.0   Min.   :1.000   Min.   : 1.000  
##  1st Qu.: 4.000   1st Qu.: 8.5   1st Qu.:1.000   1st Qu.: 5.000  
##  Median : 5.500   Median :13.0   Median :1.000   Median : 8.000  
##  Mean   : 5.658   Mean   :15.0   Mean   :1.724   Mean   : 7.643  
##  3rd Qu.: 8.000   3rd Qu.:19.0   3rd Qu.:3.000   3rd Qu.:10.000  
##  Max.   :10.000   Max.   :67.0   Max.   :4.000   Max.   :13.000  
##  NA's   :2        NA's   :1                      NA's   :1       
##       edss       vaccine_flu_2018_2019   source_hc        source_sm      
##  Min.   :0.000   Min.   :0.0000        Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:2.500   1st Qu.:1.0000        1st Qu.:1.0000   1st Qu.:0.00000  
##  Median :4.000   Median :1.0000        Median :1.0000   Median :0.00000  
##  Mean   :4.061   Mean   :0.9565        Mean   :0.8522   Mean   :0.06957  
##  3rd Qu.:6.000   3rd Qu.:1.0000        3rd Qu.:1.0000   3rd Qu.:0.00000  
##  Max.   :8.000   Max.   :1.0000        Max.   :1.0000   Max.   :1.00000  
##  NA's   :1       NA's   :1             NA's   :1        NA's   :1        
##    source_ff        source_other    recomendation_for    lack_for     
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.0000    Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.0000    1st Qu.:0.0000  
##  Median :0.00000   Median :0.0000   Median :1.0000    Median :0.0000  
##  Mean   :0.07826   Mean   :0.1043   Mean   :0.8707    Mean   :0.1034  
##  3rd Qu.:0.00000   3rd Qu.:0.0000   3rd Qu.:1.0000    3rd Qu.:0.0000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.0000    Max.   :1.0000  
##  NA's   :1         NA's   :1                                          
##     for_hcms        for_hcnoms     recommendation_against  lack_against   
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000        Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.00000        1st Qu.:1.0000  
##  Median :0.0000   Median :1.0000   Median :0.00000        Median :1.0000  
##  Mean   :0.1944   Mean   :0.8148   Mean   :0.08621        Mean   :0.9052  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.00000        3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000        Max.   :1.0000  
##  NA's   :8        NA's   :8                                               
##   against_hcms     against_nonms    novaccine_fear_effects novaccine_fear_ms 
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000       Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:0.000000       1st Qu.:0.000000  
##  Median :0.00000   Median :0.0000   Median :0.000000       Median :0.000000  
##  Mean   :0.09524   Mean   :0.4286   Mean   :0.008621       Mean   :0.008621  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.000000       3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.000000       Max.   :1.000000  
##  NA's   :95        NA's   :95                                                
##  novaccine_nomedicalissue novaccine_negative_experience novaccine_dmtissue
##  Min.   :0.00000          Min.   :0.000000              Min.   :0         
##  1st Qu.:0.00000          1st Qu.:0.000000              1st Qu.:0         
##  Median :0.00000          Median :0.000000              Median :0         
##  Mean   :0.03448          Mean   :0.008621              Mean   :0         
##  3rd Qu.:0.00000          3rd Qu.:0.000000              3rd Qu.:0         
##  Max.   :1.00000          Max.   :1.000000              Max.   :0         
##                                                                           
##  novaccine_antivaccine novaccine_flu_nosevere vaccine_2017_2018
##  Min.   :0             Min.   :0              Min.   :1        
##  1st Qu.:0             1st Qu.:0              1st Qu.:1        
##  Median :0             Median :0              Median :1        
##  Mean   :0             Mean   :0              Mean   :1        
##  3rd Qu.:0             3rd Qu.:0              3rd Qu.:1        
##  Max.   :0             Max.   :0              Max.   :1        
## 
listvars <- c("age", "sex", "ethnicity")

table1 <- CreateTableOne(vars= listvars,
                         strata= "sex",
                         data = data)
table1
##                        Stratified by sex
##                         0             1             p      test
##   n                        61           144                    
##   age (mean (SD))       46.50 (11.53) 47.35 (11.22)  0.625     
##   sex (mean (SD))        0.00 (0.00)   1.00 (0.00)  <0.001     
##   ethnicity (mean (SD))  2.85 (0.79)   2.85 (0.69)   0.988
qqplot(data$age, data$duration_ms)

hist(data$age)

#Linearkdnekne ###

pre <- data[c(1:28)]

misplot <- gg_miss_var(pre, show_pct = TRUE)
misplot$data
## # A tibble: 28 x 3
##    variable      n_miss pct_miss
##    <chr>          <int>    <dbl>
##  1 against_hcms     173    84.4 
##  2 against_nonms    173    84.4 
##  3 for_hcms          26    12.7 
##  4 for_hcnoms        26    12.7 
##  5 edss              11     5.37
##  6 age                3     1.46
##  7 source_hc          3     1.46
##  8 source_sm          3     1.46
##  9 source_ff          3     1.46
## 10 source_other       3     1.46
## # … with 18 more rows
#pdf("misplot2.pdf")
misplot  +
  theme_bw() + 
  theme(
    plot.title = element_text(face = "bold", size = 12),
    axis.text.y = element_text(size=4))

misplot

#dev.off() 
data$sex[data$sex == "1"] <- "Female"
library(gmodels)
#data$sex[data$sex == '0'] <- 'Male'
mytable <-table(data$dmt, data$sex)
mytable2 <- CrossTable(data$dmt, data$sex)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  204 
## 
##  
##              | data$sex 
##     data$dmt |         0 |    Female | Row Total | 
## -------------|-----------|-----------|-----------|
##            1 |         3 |         6 |         9 | 
##              |     0.035 |     0.015 |           | 
##              |     0.333 |     0.667 |     0.044 | 
##              |     0.049 |     0.042 |           | 
##              |     0.015 |     0.029 |           | 
## -------------|-----------|-----------|-----------|
##            2 |         3 |         4 |         7 | 
##              |     0.393 |     0.168 |           | 
##              |     0.429 |     0.571 |     0.034 | 
##              |     0.049 |     0.028 |           | 
##              |     0.015 |     0.020 |           | 
## -------------|-----------|-----------|-----------|
##            4 |         8 |        12 |        20 | 
##              |     0.682 |     0.291 |           | 
##              |     0.400 |     0.600 |     0.098 | 
##              |     0.131 |     0.084 |           | 
##              |     0.039 |     0.059 |           | 
## -------------|-----------|-----------|-----------|
##            5 |        10 |        18 |        28 | 
##              |     0.316 |     0.135 |           | 
##              |     0.357 |     0.643 |     0.137 | 
##              |     0.164 |     0.126 |           | 
##              |     0.049 |     0.088 |           | 
## -------------|-----------|-----------|-----------|
##            6 |         1 |        22 |        23 | 
##              |     5.023 |     2.143 |           | 
##              |     0.043 |     0.957 |     0.113 | 
##              |     0.016 |     0.154 |           | 
##              |     0.005 |     0.108 |           | 
## -------------|-----------|-----------|-----------|
##            7 |        10 |        21 |        31 | 
##              |     0.058 |     0.025 |           | 
##              |     0.323 |     0.677 |     0.152 | 
##              |     0.164 |     0.147 |           | 
##              |     0.049 |     0.103 |           | 
## -------------|-----------|-----------|-----------|
##            8 |         3 |         6 |         9 | 
##              |     0.035 |     0.015 |           | 
##              |     0.333 |     0.667 |     0.044 | 
##              |     0.049 |     0.042 |           | 
##              |     0.015 |     0.029 |           | 
## -------------|-----------|-----------|-----------|
##            9 |         0 |         3 |         3 | 
##              |     0.897 |     0.383 |           | 
##              |     0.000 |     1.000 |     0.015 | 
##              |     0.000 |     0.021 |           | 
##              |     0.000 |     0.015 |           | 
## -------------|-----------|-----------|-----------|
##           10 |        15 |        37 |        52 | 
##              |     0.019 |     0.008 |           | 
##              |     0.288 |     0.712 |     0.255 | 
##              |     0.246 |     0.259 |           | 
##              |     0.074 |     0.181 |           | 
## -------------|-----------|-----------|-----------|
##           12 |         0 |         1 |         1 | 
##              |     0.299 |     0.128 |           | 
##              |     0.000 |     1.000 |     0.005 | 
##              |     0.000 |     0.007 |           | 
##              |     0.000 |     0.005 |           | 
## -------------|-----------|-----------|-----------|
##           13 |         8 |        13 |        21 | 
##              |     0.471 |     0.201 |           | 
##              |     0.381 |     0.619 |     0.103 | 
##              |     0.131 |     0.091 |           | 
##              |     0.039 |     0.064 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |        61 |       143 |       204 | 
##              |     0.299 |     0.701 |           | 
## -------------|-----------|-----------|-----------|
## 
## 
margin.table(mytable, 1) # A frequencies (summed over B)
## 
##  1  2  4  5  6  7  8  9 10 12 13 
##  9  7 20 28 23 31  9  3 52  1 21
margin.table(mytable, 2) # B frequencies (summed over A)
## 
##      0 Female 
##     61    143
prop.table(mytable) # cell percentages
##     
##                0      Female
##   1  0.014705882 0.029411765
##   2  0.014705882 0.019607843
##   4  0.039215686 0.058823529
##   5  0.049019608 0.088235294
##   6  0.004901961 0.107843137
##   7  0.049019608 0.102941176
##   8  0.014705882 0.029411765
##   9  0.000000000 0.014705882
##   10 0.073529412 0.181372549
##   12 0.000000000 0.004901961
##   13 0.039215686 0.063725490
prop.table(mytable, 1) # row percentages
##     
##               0     Female
##   1  0.33333333 0.66666667
##   2  0.42857143 0.57142857
##   4  0.40000000 0.60000000
##   5  0.35714286 0.64285714
##   6  0.04347826 0.95652174
##   7  0.32258065 0.67741935
##   8  0.33333333 0.66666667
##   9  0.00000000 1.00000000
##   10 0.28846154 0.71153846
##   12 0.00000000 1.00000000
##   13 0.38095238 0.61904762
prop.table(mytable, 2) # column percentages
##     
##                0      Female
##   1  0.049180328 0.041958042
##   2  0.049180328 0.027972028
##   4  0.131147541 0.083916084
##   5  0.163934426 0.125874126
##   6  0.016393443 0.153846154
##   7  0.163934426 0.146853147
##   8  0.049180328 0.041958042
##   9  0.000000000 0.020979021
##   10 0.245901639 0.258741259
##   12 0.000000000 0.006993007
##   13 0.131147541 0.090909091
summary(mytable)
## Number of cases in table: 204 
## Number of factors: 2 
## Test for independence of all factors:
##  Chisq = 11.74, df = 10, p-value = 0.3028
##  Chi-squared approximation may be incorrect
hist(data)

data$sex[data$sex == "1"] <- "Female"
listvars <- c(
  "dmt",
  "age",
  "edss",
  "type_ms",
  "sex")

tab3 <- CreateTableOne(vars = listvars, 
                               strata = "ethnicity", 
                               data = data, 
                                test = TRUE, 
                               testApprox = chisq.test,
                       smd= TRUE,
                       addOverall= TRUE)
tab3
##                      Stratified by ethnicity
##                       Overall       1             2             3            
##   n                     205            18            12           161        
##   dmt (mean (SD))      7.36 (3.18)   7.50 (3.00)   7.18 (3.46)   7.43 (3.20) 
##   age (mean (SD))     47.10 (11.29) 43.22 (8.19)  46.75 (11.16) 48.13 (11.49)
##   edss (mean (SD))     3.63 (2.28)   3.62 (2.14)   4.62 (1.69)   3.67 (2.31) 
##   type_ms (mean (SD))  1.55 (0.85)   1.56 (0.92)   1.58 (0.90)   1.57 (0.86) 
##   sex = Female (%)      144 (70.2)     11 (61.1)     10 (83.3)    115 (71.4) 
##                      Stratified by ethnicity
##                       4             5             p      test
##   n                      10             4                    
##   dmt (mean (SD))      7.30 (3.37)   4.50 (2.08)   0.498     
##   age (mean (SD))     39.50 (10.52) 44.00 (10.42)  0.080     
##   edss (mean (SD))     2.75 (2.47)   1.50 (1.22)   0.118     
##   type_ms (mean (SD))  1.40 (0.70)   1.00 (0.00)   0.728     
##   sex = Female (%)        5 (50.0)      3 (75.0)   0.432
data$age <- as.numeric(data$age)
data$dmt<- as.numeric(data$dmt)
myvars <- c("age",  "dmt", "ethnicity", "education_level", "duration_ms", "novaccine_antivaccine",  "edss")
newdata <- data[myvars]
cor( newdata,use="complete.obs", method="pearson")
##                                age         dmt    ethnicity education_level
## age                    1.000000000  0.19320477  0.021808659     0.002223026
## dmt                    0.193204768  1.00000000 -0.056740515     0.063049518
## ethnicity              0.021808659 -0.05674051  1.000000000     0.050932734
## education_level        0.002223026  0.06304952  0.050932734     1.000000000
## duration_ms            0.437399338  0.06616759 -0.008271633     0.038555352
## novaccine_antivaccine -0.169791688  0.08306735 -0.035516066    -0.014735117
## edss                   0.394568226  0.03184122 -0.126199702    -0.036780911
##                        duration_ms novaccine_antivaccine         edss
## age                    0.437399338          -0.169791688  0.394568226
## dmt                    0.066167588           0.083067353  0.031841221
## ethnicity             -0.008271633          -0.035516066 -0.126199702
## education_level        0.038555352          -0.014735117 -0.036780911
## duration_ms            1.000000000          -0.072284761  0.261524584
## novaccine_antivaccine -0.072284761           1.000000000  0.002197616
## edss                   0.261524584           0.002197616  1.000000000
library(corrgram)
## 
## Attaching package: 'corrgram'
## The following object is masked from 'package:lattice':
## 
##     panel.fill
corrgram(newdata,  order=NULL, lower.panel=panel.shade,
  upper.panel=NULL, text.panel=panel.txt,
  main="Corr table")

regression

library(survminer)
## Loading required package: ggpubr
data <-read_excel("/Users/carolinaferreiraatuesta/Documents/MS Vaccine/data.xlsx")

linreg2 <- glm(data$sex ~ data$age, data=data, family ="binomial") #logistic 
summary(linreg2)
## 
## Call:
## glm(formula = data$sex ~ data$age, family = "binomial", data = data)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6375  -1.5150   0.8228   0.8514   0.8905  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.544862   0.660443   0.825    0.409
## data$age    0.006747   0.013737   0.491    0.623
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 245.76  on 201  degrees of freedom
## Residual deviance: 245.52  on 200  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 249.52
## 
## Number of Fisher Scoring iterations: 4
linreg <- lm(duration_ms ~ age +sex  +data$income_decile +edss, data=data) #linear
summary(linreg)
## 
## Call:
## lm(formula = duration_ms ~ age + sex + data$income_decile + edss, 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -12.728  -4.848  -0.999   3.435  47.090 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -0.2273     2.5812  -0.088    0.930    
## age                  0.3094     0.0535   5.784 3.11e-08 ***
## sex                 -1.3449     1.1859  -1.134    0.258    
## data$income_decile  -0.2976     0.2153  -1.382    0.169    
## edss                 0.3394     0.2607   1.302    0.195    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.449 on 183 degrees of freedom
##   (17 observations deleted due to missingness)
## Multiple R-squared:  0.2111, Adjusted R-squared:  0.1938 
## F-statistic: 12.24 on 4 and 183 DF,  p-value: 7.709e-09
confint(linreg)
##                         2.5 %    97.5 %
## (Intercept)        -5.3199850 4.8653490
## age                 0.2038743 0.4149964
## sex                -3.6846596 0.9948382
## data$income_decile -0.7223681 0.1271597
## edss               -0.1749120 0.8538071
exp(linreg$coefficients)
##        (Intercept)                age                sex data$income_decile 
##          0.7966674          1.3626555          0.2605630          0.7425952 
##               edss 
##          1.4041716
exp(confint(linreg))
##                          2.5 %     97.5 %
## (Intercept)        0.004892827 129.716208
## age                1.226144000   1.514365
## sex                0.025105719   2.704287
## data$income_decile 0.485600949   1.135598
## edss               0.839530909   2.348571
vif(linreg)
##                age                sex data$income_decile               edss 
##           1.218241           1.006664           1.030482           1.185031
data <- na.omit(data) #ignore NAs
linreg <- lm(data$duration_ms ~ data$age, data=data)

#residual plot


data$predicted <- predict(linreg)   # Save the predicted values
data$residuals <- residuals(linreg)
# Save the residual values
ggplot(data, aes(x = age, y = duration_ms)) +
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey") +     # regression line  
  geom_segment(aes(xend = age, yend = predicted), alpha = .2) +      # draw line from point to line
  geom_point(aes(color = abs(residuals), size = abs(residuals))) +  # size of the points
  scale_color_continuous(low = "green", high = "red") +             # colour of the points mapped to residual size - green smaller, red larger
  guides(color = FALSE, size = FALSE) +                             # Size legend removed
  geom_point(aes(y = predicted), shape = 1) +
  theme_bw()
## `geom_smooth()` using formula 'y ~ x'

fit <- lapply(
  listvars,function(x)
    lm(formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
  data = data))
lapply(fit, summary)
## [[1]]
## 
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
##     data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8132  0.1723  0.1950  0.2157  0.2447 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  0.862884   0.253140   3.409    0.002 **
## dmt         -0.008274   0.031792  -0.260    0.797   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4135 on 28 degrees of freedom
## Multiple R-squared:  0.002413,   Adjusted R-squared:  -0.03321 
## F-statistic: 0.06774 on 1 and 28 DF,  p-value: 0.7966
## 
## 
## [[2]]
## 
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.85567  0.00405  0.08561  0.24545  0.48359 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 0.190199   0.300933   0.632   0.5325  
## age         0.013049   0.006261   2.084   0.0464 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3852 on 28 degrees of freedom
## Multiple R-squared:  0.1343, Adjusted R-squared:  0.1034 
## F-statistic: 4.343 on 1 and 28 DF,  p-value: 0.04639
## 
## 
## [[3]]
## 
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.01270  0.06946  0.09685  0.25432  0.42548 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.57452    0.14704   3.907 0.000539 ***
## edss         0.05477    0.03118   1.757 0.089901 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.393 on 28 degrees of freedom
## Multiple R-squared:  0.09927,    Adjusted R-squared:  0.0671 
## F-statistic: 3.086 on 1 and 28 DF,  p-value: 0.0899
## 
## 
## [[4]]
## 
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
##     data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.82682  0.07263  0.17318  0.27374  0.27374 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.62570    0.16100   3.886  0.00057 ***
## type_ms      0.10056    0.08259   1.218  0.23357    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4035 on 28 degrees of freedom
## Multiple R-squared:  0.05028,    Adjusted R-squared:  0.01636 
## F-statistic: 1.482 on 1 and 28 DF,  p-value: 0.2336
## 
## 
## [[5]]
## 
## Call:
## lm(formula = formula(paste("data$vaccine_flu_2018_2019 ~", x)), 
##     data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##   -0.8    0.2    0.2    0.2    0.2 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.000e-01  1.309e-01    6.11 1.36e-06 ***
## sex         1.720e-16  1.604e-01    0.00        1    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.414 on 28 degrees of freedom
## Multiple R-squared:  9.784e-31,  Adjusted R-squared:  -0.03571 
## F-statistic: 2.739e-29 on 1 and 28 DF,  p-value: 1
newdata <- data %>%
  filter(sex == "Female") %>%
  group_by(dmt) %>%
  summarise(age_mean = mean(age, na.rm = TRUE))
newdata
## # A tibble: 0 x 2
## # … with 2 variables: dmt <dbl>, age_mean <dbl>
data$age_cat <- findInterval(data$age, c(0,10,20,40), rightmost.closed = TRUE)
ggplot(data, 
       aes(x = age_cat, 
           fill = as.factor(dmt))) + 
  geom_bar(position = "stack")