R Markdown

Descriptive statistics of the baseline parameters. The primary variables used in this analysis are diabetes status (`Diabetes`) and depressive symptoms (`Depressed`).

# Load the survey extract into a data frame.
# NOTE(review): absolute, machine-specific path — the script only runs where
# this exact file location exists.
NHS <- read.csv(file = "C:/Users/Baha/Downloads/NHS.csv")
## Missing-value check for the two primary variables
# Each call summarises a 0/1 "value is present" indicator (1 = not NA), so a
# summary made up entirely of 1s means the column has no missing values.
summary(as.numeric(!is.na(NHS[["Diabetes"]])))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
summary(as.numeric(!is.na(NHS[["Depressed"]])))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1
# Baseline prevalence of depressed symptoms, computed as the mean of the
# Depressed column.
# NOTE(review): the mean is a proportion only if Depressed is coded 0/1 —
# confirm the coding.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
prevalence <- summarise(NHS, prevalence = mean(Depressed))
# Show the one-row result
print(prevalence)
##   prevalence
## 1  0.1891816

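The proportion of individuals in the data set who are recorded as depressed is 18.9%. This is an overall (unconditional) proportion; the calculation above does not by itself attribute depression to hypothyroidism or any other cause.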

## Distribution of demographic characteristics
library(ggplot2)
# Histogram of Age using bins that are 5 units wide
ggplot(data = NHS, mapping = aes(x = Age)) +
  geom_histogram(
    binwidth = 5,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Age",
    x = "Age",
    y = "Frequency"
  )

## Distribution of gender
# Bar plot of the number of rows per Gender code.
# Gender is stored as a numeric code, so it is converted to a factor here.
# Mapping the raw numeric column to `fill` made ggplot2 drop the aesthetic
# ("aesthetics were dropped during statistical transformation: fill"), and
# the "male"/"female" names previously given to the manual scale could not
# match numeric codes. Colours are now assigned in factor-level order.
# NOTE(review): assumes Gender has exactly two codes; confirm which code is
# male/female and add `labels =` to the scale for readable legend entries.
ggplot(NHS, aes(x = factor(Gender), fill = factor(Gender))) +
  geom_bar() +
  labs(
    title = "Distribution of Gender",
    x = "Gender",
    y = "Count",
    fill = "Gender"
  ) +
  scale_fill_manual(values = c("skyblue", "pink"))

## Distribution of socio-economic variables
## Distribution of income
# NOTE(review): binwidth = 5 is the same value used for the Age histogram; if
# HHIncomeMid is in currency units this gives extremely narrow bins — confirm
# the intended width.
ggplot(NHS, aes(x = HHIncomeMid)) +
  geom_histogram(
    binwidth = 5,
    fill = "skyblue",
    color = "black",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Household income",
    x = "Income",
    y = "Frequency"
  )

# attach(NHS) removed: it placed the data frame's columns on the search path,
# where their names can silently mask or be masked by other objects. The
# model calls pass `data = NHS`, so columns are resolved from the data frame.
# Fit logistic regression model: Diabetes on Depressed, adjusted for Age and
# Gender (binomial family, default logit link)
model <- glm(Diabetes ~ Depressed + Age + Gender, data = NHS, family = "binomial")
# Display the model summary
summary(model)
## 
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender, family = "binomial", 
##     data = NHS)
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -5.656410   0.247999 -22.808  < 2e-16 ***
## Depressed    0.522361   0.105858   4.935 8.03e-07 ***
## Age          0.060941   0.003721  16.377  < 2e-16 ***
## Gender       0.304277   0.133285   2.283   0.0224 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2011.5  on 3567  degrees of freedom
## Residual deviance: 1604.4  on 3564  degrees of freedom
## AIC: 1612.4
## 
## Number of Fisher Scoring iterations: 6
# Fit logistic regression model with interaction terms
# `Depressed * Age` and `Depressed * Gender` each expand to the main effects
# plus the interaction, so the separately listed main effects are redundant.
# The fit has six coefficients: intercept, Depressed, Age, Gender,
# Depressed:Age and Depressed:Gender.
model_logit <- glm(Diabetes ~ Depressed + Age + Gender + Depressed * Age + Depressed * Gender, data = NHS, family = "binomial")
# Display the model summary
summary(model_logit)
## 
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender + Depressed * 
##     Age + Depressed * Gender, family = "binomial", data = NHS)
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -5.711279   0.272125 -20.988   <2e-16 ***
## Depressed         0.716696   0.389046   1.842   0.0654 .  
## Age               0.061765   0.004102  15.058   <2e-16 ***
## Gender            0.316505   0.149893   2.112   0.0347 *  
## Depressed:Age    -0.003203   0.006419  -0.499   0.6178    
## Depressed:Gender -0.040985   0.213453  -0.192   0.8477    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2011.5  on 3567  degrees of freedom
## Residual deviance: 1604.2  on 3562  degrees of freedom
## AIC: 1616.2
## 
## Number of Fisher Scoring iterations: 6
## Sensitivity analysis
##Change model specifications
# Alternative specification: keeps the Depressed:Age interaction but omits
# the Depressed:Gender term that model_logit includes.
model_alt <- glm(Diabetes ~ Depressed + Age + Gender + Depressed*Age, data = NHS, family = "binomial")
# Print the coefficient table and fit statistics for the alternative model
summary(model_alt)
## 
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender + Depressed * 
##     Age, family = "binomial", data = NHS)
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   -5.702434   0.267960 -21.281   <2e-16 ***
## Depressed      0.691588   0.367075   1.884   0.0596 .  
## Age            0.061732   0.004097  15.068   <2e-16 ***
## Gender         0.303325   0.133197   2.277   0.0228 *  
## Depressed:Age -0.003058   0.006378  -0.479   0.6316    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2011.5  on 3567  degrees of freedom
## Residual deviance: 1604.2  on 3563  degrees of freedom
## AIC: 1614.2
## 
## Number of Fisher Scoring iterations: 6