# Load the NHS extract into a data frame.
# NOTE(review): this is an absolute, user-specific path, so the script only
# runs on a machine that has the file at exactly this location.
NHS <- read.csv(file = "C:/Users/Baha/Downloads/NHS.csv")
## Completeness check for the Diabetes and Depressed columns: every value is
## recoded to 1 (observed) or 0 (missing), so a summary made up entirely of
## 1s means the column contains no NA.
summary(as.numeric(!is.na(NHS[["Diabetes"]])))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
summary(as.numeric(!is.na(NHS[["Depressed"]])))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
# Calculate baseline prevalence of the depressed symptoms parameter
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Baseline prevalence of depressed symptoms: the mean of `Depressed` over all
# rows of NHS, stored as a one-row data frame with a single `prevalence` column.
# NOTE(review): the mean is a proportion only if Depressed is coded 0/1 -
# confirm the coding.
prevalence <- summarise(NHS, prevalence = mean(Depressed))
# Show the computed value
print(prevalence)
## prevalence
## 1 0.1891816
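# Interpretation: about 18.9% of the respondents in the data set report
# depressed symptoms (the mean of `Depressed`). This is a raw proportion; on
# its own it does not show that the symptoms are due to hypothyroidism or to
# any other cause.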
## Distribution of demographic characteristics
library(ggplot2)
# Histogram of Age using bins of width 5 (in whatever unit Age is recorded).
ggplot(data = NHS, mapping = aes(x = Age)) +
  geom_histogram(
    binwidth = 5,
    fill = "skyblue",
    colour = "black",
    alpha = 0.7
  ) +
  ggtitle("Distribution of Age") +
  xlab("Age") +
  ylab("Frequency")
## Distribution of gender
# Bar plot of the number of rows per gender code.
# `Gender` is stored as a number in NHS, so it is converted to a factor here.
# With a numeric (continuous) fill, geom_bar() cannot form groups and ggplot2
# warns "The following aesthetics were dropped during statistical
# transformation: fill ... Did you forget to specify a `group` aesthetic or to
# convert a numerical variable into a factor?", leaving the bars uncoloured.
# The colour names "male"/"female" also matched none of the numeric codes, so
# the colours are now given unnamed and assigned to the factor levels in
# sorted order.
# NOTE(review): confirm which numeric code means male and which female, and
# that Gender has exactly two distinct values (two colours are supplied).
ggplot(NHS, aes(x = factor(Gender), fill = factor(Gender))) +
  geom_bar() +
  labs(
    title = "Distribution of Gender",
    x = "Gender",
    y = "Count",
    fill = "Gender"
  ) +
  scale_fill_manual(values = c("skyblue", "pink"))
## distribution of social economic variables
## distribution of income
# Histogram of household income (HHIncomeMid).
# A fixed number of bins is used instead of `binwidth = 5`, the width chosen
# for the Age histogram. Income is presumably recorded on a much larger scale
# than age (TODO confirm the units of HHIncomeMid); a width of 5 would then
# slice the plot into thousands of sliver-thin bars. A bin count stays
# readable whatever the scale turns out to be.
NHS %>%
  ggplot(aes(x = HHIncomeMid)) +
  geom_histogram(bins = 30, fill = "skyblue", color = "black", alpha = 0.7) +
  labs(
    title = "Distribution of Household income",
    x = "Income",
    y = "Frequency"
  )
# NOTE(review): attach() puts the NHS columns on the search path. The glm()
# call below does not need it (it passes `data = NHS`); it is kept only in
# case later code refers to bare column names. Prefer removing it once that
# has been confirmed.
attach(NHS)
# Main-effects logistic regression of Diabetes on Depressed, adjusted for Age
# and Gender.
model <- glm(
  formula = Diabetes ~ Depressed + Age + Gender,
  family = "binomial",
  data = NHS
)
# Coefficient table and deviance statistics
summary(model)
##
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender, family = "binomial",
## data = NHS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.656410 0.247999 -22.808 < 2e-16 ***
## Depressed 0.522361 0.105858 4.935 8.03e-07 ***
## Age 0.060941 0.003721 16.377 < 2e-16 ***
## Gender 0.304277 0.133285 2.283 0.0224 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2011.5 on 3567 degrees of freedom
## Residual deviance: 1604.4 on 3564 degrees of freedom
## AIC: 1612.4
##
## Number of Fisher Scoring iterations: 6
# Logistic regression that lets the Depressed effect vary with Age and Gender.
# In an R formula `a * b` expands to `a + b + a:b`, so the main effects listed
# before the `*` terms are redundant but harmless; the fitted terms are
# Depressed, Age, Gender, Depressed:Age and Depressed:Gender. The formula text
# is left as written because summary() echoes it in its "Call:" section.
model_logit <- glm(
  formula = Diabetes ~ Depressed + Age + Gender +
    Depressed * Age + Depressed * Gender,
  family = "binomial",
  data = NHS
)
# Coefficient table and deviance statistics
summary(model_logit)
##
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender + Depressed *
## Age + Depressed * Gender, family = "binomial", data = NHS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.711279 0.272125 -20.988 <2e-16 ***
## Depressed 0.716696 0.389046 1.842 0.0654 .
## Age 0.061765 0.004102 15.058 <2e-16 ***
## Gender 0.316505 0.149893 2.112 0.0347 *
## Depressed:Age -0.003203 0.006419 -0.499 0.6178
## Depressed:Gender -0.040985 0.213453 -0.192 0.8477
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2011.5 on 3567 degrees of freedom
## Residual deviance: 1604.2 on 3562 degrees of freedom
## AIC: 1616.2
##
## Number of Fisher Scoring iterations: 6
## Sensitivity analysis
## Alternative specification: keeps the Depressed-by-Age interaction and
## leaves out the Depressed-by-Gender term.
model_alt <- glm(
  formula = Diabetes ~ Depressed + Age + Gender + Depressed * Age,
  family = "binomial",
  data = NHS
)
summary(model_alt)
##
## Call:
## glm(formula = Diabetes ~ Depressed + Age + Gender + Depressed *
## Age, family = "binomial", data = NHS)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.702434 0.267960 -21.281 <2e-16 ***
## Depressed 0.691588 0.367075 1.884 0.0596 .
## Age 0.061732 0.004097 15.068 <2e-16 ***
## Gender 0.303325 0.133197 2.277 0.0228 *
## Depressed:Age -0.003058 0.006378 -0.479 0.6316
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2011.5 on 3567 degrees of freedom
## Residual deviance: 1604.2 on 3563 degrees of freedom
## AIC: 1614.2
##
## Number of Fisher Scoring iterations: 6