knitr::opts_chunk$set(echo = TRUE)
rm(list=ls()) # clear workspace
cat("\014") # clear console
library(haven)
library(ggplot2)
library(plm)
## Warning: 程辑包'plm'是用R版本4.3.2 来建造的
library(psych)
## Warning: 程辑包'psych'是用R版本4.3.2 来建造的
##
## 载入程辑包:'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(gplots)
## Warning: 程辑包'gplots'是用R版本4.3.2 来建造的
##
## 载入程辑包:'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(car)
## Warning: 程辑包'car'是用R版本4.3.2 来建造的
## 载入需要的程辑包:carData
##
## 载入程辑包:'car'
## The following object is masked from 'package:psych':
##
## logit
library(dplyr)
##
## 载入程辑包:'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:plm':
##
## between, lag, lead
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(tidyr)
data source: VizHub - GBD Results (healthdata.org)
hlda <- read.csv("hldata.csv")
hda<- read_xlsx("zhdd.xlsx")
hd<-read.csv("jhdd.csv")
Merge data
hlda <- bind_rows(hlda, hda, hd)
data source: LAUS database(https://beta.bls.gov/dataQuery/find?fq=survey:%5Bla%5D&s=popularity:D)
mdata <- read_xlsx("michi_unem.xlsx")
wdata <- read_xlsx("wis_unem.xlsx")
mdata_filtered <- mdata %>%
filter(Period == 'Annual')
mdata_filtered <- mdata_filtered %>%
mutate(location_name = "Michigan")
wdata_filtered <- wdata %>%
filter(Period == 'Annual')
wdata_filtered <- wdata_filtered %>%
mutate(location_name= "Wisconsin")
combined_data <- bind_rows(mdata_filtered, wdata_filtered)
unique_combined_data <- distinct(combined_data)
hlda<- merge(hlda, unique_combined_data, by = c("location_name", "year"))
Summary data
summary(hlda)
## location_name year measure_id measure_name
## Length:17600 Min. :2000 Min. :1.000 Length:17600
## Class :character 1st Qu.:2005 1st Qu.:2.000 Class :character
## Mode :character Median :2010 Median :5.000 Mode :character
## Mean :2010 Mean :3.552
## 3rd Qu.:2014 3rd Qu.:5.000
## Max. :2019 Max. :6.000
## location_id sex_id sex_name age_id age_name
## Min. :545.0 Min. :3 Length:17600 Min. :22 Length:17600
## 1st Qu.:545.0 1st Qu.:3 Class :character 1st Qu.:22 Class :character
## Median :558.5 Median :3 Mode :character Median :22 Mode :character
## Mean :558.5 Mean :3 Mean :22
## 3rd Qu.:572.0 3rd Qu.:3 3rd Qu.:22
## Max. :572.0 Max. :3 Max. :22
## cause_id cause_name metric_id metric_name
## Min. : 492.0 Length:17600 Min. :1 Length:17600
## 1st Qu.: 515.8 Class :character 1st Qu.:1 Class :character
## Median : 535.0 Mode :character Median :2 Mode :character
## Mean : 583.8 Mean :2
## 3rd Qu.: 570.0 3rd Qu.:3
## Max. :1027.0 Max. :3
## val upper lower Period
## Min. : 0.0 Min. : 0.0 Min. : 0.0 Length:17600
## 1st Qu.: 0.0 1st Qu.: 0.0 1st Qu.: 0.0 Class :character
## Median : 140.8 Median : 168.3 Median : 103.8 Mode :character
## Mean : 20789.9 Mean : 23832.9 Mean : 18102.3
## 3rd Qu.: 3239.0 3rd Qu.: 3858.0 3rd Qu.: 2671.2
## Max. :1388806.1 Max. :1540001.5 Max. :1258338.9
## civilian noninstitutional population labor force participation rate
## Min. :4086373 Min. :60.10
## 1st Qu.:4431063 1st Qu.:62.35
## Median :6093415 Median :67.60
## Mean :6095620 Mean :66.52
## 3rd Qu.:7771971 3rd Qu.:69.75
## Max. :8049908 Max. :73.10
## employment-population ratio labor force employment
## Min. :53.90 Min. :2985643 Min. :2820620
## 1st Qu.:58.90 1st Qu.:3077936 1st Qu.:2885438
## Median :63.75 Median :3905368 Median :3606647
## Mean :62.47 Mean :3995593 Mean :3741855
## 3rd Qu.:65.88 3rd Qu.:4923034 3rd Qu.:4649855
## Max. :70.60 Max. :5156695 Max. :4968878
## unemployment unemployment_rate
## Min. : 93239 Min. : 3.000
## 1st Qu.:150467 1st Qu.: 4.550
## Median :219139 Median : 5.350
## Mean :253738 Mean : 6.160
## 3rd Qu.:345972 3rd Qu.: 7.125
## Max. :638574 Max. :13.100
Find is there have missing value
missing_count <- sum(is.na(hlda))
# Filter the data for Eating disorders
eating_disorders_df <- dplyr::filter(hlda, cause_name == "Eating disorders")
# Filter the data for Asthma
asthma_df <- dplyr::filter(hlda, cause_name == "Asthma")
# Filter the data for Interstitial lung disease and pulmonary sarcoidosis
interstitial_lung_disease_df <- dplyr::filter(hlda, cause_name == "Interstitial lung disease and pulmonary sarcoidosis")
#Filter the data for Other chronic respiratory diseases
other_respiratory_diseases_df <- dplyr::filter(hlda, cause_name == "Other chronic respiratory diseases")
# Filter the data for Ischemic heart disease
ischemic_heart_disease_df <- dplyr::filter(hlda, cause_name == "Ischemic heart disease")
# Filter the data for Alcohol use disorders
alcohol_use_disorders_df <- dplyr::filter(hlda, cause_name == "Alcohol use disorders")
Trend analysis - asthma
# For asthma_md5
asthma_md5 <- asthma_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 1)
# Create the line plot for Asthma with detailed data points
ggplot(asthma_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
geom_point() + # Display data points
geom_vline(xintercept = 2014, linetype = "dashed", color = "black") + # Support line at year = 2014
geom_text(aes(label = val), vjust = -0.5, hjust = -0.5, size = 3) + # Display data point values
labs(title = "Comparison of 'val' metric for Asthma through years (metric_id == 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# For asthma_md6
asthma_md6 <- asthma_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 3)
# Create the line plot for Asthma with detailed data points
ggplot(asthma_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
geom_point() + # Display data points
geom_vline(xintercept = 2014, linetype = "dashed", color = "black") + # Support line at year = 2014
geom_text(aes(label = val), vjust = -0.5, hjust = -0.5, size = 3) + # Display data point values
labs(title = "Comparison of 'val' metric for Asthma through years (metric_id == 3)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - asthma
# For asthma_md5
asthma_md5_diff <- asthma_md5 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for asthma_md5
ggplot(asthma_md5_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For asthma_md6
asthma_md6_diff <- asthma_md6 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]),
unedifference = mean(unemployment_rate[location_name == "Michigan"]) - mean(unemployment_rate[location_name == "Wisconsin"]))
# Plot the difference values over the years for asthma_md6
ggplot(asthma_md6_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
difference=β0+β1×time_t+β2×intervention_dummy+β3×time_t×intervention_dummy+ϵ
Where:
difference is the dependent variable (the difference).
time_t represents the time variable, which has been converted to a numeric factor.
intervention_dummy is a binary variable indicating whether the year is greater than or equal to 2014.
β0 is the intercept, representing the value of the dependent variable when all predictors are zero.
β1 is the coefficient for the effect of ‘time_t’ on the dependent variable.
β2 is the coefficient for the effect of ‘intervention_dummy’ on the dependent variable.
β3 is the coefficient for the interaction effect between ‘time_t’ and ‘intervention_dummy’.
ϵ is the error term, representing the variability in the dependent variable that is not explained by the predictors.
This formula describes how the ‘difference’ variable is predicted based on time, intervention status, and the interaction between time and intervention status, with each coefficient indicating the strength and direction of their respective effects.
# Fit a linear regression model using lm() with ‘difference’ as the dependent variable
# ‘time_t’ as the continuous predictor representing time
# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)
# Interaction between ‘time_t’ and ‘intervention_dummy’ to capture the change in slope after 2014
# Convert the 'year' variable to a numeric factor and create a new variable 'time_t'
asthma_md6_diff$time_t <- as.numeric(factor(asthma_md6_diff$year, levels = unique(asthma_md6_diff$year)))
# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
asthma_md6_diff$intervention_dummy <- ifelse(asthma_md6_diff$year >= 2014, 1, 0)
#Modeling
ymodel <- lm(difference ~ time_t + intervention_dummy + time_t:intervention_dummy, data = asthma_md6_diff)
# Display the summary of the linear regression model
summary(ymodel)
##
## Call:
## lm(formula = difference ~ time_t + intervention_dummy + time_t:intervention_dummy,
## data = asthma_md6_diff)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.000 -7.008 2.670 8.646 27.333
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 43.935 8.545 5.142 9.84e-05 ***
## time_t 11.180 1.004 11.141 5.99e-09 ***
## intervention_dummy 205.609 64.191 3.203 0.00554 **
## time_t:intervention_dummy -13.656 3.755 -3.637 0.00222 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.14 on 16 degrees of freedom
## Multiple R-squared: 0.9369, Adjusted R-squared: 0.925
## F-statistic: 79.13 on 3 and 16 DF, p-value: 8.2e-10
val=β0+β1×torc+β2×intervention_dummy+β3×unemployment_rate+β4×year_factor+β5×torc×intervention_dummy+ϵ
Where:
val is the dependent variable (the ‘val’ metric).
torc is a binary variable indicating whether the location is Michigan.
intervention_dummy is a binary variable indicating whether the year is greater than or equal to 2014.
unemployment_rate is a continuous predictor representing the unemployment rate.
year_factor is a categorical predictor representing the year (as a factor variable).
β0 is the intercept, representing the expected value of ‘val’ when all predictors are zero.
β1 is the coefficient for the effect of ‘torc’ (Michigan location) on ‘val’.
β2 is the coefficient for the effect of ‘intervention_dummy’ (intervention period) on ‘val’.
β3 is the coefficient for the effect of ‘unemployment_rate’ on ‘val’.
β4 is the coefficient for the effect of ‘year_factor’ on ‘val’.
β5 is the coefficient for the interaction effect between ‘torc’ and ‘intervention_dummy’.
ϵ is the error term, representing the variability in ‘val’ that is not explained by the predictors.
# Fit a linear regression model using lm() with ‘val’ as the dependent variable
# ‘torc’ as the binary predictor indicating Michigan location
# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)
# Interaction between ‘torc’ and ‘intervention_dummy’ to capture the differential effect of the intervention in Michigan
# ‘unemployment_rate’ as a continuous predictor
# ‘year’ as a factor variable
DiD - asthma
measure_id == 6(Incidence),
metric_id == 3(Rate)
# Create a binary variable 'torc' indicating whether the location is Michigan (1) or not (0)
asthma_md6$torc <- ifelse(asthma_md6$location_name == "Michigan", 1, 0)
# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
asthma_md6$intervention_dummy <- ifelse(asthma_md6$year >= 2014, 1, 0)
#Modeling
ymodel <- lm(val ~ torc * intervention_dummy + unemployment_rate + factor(year), data = asthma_md6)
# Display the summary of the linear regression model
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate +
## factor(year), data = asthma_md6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.649 -7.023 0.000 7.023 38.649
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 770.938 27.867 27.665 1.41e-15 ***
## torc 70.582 20.628 3.422 0.003252 **
## intervention_dummy 292.813 26.510 11.045 3.54e-09 ***
## unemployment_rate 27.056 8.711 3.106 0.006420 **
## factor(year)2001 -36.175 27.066 -1.337 0.198996
## factor(year)2002 -61.207 31.440 -1.947 0.068277 .
## factor(year)2003 -75.548 34.938 -2.162 0.045138 *
## factor(year)2004 -62.151 32.842 -1.892 0.075591 .
## factor(year)2005 -50.939 31.714 -1.606 0.126641
## factor(year)2006 -28.927 31.990 -0.904 0.378506
## factor(year)2007 16.040 32.555 0.493 0.628531
## factor(year)2008 57.797 36.197 1.597 0.128739
## factor(year)2009 -10.165 70.210 -0.145 0.886592
## factor(year)2010 44.118 64.534 0.684 0.503415
## factor(year)2011 109.802 52.304 2.099 0.051029 .
## factor(year)2012 159.188 45.901 3.468 0.002941 **
## factor(year)2013 190.766 44.077 4.328 0.000457 ***
## factor(year)2014 -95.098 33.426 -2.845 0.011191 *
## factor(year)2015 -49.951 26.887 -1.858 0.080606 .
## factor(year)2016 -46.388 25.552 -1.815 0.087140 .
## factor(year)2017 -42.047 24.722 -1.701 0.107205
## factor(year)2018 -22.860 24.588 -0.930 0.365518
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy 102.270 18.619 5.493 3.96e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 24.58 on 17 degrees of freedom
## Multiple R-squared: 0.99, Adjusted R-squared: 0.9771
## F-statistic: 76.48 on 22 and 17 DF, p-value: 5.869e-13
measure_id == 6(Incidence),
metric_id == 1(Number)
asthma_md5$torc <- ifelse(asthma_md5$location_name == "Michigan", 1, 0)
asthma_md5$intervention_dummy <- ifelse(asthma_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy +unemployment_rate+ factor(year), data = asthma_md5)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate +
## factor(year), data = asthma_md5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5832.0 -969.6 0.0 969.6 5832.0
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 33225.3 4216.5 7.880 4.48e-07 ***
## torc 45343.6 3121.2 14.528 5.14e-11 ***
## intervention_dummy 21852.6 4011.2 5.448 4.34e-05 ***
## unemployment_rate 3925.9 1318.0 2.979 0.00843 **
## factor(year)2001 -4936.9 4095.4 -1.205 0.24454
## factor(year)2002 -8399.1 4757.2 -1.766 0.09542 .
## factor(year)2003 -10411.6 5286.4 -1.970 0.06541 .
## factor(year)2004 -8550.8 4969.2 -1.721 0.10345
## factor(year)2005 -7159.6 4798.5 -1.492 0.15401
## factor(year)2006 -5310.5 4840.4 -1.097 0.28789
## factor(year)2007 -1644.4 4925.8 -0.334 0.74259
## factor(year)2008 891.4 5476.9 0.163 0.87263
## factor(year)2009 -12262.5 10623.4 -1.154 0.26435
## factor(year)2010 -6573.7 9764.5 -0.673 0.50985
## factor(year)2011 1454.3 7914.0 0.184 0.85637
## factor(year)2012 6941.7 6945.2 1.000 0.33157
## factor(year)2013 9940.8 6669.3 1.491 0.15440
## factor(year)2014 -11794.1 5057.6 -2.332 0.03226 *
## factor(year)2015 -5793.1 4068.3 -1.424 0.17255
## factor(year)2016 -4697.5 3866.3 -1.215 0.24098
## factor(year)2017 -3480.5 3740.7 -0.930 0.36517
## factor(year)2018 -1469.8 3720.3 -0.395 0.69770
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy 14213.8 2817.3 5.045 9.97e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3720 on 17 degrees of freedom
## Multiple R-squared: 0.9939, Adjusted R-squared: 0.986
## F-statistic: 125.5 on 22 and 17 DF, p-value: 9.316e-15
Trend analysis - Interstitial lung disease
# Filtered data for measure_id == 6 (Interstitial lung disease and pulmonary sarcoidosis)
interstitial_lung_disease_md5 <- interstitial_lung_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 1)
# Create the line plot for Interstitial lung disease and pulmonary sarcoidosis measure_id == 5
ggplot(interstitial_lung_disease_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Interstitial lung disease and pulmonary sarcoidosis through years (metric_id == 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 6 (Interstitial lung disease and pulmonary sarcoidosis)
interstitial_lung_disease_md6 <- interstitial_lung_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 3)
# Create the line plot for Interstitial lung disease and pulmonary sarcoidosis measure_id == 6
ggplot(interstitial_lung_disease_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Interstitial lung disease and pulmonary sarcoidosis through years (metric_id == 3)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Interstitial lung disease
# For interstitial_lung_disease_md5
interstitial_lung_disease_md5_diff <- interstitial_lung_disease_md5 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for interstitial_lung_disease_md5
ggplot(interstitial_lung_disease_md5_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For interstitial_lung_disease_md6
interstitial_lung_disease_md6_diff <- interstitial_lung_disease_md6 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for interstitial_lung_disease_md6
ggplot(interstitial_lung_disease_md6_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
DiD - Interstitial lung disease
measure_id == 6(Incidence),
metric_id == 3(Rate)
interstitial_lung_disease_md6$torc <- ifelse(interstitial_lung_disease_md6$location_name == "Michigan", 1, 0)
interstitial_lung_disease_md6$intervention_dummy <- ifelse(interstitial_lung_disease_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = interstitial_lung_disease_md6)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) +
## unemployment_rate, data = interstitial_lung_disease_md6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.72 -10.64 0.00 10.64 30.72
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 436.944 25.011 17.470 2.69e-12 ***
## torc 21.580 18.514 1.166 0.259856
## intervention_dummy 694.683 23.793 29.197 5.74e-16 ***
## factor(year)2001 -23.990 24.292 -0.988 0.337216
## factor(year)2002 -43.840 28.218 -1.554 0.138691
## factor(year)2003 -56.821 31.357 -1.812 0.087676 .
## factor(year)2004 -47.881 29.476 -1.624 0.122682
## factor(year)2005 -35.144 28.463 -1.235 0.233729
## factor(year)2006 22.887 28.712 0.797 0.436360
## factor(year)2007 147.250 29.218 5.040 0.000101 ***
## factor(year)2008 289.706 32.487 8.918 8.07e-08 ***
## factor(year)2009 339.471 63.014 5.387 4.91e-05 ***
## factor(year)2010 440.404 57.920 7.604 7.24e-07 ***
## factor(year)2011 511.217 46.943 10.890 4.37e-09 ***
## factor(year)2012 562.732 41.196 13.660 1.35e-10 ***
## factor(year)2013 600.131 39.560 15.170 2.59e-11 ***
## factor(year)2014 -87.561 30.000 -2.919 0.009575 **
## factor(year)2015 -23.055 24.131 -0.955 0.352773
## factor(year)2016 51.182 22.933 2.232 0.039379 *
## factor(year)2017 119.073 22.188 5.366 5.13e-05 ***
## factor(year)2018 102.762 22.068 4.657 0.000226 ***
## factor(year)2019 NA NA NA NA
## unemployment_rate 20.352 7.818 2.603 0.018554 *
## torc:intervention_dummy 108.807 16.711 6.511 5.33e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 22.06 on 17 degrees of freedom
## Multiple R-squared: 0.9982, Adjusted R-squared: 0.9959
## F-statistic: 435.8 on 22 and 17 DF, p-value: < 2.2e-16
measure_id == 6(Incidence),
metric_id == 1(Number)
interstitial_lung_disease_md5$torc <- ifelse(interstitial_lung_disease_md5$location_name == "Michigan", 1, 0)
interstitial_lung_disease_md5$intervention_dummy <- ifelse(interstitial_lung_disease_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = interstitial_lung_disease_md5)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) +
## unemployment_rate, data = interstitial_lung_disease_md5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10529 -2216 0 2216 10529
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1972.1 8089.3 0.244 0.81031
## torc 22413.5 5988.0 3.743 0.00162 **
## intervention_dummy 43254.9 7695.4 5.621 3.06e-05 ***
## factor(year)2001 -9556.9 7857.0 -1.216 0.24047
## factor(year)2002 -16697.8 9126.6 -1.830 0.08491 .
## factor(year)2003 -21205.8 10142.0 -2.091 0.05187 .
## factor(year)2004 -18281.5 9533.5 -1.918 0.07212 .
## factor(year)2005 -15987.3 9205.9 -1.737 0.10054
## factor(year)2006 -11645.5 9286.3 -1.254 0.22680
## factor(year)2007 -2358.8 9450.1 -0.250 0.80589
## factor(year)2008 5369.1 10507.3 0.511 0.61593
## factor(year)2009 -17680.8 20380.9 -0.868 0.39774
## factor(year)2010 -5405.7 18733.2 -0.289 0.77641
## factor(year)2011 9576.6 15183.0 0.631 0.53660
## factor(year)2012 18808.8 13324.3 1.412 0.17611
## factor(year)2013 23313.5 12794.9 1.822 0.08608 .
## factor(year)2014 -22313.5 9703.0 -2.300 0.03441 *
## factor(year)2015 -9015.4 7804.9 -1.155 0.26403
## factor(year)2016 -236.2 7417.5 -0.032 0.97497
## factor(year)2017 8272.3 7176.5 1.153 0.26499
## factor(year)2018 8890.1 7137.4 1.246 0.22982
## factor(year)2019 NA NA NA NA
## unemployment_rate 7625.8 2528.6 3.016 0.00779 **
## torc:intervention_dummy 32009.6 5404.9 5.922 1.67e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7136 on 17 degrees of freedom
## Multiple R-squared: 0.9838, Adjusted R-squared: 0.9627
## F-statistic: 46.81 on 22 and 17 DF, p-value: 3.413e-11
Trend analysis - Other chronic respiratory diseases
# Filter the data for measure_id == 2
other_respiratory_diseases_md1 <- other_respiratory_diseases_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 1)
# Create the line plot for other_respiratory_diseases_md1
ggplot(other_respiratory_diseases_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Other chronic respiratory diseases through years(metric_id == 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 2 (Other chronic respiratory diseases)
other_respiratory_diseases_md2 <- other_respiratory_diseases_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 3)
# Create the line plot for Other chronic respiratory diseases measure_id == 2
ggplot(other_respiratory_diseases_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Other chronic respiratory diseases through years (metric_id == 3)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Other chronic respiratory diseases
# For other_respiratory_diseases_md1
other_respiratory_diseases_md1_diff <- other_respiratory_diseases_md1 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for other_respiratory_diseases_md1
ggplot(other_respiratory_diseases_md1_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For other_respiratory_diseases_md2
other_respiratory_diseases_md2_diff <- other_respiratory_diseases_md2 %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for other_respiratory_diseases_md2
ggplot(other_respiratory_diseases_md2_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 2)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
DiD - Other chronic respiratory diseases
measure_id == 2(DALY),
metric_id == 1(Number)
other_respiratory_diseases_md2$torc <- ifelse(other_respiratory_diseases_md2$location_name == "Michigan", 1, 0)
other_respiratory_diseases_md2$intervention_dummy <- ifelse(other_respiratory_diseases_md2$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc + intervention_dummy + torc :intervention_dummy+factor(year)+unemployment_rate, data = other_respiratory_diseases_md2)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc + intervention_dummy + torc:intervention_dummy +
## factor(year) + unemployment_rate, data = other_respiratory_diseases_md2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27598 -0.09002 0.00000 0.09002 0.27598
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.24161 0.22606 111.661 < 2e-16 ***
## torc 1.51034 0.16734 9.026 6.80e-08 ***
## intervention_dummy 9.07727 0.21505 42.210 < 2e-16 ***
## factor(year)2001 0.39315 0.21956 1.791 0.091186 .
## factor(year)2002 1.00606 0.25504 3.945 0.001045 **
## factor(year)2003 1.25417 0.28342 4.425 0.000371 ***
## factor(year)2004 1.85323 0.26641 6.956 2.32e-06 ***
## factor(year)2005 3.11516 0.25726 12.109 8.76e-10 ***
## factor(year)2006 3.85157 0.25951 14.842 3.66e-11 ***
## factor(year)2007 4.81369 0.26408 18.228 1.36e-12 ***
## factor(year)2008 5.11066 0.29363 17.405 2.86e-12 ***
## factor(year)2009 3.97222 0.56955 6.974 2.24e-06 ***
## factor(year)2010 4.90640 0.52350 9.372 3.97e-08 ***
## factor(year)2011 6.07190 0.42429 14.311 6.52e-11 ***
## factor(year)2012 6.52057 0.37235 17.512 2.59e-12 ***
## factor(year)2013 7.26161 0.35756 20.309 2.33e-13 ***
## factor(year)2014 -0.78762 0.27115 -2.905 0.009863 **
## factor(year)2015 -0.02940 0.21811 -0.135 0.894368
## factor(year)2016 0.39881 0.20728 1.924 0.071263 .
## factor(year)2017 0.15203 0.20055 0.758 0.458795
## factor(year)2018 0.12162 0.19946 0.610 0.550095
## factor(year)2019 NA NA NA NA
## unemployment_rate 0.34346 0.07066 4.861 0.000147 ***
## torc:intervention_dummy -0.39950 0.15104 -2.645 0.017016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1994 on 17 degrees of freedom
## Multiple R-squared: 0.9984, Adjusted R-squared: 0.9963
## F-statistic: 483.1 on 22 and 17 DF, p-value: < 2.2e-16
measure_id == 2(DALY),
metric_id == 3(Rate)
other_respiratory_diseases_md1$torc <- ifelse(other_respiratory_diseases_md1$location_name == "Michigan", 1, 0)
other_respiratory_diseases_md1$intervention_dummy <- ifelse(other_respiratory_diseases_md1$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc + intervention_dummy + torc :intervention_dummy+factor(year)+unemployment_rate, data = other_respiratory_diseases_md1)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc + intervention_dummy + torc:intervention_dummy +
## factor(year) + unemployment_rate, data = other_respiratory_diseases_md1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -40.71 -18.26 0.00 18.26 40.71
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1111.878 36.274 30.652 2.55e-16 ***
## torc 1384.045 26.851 51.545 < 2e-16 ***
## intervention_dummy 693.690 34.507 20.103 2.75e-13 ***
## factor(year)2001 -31.946 35.232 -0.907 0.377222
## factor(year)2002 -28.518 40.925 -0.697 0.495330
## factor(year)2003 -32.289 45.478 -0.710 0.487342
## factor(year)2004 43.413 42.750 1.016 0.324095
## factor(year)2005 156.348 41.281 3.787 0.001470 **
## factor(year)2006 218.305 41.641 5.243 6.62e-05 ***
## factor(year)2007 286.968 42.376 6.772 3.26e-06 ***
## factor(year)2008 280.942 47.116 5.963 1.54e-05 ***
## factor(year)2009 -48.139 91.391 -0.527 0.605185
## factor(year)2010 61.671 84.003 0.734 0.472856
## factor(year)2011 233.917 68.083 3.436 0.003154 **
## factor(year)2012 314.629 59.748 5.266 6.31e-05 ***
## factor(year)2013 385.627 57.375 6.721 3.59e-06 ***
## factor(year)2014 -196.425 43.510 -4.515 0.000306 ***
## factor(year)2015 -60.469 34.999 -1.728 0.102150
## factor(year)2016 -8.223 33.261 -0.247 0.807688
## factor(year)2017 -1.250 32.181 -0.039 0.969469
## factor(year)2018 14.054 32.005 0.439 0.666112
## factor(year)2019 NA NA NA NA
## unemployment_rate 81.724 11.338 7.208 1.46e-06 ***
## torc:intervention_dummy 66.066 24.236 2.726 0.014376 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 32 on 17 degrees of freedom
## Multiple R-squared: 0.9994, Adjusted R-squared: 0.9985
## F-statistic: 1190 on 22 and 17 DF, p-value: < 2.2e-16
Trend analysis - Eating disorders
# Filtered data for measure_id == 1
eating_disorders_md1 <- eating_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 3)
# Create the line plot for Eating disorders measure_id == 1
ggplot(eating_disorders_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Eating disorders through years (Measure ID = 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
eating_disorders_md2 <- eating_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 1)
# Create the line plot for Eating disorders measure_id == 2
ggplot(eating_disorders_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Eating disorders through years (Measure ID = 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Eating disorders
# For eating_disorders_md1
eating_disorders_md1_diff <- eating_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 1) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for eating_disorders_md1
ggplot(eating_disorders_md1_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For eating_disorders_md2
eating_disorders_md2_diff <- eating_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 3) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for eating_disorders_md2
ggplot(eating_disorders_md2_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
DiD - Eating disorders
measure_id == 1(Death),
metric_id == 1(Number)
eating_disorders_md1 $torc <- ifelse(eating_disorders_md1 $location_name == "Michigan", 1, 0)
eating_disorders_md1 $intervention_dummy <- ifelse(eating_disorders_md1 $year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = eating_disorders_md1 )
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) +
## unemployment_rate, data = eating_disorders_md1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.853e-04 -7.078e-05 0.000e+00 7.078e-05 1.853e-04
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.469e-02 1.621e-04 90.629 < 2e-16 ***
## torc -1.544e-03 1.200e-04 -12.870 3.42e-10 ***
## intervention_dummy 5.889e-04 1.542e-04 3.820 0.001370 **
## factor(year)2001 -1.176e-05 1.574e-04 -0.075 0.941321
## factor(year)2002 1.521e-03 1.828e-04 8.320 2.13e-07 ***
## factor(year)2003 1.853e-03 2.032e-04 9.119 5.88e-08 ***
## factor(year)2004 1.516e-03 1.910e-04 7.939 4.05e-07 ***
## factor(year)2005 7.885e-04 1.844e-04 4.276 0.000511 ***
## factor(year)2006 6.605e-04 1.860e-04 3.550 0.002460 **
## factor(year)2007 1.020e-03 1.893e-04 5.389 4.89e-05 ***
## factor(year)2008 9.317e-04 2.105e-04 4.426 0.000370 ***
## factor(year)2009 4.534e-04 4.083e-04 1.111 0.282230
## factor(year)2010 1.257e-03 3.753e-04 3.350 0.003794 **
## factor(year)2011 1.101e-03 3.042e-04 3.618 0.002123 **
## factor(year)2012 1.019e-03 2.669e-04 3.819 0.001373 **
## factor(year)2013 9.385e-04 2.563e-04 3.662 0.001933 **
## factor(year)2014 8.092e-04 1.944e-04 4.163 0.000651 ***
## factor(year)2015 6.017e-04 1.564e-04 3.849 0.001288 **
## factor(year)2016 6.688e-04 1.486e-04 4.501 0.000315 ***
## factor(year)2017 5.441e-04 1.438e-04 3.784 0.001480 **
## factor(year)2018 2.451e-04 1.430e-04 1.714 0.104686
## factor(year)2019 NA NA NA NA
## unemployment_rate 2.556e-05 5.065e-05 0.505 0.620333
## torc:intervention_dummy -7.679e-05 1.083e-04 -0.709 0.487816
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.000143 on 17 degrees of freedom
## Multiple R-squared: 0.9893, Adjusted R-squared: 0.9754
## F-statistic: 71.41 on 22 and 17 DF, p-value: 1.039e-12
measure_id == 1(Death),
metric_id == 3(Rate)
eating_disorders_md2 $torc <- ifelse(eating_disorders_md2 $location_name == "Michigan", 1, 0)
eating_disorders_md2 $intervention_dummy <- ifelse(eating_disorders_md2 $year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = eating_disorders_md2 )
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) +
## unemployment_rate, data = eating_disorders_md2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.022956 -0.007447 0.000000 0.007447 0.022956
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.798925 0.022524 35.469 < 2e-16 ***
## torc 0.548483 0.016673 32.896 < 2e-16 ***
## intervention_dummy 0.101363 0.021428 4.731 0.000193 ***
## factor(year)2001 0.011800 0.021878 0.539 0.596630
## factor(year)2002 0.134850 0.025413 5.306 5.80e-05 ***
## factor(year)2003 0.167366 0.028240 5.927 1.66e-05 ***
## factor(year)2004 0.142138 0.026546 5.354 5.26e-05 ***
## factor(year)2005 0.088482 0.025634 3.452 0.003046 **
## factor(year)2006 0.082430 0.025857 3.188 0.005387 **
## factor(year)2007 0.111439 0.026314 4.235 0.000558 ***
## factor(year)2008 0.112870 0.029257 3.858 0.001262 **
## factor(year)2009 0.112420 0.056750 1.981 0.064010 .
## factor(year)2010 0.171404 0.052162 3.286 0.004360 **
## factor(year)2011 0.146694 0.042277 3.470 0.002929 **
## factor(year)2012 0.134272 0.037101 3.619 0.002119 **
## factor(year)2013 0.125873 0.035627 3.533 0.002554 **
## factor(year)2014 0.084965 0.027018 3.145 0.005909 **
## factor(year)2015 0.061044 0.021733 2.809 0.012078 *
## factor(year)2016 0.059968 0.020654 2.903 0.009889 **
## factor(year)2017 0.045251 0.019983 2.264 0.036907 *
## factor(year)2018 0.019040 0.019874 0.958 0.351483
## factor(year)2019 NA NA NA NA
## unemployment_rate -0.005317 0.007041 -0.755 0.460457
## torc:intervention_dummy -0.073433 0.015050 -4.879 0.000141 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01987 on 17 degrees of freedom
## Multiple R-squared: 0.9976, Adjusted R-squared: 0.9944
## F-statistic: 315.4 on 22 and 17 DF, p-value: < 2.2e-16
Trend analysis - Ischemic heart disease
# Filtered data for measure_id == 6 (Ischemic heart disease)
ischemic_heart_disease_md5 <- ischemic_heart_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 1)
# Create the line plot for Ischemic heart disease
ggplot(ischemic_heart_disease_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Ischemic heart disease through years (Measure ID = 6)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 6 (Ischemic heart disease)
ischemic_heart_disease_md6 <- ischemic_heart_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 3)
# Create the line plot for Ischemic heart disease measure_id == 6
ggplot(ischemic_heart_disease_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Ischemic heart disease through years (Measure ID = 6)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Ischemic heart disease
# For ischemic_heart_disease_md5
ischemic_heart_disease_md5_diff <- ischemic_heart_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 1) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for ischemic_heart_disease_md5
ggplot(ischemic_heart_disease_md5_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For ischemic_heart_disease_md6
ischemic_heart_disease_md6_diff <- ischemic_heart_disease_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 6,
metric_id == 3) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for ischemic_heart_disease_md6
ggplot(ischemic_heart_disease_md6_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
DiD - Ischemic heart disease
measure_id == 6(Incidence),
metric_id == 1(Number)
ischemic_heart_disease_md5$torc <- ifelse(ischemic_heart_disease_md5$location_name == "Michigan", 1, 0)
ischemic_heart_disease_md5$intervention_dummy <- ifelse(ischemic_heart_disease_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy+ unemployment_rate + factor(year), data = ischemic_heart_disease_md5)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate +
## factor(year), data = ischemic_heart_disease_md5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -588.6 -230.4 0.0 230.4 588.6
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20391.84 560.59 36.375 < 2e-16 ***
## torc 22443.10 414.97 54.083 < 2e-16 ***
## intervention_dummy -6580.36 533.29 -12.339 6.56e-10 ***
## unemployment_rate 703.07 175.23 4.012 0.000903 ***
## factor(year)2001 -1372.19 544.49 -2.520 0.022021 *
## factor(year)2002 -2656.84 632.48 -4.201 0.000601 ***
## factor(year)2003 -3743.77 702.84 -5.327 5.57e-05 ***
## factor(year)2004 -4152.74 660.68 -6.286 8.21e-06 ***
## factor(year)2005 -4465.80 637.98 -7.000 2.14e-06 ***
## factor(year)2006 -4920.78 643.55 -7.646 6.72e-07 ***
## factor(year)2007 -5442.14 654.90 -8.310 2.17e-07 ***
## factor(year)2008 -6294.96 728.16 -8.645 1.25e-07 ***
## factor(year)2009 -9853.04 1412.41 -6.976 2.23e-06 ***
## factor(year)2010 -9558.63 1298.22 -7.363 1.11e-06 ***
## factor(year)2011 -8636.83 1052.19 -8.208 2.57e-07 ***
## factor(year)2012 -8213.23 923.38 -8.895 8.37e-08 ***
## factor(year)2013 -8205.06 886.70 -9.253 4.77e-08 ***
## factor(year)2014 -949.44 672.42 -1.412 0.176002
## factor(year)2015 -98.92 540.89 -0.183 0.857051
## factor(year)2016 119.82 514.04 0.233 0.818467
## factor(year)2017 363.24 497.34 0.730 0.475105
## factor(year)2018 409.81 494.63 0.829 0.418859
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy 287.10 374.56 0.766 0.453900
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 494.6 on 17 degrees of freedom
## Multiple R-squared: 0.9993, Adjusted R-squared: 0.9984
## F-statistic: 1082 on 22 and 17 DF, p-value: < 2.2e-16
measure_id == 6(Incidence)
metric_id == 3(Rate)
ischemic_heart_disease_md6$torc <- ifelse(ischemic_heart_disease_md6$location_name == "Michigan", 1, 0)
ischemic_heart_disease_md6$intervention_dummy <- ifelse(ischemic_heart_disease_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy+ unemployment_rate + factor(year), data = ischemic_heart_disease_md6)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate +
## factor(year), data = ischemic_heart_disease_md6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.443 -5.166 0.000 5.166 23.443
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 333.745 17.125 19.489 4.57e-13 ***
## torc 37.325 12.676 2.944 0.009065 **
## intervention_dummy -133.544 16.291 -8.198 2.61e-07 ***
## unemployment_rate 25.419 5.353 4.749 0.000186 ***
## factor(year)2001 -41.550 16.633 -2.498 0.023039 *
## factor(year)2002 -75.986 19.321 -3.933 0.001072 **
## factor(year)2003 -102.048 21.470 -4.753 0.000184 ***
## factor(year)2004 -103.805 20.182 -5.143 8.13e-05 ***
## factor(year)2005 -106.819 19.489 -5.481 4.06e-05 ***
## factor(year)2006 -115.846 19.659 -5.893 1.77e-05 ***
## factor(year)2007 -126.549 20.006 -6.326 7.60e-06 ***
## factor(year)2008 -149.452 22.244 -6.719 3.60e-06 ***
## factor(year)2009 -270.560 43.146 -6.271 8.45e-06 ***
## factor(year)2010 -256.401 39.657 -6.465 5.82e-06 ***
## factor(year)2011 -219.655 32.142 -6.834 2.91e-06 ***
## factor(year)2012 -200.478 28.207 -7.107 1.76e-06 ***
## factor(year)2013 -196.236 27.086 -7.245 1.37e-06 ***
## factor(year)2014 -57.694 20.541 -2.809 0.012081 *
## factor(year)2015 -24.352 16.523 -1.474 0.158801
## factor(year)2016 -13.723 15.702 -0.874 0.394345
## factor(year)2017 -1.836 15.192 -0.121 0.905238
## factor(year)2018 5.188 15.110 0.343 0.735526
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy 59.059 11.442 5.162 7.82e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.11 on 17 degrees of freedom
## Multiple R-squared: 0.9749, Adjusted R-squared: 0.9423
## F-statistic: 29.96 on 22 and 17 DF, p-value: 1.291e-09
Trend analysis - Alcohol use disorders
# Filtered data for measure_id == 1 (Alcohol use disorders)
alcohol_use_disorders_md1 <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 3)
# Create the line plot for Alcohol use disorders measure_id == 1
ggplot(alcohol_use_disorders_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 1)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 1 (Alcohol use disorders)
alcohol_use_disorders_md2 <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 1)
# Create the line plot for Alcohol use disorders
ggplot(alcohol_use_disorders_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 2)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 2 (Alcohol use disorders)
alcohol_use_disorders_md5 <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 3)
# Create the line plot for Alcohol use disorders measure_id == 2
ggplot(alcohol_use_disorders_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 5)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
# Filtered data for measure_id == 2 (Alcohol use disorders)
alcohol_use_disorders_md6 <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 1)
# Create the line plot for Alcohol use disorders measure_id == 6
ggplot(alcohol_use_disorders_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 6)",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Alcohol use disorders
# For alcohol_use_disorders_md1
alcohol_use_disorders_md1_diff <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 3) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for alcohol_use_disorders_md1
ggplot(alcohol_use_disorders_md1_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For alcohol_use_disorders_md2
alcohol_use_disorders_md2_diff <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 1,
metric_id == 1) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for alcohol_use_disorders_md2
ggplot(alcohol_use_disorders_md2_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 2)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For alcohol_use_disorders_md5
alcohol_use_disorders_md5_diff <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 3) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for alcohol_use_disorders_md5
ggplot(alcohol_use_disorders_md5_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For alcohol_use_disorders_md6
alcohol_use_disorders_md6_diff <- alcohol_use_disorders_df %>%
filter(location_name %in% c("Michigan", "Wisconsin"),
measure_id == 2,
metric_id == 1) %>%
group_by(year) %>%
summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))
# Plot the difference values over the years for alcohol_use_disorders_md6
ggplot(alcohol_use_disorders_md6_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
DiD - Alcohol use disorders
measure_id == 1(Death),
metric_id == 3(Rate)
alcohol_use_disorders_md1$torc <- ifelse(alcohol_use_disorders_md1$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md1$intervention_dummy <- ifelse(alcohol_use_disorders_md1$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md1)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year),
## data = alcohol_use_disorders_md1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.07100 -0.02373 0.00000 0.02373 0.07100
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.48168 0.03876 89.826 < 2e-16 ***
## torc -0.58067 0.02002 -29.011 < 2e-16 ***
## intervention_dummy 1.64138 0.05602 29.300 < 2e-16 ***
## factor(year)2001 0.14331 0.05296 2.706 0.014463 *
## factor(year)2002 0.25367 0.05296 4.790 0.000147 ***
## factor(year)2003 0.31741 0.05296 5.994 1.14e-05 ***
## factor(year)2004 0.37269 0.05296 7.038 1.45e-06 ***
## factor(year)2005 0.50455 0.05296 9.528 1.87e-08 ***
## factor(year)2006 0.55431 0.05296 10.467 4.40e-09 ***
## factor(year)2007 0.66532 0.05296 12.564 2.40e-10 ***
## factor(year)2008 0.73669 0.05296 13.911 4.52e-11 ***
## factor(year)2009 0.82080 0.05296 15.499 7.44e-12 ***
## factor(year)2010 0.97771 0.05296 18.463 3.81e-13 ***
## factor(year)2011 1.11023 0.05296 20.965 4.26e-14 ***
## factor(year)2012 1.19162 0.05296 22.502 1.25e-14 ***
## factor(year)2013 1.35840 0.05296 25.651 1.26e-15 ***
## factor(year)2014 -0.02394 0.05296 -0.452 0.656597
## factor(year)2015 0.11402 0.05296 2.153 0.045118 *
## factor(year)2016 0.24878 0.05296 4.698 0.000179 ***
## factor(year)2017 0.16494 0.05296 3.115 0.005984 **
## factor(year)2018 0.08728 0.05296 1.648 0.116684
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy -0.26363 0.03654 -7.214 1.03e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.05296 on 18 degrees of freedom
## Multiple R-squared: 0.997, Adjusted R-squared: 0.9935
## F-statistic: 285.8 on 21 and 18 DF, p-value: < 2.2e-16
measure_id == 1(Death),
metric_id == 1(Number)
alcohol_use_disorders_md2$torc <- ifelse(alcohol_use_disorders_md2$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md2$intervention_dummy <- ifelse(alcohol_use_disorders_md2$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md2)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year),
## data = alcohol_use_disorders_md2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.336 -4.206 0.000 4.206 10.336
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 176.0832 6.4565 27.272 4.30e-16 ***
## torc 121.0568 3.3341 36.309 < 2e-16 ***
## intervention_dummy 120.2046 9.3315 12.882 1.60e-10 ***
## factor(year)2001 11.5136 8.8212 1.305 0.20826
## factor(year)2002 20.9308 8.8212 2.373 0.02900 *
## factor(year)2003 26.5242 8.8212 3.007 0.00757 **
## factor(year)2004 31.8001 8.8212 3.605 0.00202 **
## factor(year)2005 42.1669 8.8212 4.780 0.00015 ***
## factor(year)2006 47.3682 8.8212 5.370 4.20e-05 ***
## factor(year)2007 56.0065 8.8212 6.349 5.56e-06 ***
## factor(year)2008 63.2615 8.8212 7.172 1.12e-06 ***
## factor(year)2009 70.9364 8.8212 8.042 2.28e-07 ***
## factor(year)2010 83.5093 8.8212 9.467 2.06e-08 ***
## factor(year)2011 93.4474 8.8212 10.593 3.65e-09 ***
## factor(year)2012 98.8016 8.8212 11.200 1.52e-09 ***
## factor(year)2013 110.9533 8.8212 12.578 2.36e-10 ***
## factor(year)2014 -0.4215 8.8212 -0.048 0.96242
## factor(year)2015 10.1948 8.8212 1.156 0.26291
## factor(year)2016 20.1592 8.8212 2.285 0.03464 *
## factor(year)2017 13.3461 8.8212 1.513 0.14765
## factor(year)2018 7.0281 8.8212 0.797 0.43599
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy 1.8569 6.0872 0.305 0.76383
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.821 on 18 degrees of freedom
## Multiple R-squared: 0.9939, Adjusted R-squared: 0.9867
## F-statistic: 138.9 on 21 and 18 DF, p-value: 7.866e-16
measure_id == 2(DALY),
metric_id == 3(Rate)
alcohol_use_disorders_md5$torc <- ifelse(alcohol_use_disorders_md5$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md5$intervention_dummy <- ifelse(alcohol_use_disorders_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md5)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year),
## data = alcohol_use_disorders_md5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9415 -0.9172 0.0000 0.9172 2.9415
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 368.9525 1.5283 241.421 < 2e-16 ***
## torc -12.6790 0.7892 -16.066 4.06e-12 ***
## intervention_dummy 49.9864 2.2088 22.631 1.13e-14 ***
## factor(year)2001 6.3210 2.0880 3.027 0.007242 **
## factor(year)2002 12.0451 2.0880 5.769 1.82e-05 ***
## factor(year)2003 15.8617 2.0880 7.597 5.08e-07 ***
## factor(year)2004 18.9949 2.0880 9.097 3.75e-08 ***
## factor(year)2005 24.6329 2.0880 11.797 6.63e-10 ***
## factor(year)2006 26.6471 2.0880 12.762 1.86e-10 ***
## factor(year)2007 30.2521 2.0880 14.489 2.30e-11 ***
## factor(year)2008 32.0852 2.0880 15.367 8.60e-12 ***
## factor(year)2009 34.5255 2.0880 16.535 2.50e-12 ***
## factor(year)2010 39.6908 2.0880 19.009 2.31e-13 ***
## factor(year)2011 44.1026 2.0880 21.122 3.75e-14 ***
## factor(year)2012 46.1160 2.0880 22.086 1.73e-14 ***
## factor(year)2013 51.0898 2.0880 24.468 2.89e-15 ***
## factor(year)2014 8.2103 2.0880 3.932 0.000977 ***
## factor(year)2015 11.1097 2.0880 5.321 4.66e-05 ***
## factor(year)2016 12.7107 2.0880 6.088 9.43e-06 ***
## factor(year)2017 6.4916 2.0880 3.109 0.006059 **
## factor(year)2018 2.4146 2.0880 1.156 0.262620
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy -7.8084 1.4408 -5.419 3.78e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.088 on 18 degrees of freedom
## Multiple R-squared: 0.9945, Adjusted R-squared: 0.9881
## F-statistic: 155.7 on 21 and 18 DF, p-value: 2.836e-16
measure_id == 2(DALY),
metric_id == 1(Number)
alcohol_use_disorders_md6$torc <- ifelse(alcohol_use_disorders_md6$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md6$intervention_dummy <- ifelse(alcohol_use_disorders_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md6)
summary(ymodel)
##
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year),
## data = alcohol_use_disorders_md6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -403.4 -122.5 0.0 122.5 403.4
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19546.7 202.6 96.497 < 2e-16 ***
## torc 15938.1 104.6 152.367 < 2e-16 ***
## intervention_dummy 4725.1 292.8 16.140 3.76e-12 ***
## factor(year)2001 569.8 276.8 2.059 0.054271 .
## factor(year)2002 1088.8 276.8 3.934 0.000972 ***
## factor(year)2003 1458.2 276.8 5.269 5.20e-05 ***
## factor(year)2004 1779.0 276.8 6.428 4.75e-06 ***
## factor(year)2005 2258.9 276.8 8.162 1.84e-07 ***
## factor(year)2006 2505.5 276.8 9.053 4.03e-08 ***
## factor(year)2007 2818.6 276.8 10.185 6.73e-09 ***
## factor(year)2008 3055.0 276.8 11.039 1.91e-09 ***
## factor(year)2009 3318.9 276.8 11.992 5.10e-10 ***
## factor(year)2010 3748.6 276.8 13.545 7.02e-11 ***
## factor(year)2011 4090.4 276.8 14.780 1.65e-11 ***
## factor(year)2012 4213.3 276.8 15.224 1.01e-11 ***
## factor(year)2013 4581.7 276.8 16.555 2.45e-12 ***
## factor(year)2014 703.3 276.8 2.541 0.020467 *
## factor(year)2015 936.3 276.8 3.383 0.003312 **
## factor(year)2016 1040.7 276.8 3.760 0.001433 **
## factor(year)2017 533.0 276.8 1.926 0.070055 .
## factor(year)2018 202.1 276.8 0.730 0.474673
## factor(year)2019 NA NA NA NA
## torc:intervention_dummy -1190.0 191.0 -6.231 7.04e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 276.8 on 18 degrees of freedom
## Multiple R-squared: 0.9995, Adjusted R-squared: 0.9988
## F-statistic: 1569 on 21 and 18 DF, p-value: < 2.2e-16
Load data : health data (sex analysis)
hbdata <- read.csv("hdd.csv")
Select the disease and measurement we want to focus on
hbdata_md1 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 2,
metric_id == 3)
hbdata_md2 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 2,
metric_id == 1)
hbdata_md3 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 5,
metric_id == 3)
hbdata_md4 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 5,
metric_id == 1)
hbdata_md5 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 6,
metric_id == 3)
hbdata_md6 <- hbdata %>%
filter(location_name %in% c("Michigan"),
measure_id == 6,
metric_id == 1)
Trend analysis - Total burden related to hepatitis B
ggplot(hbdata_md1, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
ggplot(hbdata_md2, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
ggplot(hbdata_md3, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
ggplot(hbdata_md4, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
ggplot(hbdata_md5, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
ggplot(hbdata_md6, aes(x = year, y = val, color = sex_name, group = sex_name)) +
geom_line() +
labs(title = "Comparison of 'val' metric for BB through years",
x = "Year",
y = "Value",
color = "Location") +
theme_bw()
Difference analysis - Total burden related to hepatitis B
hbdata_md1_diff <- hbdata_md1 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md1
ggplot(hbdata_md1_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
hbdata_md2_diff <- hbdata_md2 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md1
ggplot(hbdata_md2_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For hbdata_md3
hbdata_md3_diff <- hbdata_md3 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md3
ggplot(hbdata_md3_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 3)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For hbdata_md4
hbdata_md4_diff <- hbdata_md4 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md4
ggplot(hbdata_md4_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 4)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For hbdata_md5
hbdata_md5_diff <- hbdata_md5 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md5
ggplot(hbdata_md5_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
# For hbdata_md6
hbdata_md6_diff <- hbdata_md6 %>%
group_by(year) %>%
summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))
# Plot the difference values over the years for hbdata_md6
ggplot(hbdata_md6_diff, aes(x = year, y = difference)) +
geom_line() +
geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +
labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
x = "Year",
y = "Difference",
color = "Location") +
theme_bw()
val=β0+β1×sex+β2×intervention_dummy+β3×year_factor+β4×(sex×intervention_dummy)+ϵ
Where:
val is the dependent variable (the ‘val’ metric).
sex is a binary variable indicating whether the individual is Male.
intervention_dummy is a binary variable indicating whether the year is after 2014 (intervention period).
year_factor is a categorical predictor representing the year (as a factor variable).
β0 is the intercept, representing the expected value of ‘val’ when all predictors are zero.
β1 is the coefficient for the effect of ‘sex’ on ‘val’.
β2 is the coefficient for the effect of ‘intervention_dummy’ on ‘val’.
β3 is the coefficient for the effect of ‘year_factor’ on ‘val’.
β4 is the coefficient for the interaction effect between ‘sex’ and ‘intervention_dummy’.
ϵ is the error term, representing the variability in ‘val’ that is not explained by the predictors.
# Fit a linear regression model using lm() with ‘val’ as the dependent variable
# ‘sex’ as the binary predictor representing Male
# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)
# Interaction between ‘sex’ and ‘intervention_dummy’ to capture the differential effect of intervention for Males
# ‘year’ as a factor variable
DiD - Total burden related to hepatitis B- sex
measure_id == 2(DALY),
metric_id == 3(Rate)
# Create a binary variable 'sex' indicating whether the sex is Male (1) or not (0)
hbdata_md1$sex <- ifelse(hbdata_md1$sex_name == "Male", 1, 0)
# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
hbdata_md1$intervention_dummy <- ifelse(hbdata_md1$year >= 2014, 1, 0)
#Modeling
ymodel <- lm(val ~ sex * intervention_dummy + factor(year), data = hbdata_md1)
# Display the summary of the linear regression model
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4388 -0.1772 0.0000 0.1772 0.4388
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.34110 0.26948 53.218 < 2e-16 ***
## sex 22.85500 0.13916 164.238 < 2e-16 ***
## intervention_dummy -1.05045 0.38948 -2.697 0.0147 *
## factor(year)2001 -0.06825 0.36818 -0.185 0.8550
## factor(year)2002 0.06340 0.36818 0.172 0.8652
## factor(year)2003 -0.67466 0.36818 -1.832 0.0835 .
## factor(year)2004 -0.94794 0.36818 -2.575 0.0191 *
## factor(year)2005 -0.47071 0.36818 -1.278 0.2173
## factor(year)2006 -0.47206 0.36818 -1.282 0.2161
## factor(year)2007 -0.59184 0.36818 -1.607 0.1253
## factor(year)2008 -0.50993 0.36818 -1.385 0.1830
## factor(year)2009 -0.66234 0.36818 -1.799 0.0888 .
## factor(year)2010 -0.09127 0.36818 -0.248 0.8070
## factor(year)2011 0.20625 0.36818 0.560 0.5823
## factor(year)2012 0.11561 0.36818 0.314 0.7571
## factor(year)2013 0.98474 0.36818 2.675 0.0155 *
## factor(year)2014 2.65186 0.36818 7.203 1.06e-06 ***
## factor(year)2015 3.10777 0.36818 8.441 1.13e-07 ***
## factor(year)2016 1.94611 0.36818 5.286 5.02e-05 ***
## factor(year)2017 -0.03174 0.36818 -0.086 0.9323
## factor(year)2018 -0.09007 0.36818 -0.245 0.8095
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -0.09975 0.25407 -0.393 0.6992
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3682 on 18 degrees of freedom
## Multiple R-squared: 0.9995, Adjusted R-squared: 0.999
## F-statistic: 1841 on 21 and 18 DF, p-value: < 2.2e-16
measure_id == 2(DALY),
metric_id == 1(Number)
hbdata_md2$sex <- ifelse(hbdata_md2$sex_name == "Male", 1, 0)
hbdata_md2$intervention_dummy <- ifelse(hbdata_md2$year >= 2014, 1, 0)
ymodel <- lm(val ~ sex*intervention_dummy+factor(year), data = hbdata_md2)
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.980 -9.126 0.000 9.126 21.980
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 721.09444 13.63350 52.891 < 2e-16 ***
## sex 1088.39547 7.04031 154.595 < 2e-16 ***
## intervention_dummy -65.57082 19.70448 -3.328 0.00374 **
## factor(year)2001 -0.10306 18.62691 -0.006 0.99565
## factor(year)2002 8.29753 18.62691 0.445 0.66130
## factor(year)2003 -27.05767 18.62691 -1.453 0.16354
## factor(year)2004 -40.04614 18.62691 -2.150 0.04541 *
## factor(year)2005 -16.53151 18.62691 -0.888 0.38651
## factor(year)2006 -17.45172 18.62691 -0.937 0.36121
## factor(year)2007 -24.25653 18.62691 -1.302 0.20926
## factor(year)2008 -21.64332 18.62691 -1.162 0.26044
## factor(year)2009 -30.99888 18.62691 -1.664 0.11338
## factor(year)2010 -5.25081 18.62691 -0.282 0.78124
## factor(year)2011 6.77967 18.62691 0.364 0.72012
## factor(year)2012 0.01649 18.62691 0.001 0.99930
## factor(year)2013 40.08647 18.62691 2.152 0.04521 *
## factor(year)2014 143.71347 18.62691 7.715 4.09e-07 ***
## factor(year)2015 163.47784 18.62691 8.776 6.39e-08 ***
## factor(year)2016 103.93741 18.62691 5.580 2.69e-05 ***
## factor(year)2017 4.83927 18.62691 0.260 0.79797
## factor(year)2018 -1.13131 18.62691 -0.061 0.95224
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -25.55646 12.85379 -1.988 0.06221 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.63 on 18 degrees of freedom
## Multiple R-squared: 0.9995, Adjusted R-squared: 0.9988
## F-statistic: 1613 on 21 and 18 DF, p-value: < 2.2e-16
measure_id == 5(Prevalence),
metric_id == 3(Rate)
# For hbdata_md3
hbdata_md3$sex <- ifelse(hbdata_md3$sex_name == "Male", 1, 0)
hbdata_md3$intervention_dummy <- ifelse(hbdata_md3$year >= 2014, 1, 0)
ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md3)
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -22.941 -9.433 0.000 9.433 22.941
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 304.117 14.318 21.240 3.4e-14 ***
## sex 285.494 7.394 38.613 < 2e-16 ***
## intervention_dummy -96.556 20.694 -4.666 0.000192 ***
## factor(year)2001 -2.966 19.562 -0.152 0.881154
## factor(year)2002 -8.025 19.562 -0.410 0.686461
## factor(year)2003 -13.882 19.562 -0.710 0.487015
## factor(year)2004 -19.076 19.562 -0.975 0.342398
## factor(year)2005 -22.245 19.562 -1.137 0.270376
## factor(year)2006 -27.823 19.562 -1.422 0.172035
## factor(year)2007 -38.743 19.562 -1.981 0.063134 .
## factor(year)2008 -51.524 19.562 -2.634 0.016852 *
## factor(year)2009 -62.611 19.562 -3.201 0.004955 **
## factor(year)2010 -68.501 19.562 -3.502 0.002547 **
## factor(year)2011 -72.588 19.562 -3.711 0.001600 **
## factor(year)2012 -79.361 19.562 -4.057 0.000740 ***
## factor(year)2013 -87.150 19.562 -4.455 0.000306 ***
## factor(year)2014 29.507 19.562 1.508 0.148803
## factor(year)2015 24.948 19.562 1.275 0.218408
## factor(year)2016 14.151 19.562 0.723 0.478746
## factor(year)2017 3.664 19.562 0.187 0.853534
## factor(year)2018 1.483 19.562 0.076 0.940411
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -54.374 13.499 -4.028 0.000789 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.56 on 18 degrees of freedom
## Multiple R-squared: 0.9915, Adjusted R-squared: 0.9815
## F-statistic: 99.49 on 21 and 18 DF, p-value: 1.517e-14
measure_id == 5(Prevalence),
metric_id == 1(Number)
# For hbdata_md4
hbdata_md4$sex <- ifelse(hbdata_md4$sex_name == "Male", 1, 0)
hbdata_md4$intervention_dummy <- ifelse(hbdata_md4$year >= 2014, 1, 0)
ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md4)
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1072 -490 0 490 1072
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15254.02 704.89 21.640 2.46e-14 ***
## sex 13446.72 364.00 36.941 < 2e-16 ***
## intervention_dummy -4997.23 1018.77 -4.905 0.000114 ***
## factor(year)2001 -93.32 963.06 -0.097 0.923881
## factor(year)2002 -308.95 963.06 -0.321 0.752055
## factor(year)2003 -577.34 963.06 -0.599 0.556325
## factor(year)2004 -823.64 963.06 -0.855 0.403662
## factor(year)2005 -982.85 963.06 -1.021 0.320989
## factor(year)2006 -1268.92 963.06 -1.318 0.204168
## factor(year)2007 -1823.31 963.06 -1.893 0.074516 .
## factor(year)2008 -2476.08 963.06 -2.571 0.019233 *
## factor(year)2009 -3050.88 963.06 -3.168 0.005325 **
## factor(year)2010 -3374.99 963.06 -3.504 0.002532 **
## factor(year)2011 -3613.01 963.06 -3.752 0.001461 **
## factor(year)2012 -3981.40 963.06 -4.134 0.000623 ***
## factor(year)2013 -4397.78 963.06 -4.566 0.000239 ***
## factor(year)2014 1612.01 963.06 1.674 0.111450
## factor(year)2015 1356.65 963.06 1.409 0.175967
## factor(year)2016 797.48 963.06 0.828 0.418479
## factor(year)2017 252.35 963.06 0.262 0.796271
## factor(year)2018 109.22 963.06 0.113 0.910964
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -2785.55 664.58 -4.191 0.000549 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 963.1 on 18 degrees of freedom
## Multiple R-squared: 0.9908, Adjusted R-squared: 0.98
## F-statistic: 92.09 on 21 and 18 DF, p-value: 3.003e-14
measure_id == 6(Incidence),
metric_id == 3(Rate)
# For hbdata_md5
hbdata_md5$sex <- ifelse(hbdata_md5$sex_name == "Male", 1, 0)
hbdata_md5$intervention_dummy <- ifelse(hbdata_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md5)
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md5)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.093 -3.536 0.000 3.536 6.093
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.9728 4.4038 15.208 1.02e-11 ***
## sex 56.6121 2.2741 24.894 2.14e-15 ***
## intervention_dummy -34.5190 6.3649 -5.423 3.75e-05 ***
## factor(year)2001 -1.5912 6.0168 -0.264 0.794434
## factor(year)2002 -3.9425 6.0168 -0.655 0.520597
## factor(year)2003 -6.5535 6.0168 -1.089 0.290432
## factor(year)2004 -8.8915 6.0168 -1.478 0.156749
## factor(year)2005 -10.4929 6.0168 -1.744 0.098223 .
## factor(year)2006 -12.5781 6.0168 -2.090 0.051031 .
## factor(year)2007 -16.1239 6.0168 -2.680 0.015293 *
## factor(year)2008 -20.1571 6.0168 -3.350 0.003564 **
## factor(year)2009 -23.6987 6.0168 -3.939 0.000963 ***
## factor(year)2010 -25.8136 6.0168 -4.290 0.000441 ***
## factor(year)2011 -27.2333 6.0168 -4.526 0.000261 ***
## factor(year)2012 -29.1726 6.0168 -4.849 0.000129 ***
## factor(year)2013 -31.3383 6.0168 -5.208 5.93e-05 ***
## factor(year)2014 9.7734 6.0168 1.624 0.121683
## factor(year)2015 8.0636 6.0168 1.340 0.196858
## factor(year)2016 4.1145 6.0168 0.684 0.502792
## factor(year)2017 0.3563 6.0168 0.059 0.953428
## factor(year)2018 -0.3195 6.0168 -0.053 0.958241
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -17.3500 4.1520 -4.179 0.000564 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.017 on 18 degrees of freedom
## Multiple R-squared: 0.9818, Adjusted R-squared: 0.9605
## F-statistic: 46.14 on 21 and 18 DF, p-value: 1.286e-11
measure_id == 6(Incidence),
metric_id == 1(Number)
# For hbdata_md6
hbdata_md6$sex <- ifelse(hbdata_md6$sex_name == "Male", 1, 0)
hbdata_md6$intervention_dummy <- ifelse(hbdata_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md6)
summary(ymodel)
##
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md6)
##
## Residuals:
## Min 1Q Median 3Q Max
## -299.1 -177.8 0.0 177.8 299.1
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3352.901 213.903 15.675 6.16e-12 ***
## sex 2668.770 110.459 24.161 3.61e-15 ***
## intervention_dummy -1747.163 309.154 -5.651 2.32e-05 ***
## factor(year)2001 -67.672 292.247 -0.232 0.819493
## factor(year)2002 -177.315 292.247 -0.607 0.551606
## factor(year)2003 -302.749 292.247 -1.036 0.313950
## factor(year)2004 -416.807 292.247 -1.426 0.170920
## factor(year)2005 -496.751 292.247 -1.700 0.106390
## factor(year)2006 -601.742 292.247 -2.059 0.054262 .
## factor(year)2007 -779.436 292.247 -2.667 0.015712 *
## factor(year)2008 -982.199 292.247 -3.361 0.003480 **
## factor(year)2009 -1161.611 292.247 -3.975 0.000888 ***
## factor(year)2010 -1271.710 292.247 -4.351 0.000385 ***
## factor(year)2011 -1348.193 292.247 -4.613 0.000216 ***
## factor(year)2012 -1449.925 292.247 -4.961 0.000101 ***
## factor(year)2013 -1562.401 292.247 -5.346 4.42e-05 ***
## factor(year)2014 504.270 292.247 1.725 0.101565
## factor(year)2015 414.899 292.247 1.420 0.172787
## factor(year)2016 216.962 292.247 0.742 0.467428
## factor(year)2017 28.701 292.247 0.098 0.922853
## factor(year)2018 -9.826 292.247 -0.034 0.973548
## factor(year)2019 NA NA NA NA
## sex:intervention_dummy -854.978 201.670 -4.239 0.000493 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 292.2 on 18 degrees of freedom
## Multiple R-squared: 0.9812, Adjusted R-squared: 0.9593
## F-statistic: 44.78 on 21 and 18 DF, p-value: 1.667e-11