Housekeeping

knitr::opts_chunk$set(echo = TRUE)
rm(list=ls()) # clear workspace
cat("\014")  # clear console
library(haven)
library(ggplot2)
library(plm)
## Warning: 程辑包'plm'是用R版本4.3.2 来建造的
library(psych)
## Warning: 程辑包'psych'是用R版本4.3.2 来建造的
## 
## 载入程辑包:'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(gplots)
## Warning: 程辑包'gplots'是用R版本4.3.2 来建造的
## 
## 载入程辑包:'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(car)
## Warning: 程辑包'car'是用R版本4.3.2 来建造的
## 载入需要的程辑包:carData
## 
## 载入程辑包:'car'
## The following object is masked from 'package:psych':
## 
##     logit
library(dplyr)
## 
## 载入程辑包:'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:plm':
## 
##     between, lag, lead
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(tidyr)

Load data : MI & WI’S health data

data source: VizHub - GBD Results (healthdata.org)

hlda <- read.csv("hldata.csv")
hda<- read_xlsx("zhdd.xlsx")
hd<-read.csv("jhdd.csv")

Merge data

hlda <- bind_rows(hlda, hda, hd)

Load data : employment data

data source: LAUS database(https://beta.bls.gov/dataQuery/find?fq=survey:%5Bla%5D&s=popularity:D)

mdata <- read_xlsx("michi_unem.xlsx")
wdata <- read_xlsx("wis_unem.xlsx")
mdata_filtered <- mdata %>%
    filter(Period == 'Annual')
mdata_filtered <- mdata_filtered %>%
  mutate(location_name = "Michigan")
wdata_filtered <- wdata %>%
  filter(Period == 'Annual')
wdata_filtered <- wdata_filtered %>%
  mutate(location_name= "Wisconsin")
combined_data <- bind_rows(mdata_filtered, wdata_filtered)
unique_combined_data <- distinct(combined_data)
hlda<- merge(hlda, unique_combined_data, by = c("location_name", "year"))

Summary data

summary(hlda)
##  location_name           year        measure_id    measure_name      
##  Length:17600       Min.   :2000   Min.   :1.000   Length:17600      
##  Class :character   1st Qu.:2005   1st Qu.:2.000   Class :character  
##  Mode  :character   Median :2010   Median :5.000   Mode  :character  
##                     Mean   :2010   Mean   :3.552                     
##                     3rd Qu.:2014   3rd Qu.:5.000                     
##                     Max.   :2019   Max.   :6.000                     
##   location_id        sex_id    sex_name             age_id     age_name        
##  Min.   :545.0   Min.   :3   Length:17600       Min.   :22   Length:17600      
##  1st Qu.:545.0   1st Qu.:3   Class :character   1st Qu.:22   Class :character  
##  Median :558.5   Median :3   Mode  :character   Median :22   Mode  :character  
##  Mean   :558.5   Mean   :3                      Mean   :22                     
##  3rd Qu.:572.0   3rd Qu.:3                      3rd Qu.:22                     
##  Max.   :572.0   Max.   :3                      Max.   :22                     
##     cause_id       cause_name          metric_id metric_name       
##  Min.   : 492.0   Length:17600       Min.   :1   Length:17600      
##  1st Qu.: 515.8   Class :character   1st Qu.:1   Class :character  
##  Median : 535.0   Mode  :character   Median :2   Mode  :character  
##  Mean   : 583.8                      Mean   :2                     
##  3rd Qu.: 570.0                      3rd Qu.:3                     
##  Max.   :1027.0                      Max.   :3                     
##       val                upper               lower              Period         
##  Min.   :      0.0   Min.   :      0.0   Min.   :      0.0   Length:17600      
##  1st Qu.:      0.0   1st Qu.:      0.0   1st Qu.:      0.0   Class :character  
##  Median :    140.8   Median :    168.3   Median :    103.8   Mode  :character  
##  Mean   :  20789.9   Mean   :  23832.9   Mean   :  18102.3                     
##  3rd Qu.:   3239.0   3rd Qu.:   3858.0   3rd Qu.:   2671.2                     
##  Max.   :1388806.1   Max.   :1540001.5   Max.   :1258338.9                     
##  civilian noninstitutional population labor force participation rate
##  Min.   :4086373                      Min.   :60.10                 
##  1st Qu.:4431063                      1st Qu.:62.35                 
##  Median :6093415                      Median :67.60                 
##  Mean   :6095620                      Mean   :66.52                 
##  3rd Qu.:7771971                      3rd Qu.:69.75                 
##  Max.   :8049908                      Max.   :73.10                 
##  employment-population ratio  labor force        employment     
##  Min.   :53.90               Min.   :2985643   Min.   :2820620  
##  1st Qu.:58.90               1st Qu.:3077936   1st Qu.:2885438  
##  Median :63.75               Median :3905368   Median :3606647  
##  Mean   :62.47               Mean   :3995593   Mean   :3741855  
##  3rd Qu.:65.88               3rd Qu.:4923034   3rd Qu.:4649855  
##  Max.   :70.60               Max.   :5156695   Max.   :4968878  
##   unemployment    unemployment_rate
##  Min.   : 93239   Min.   : 3.000   
##  1st Qu.:150467   1st Qu.: 4.550   
##  Median :219139   Median : 5.350   
##  Mean   :253738   Mean   : 6.160   
##  3rd Qu.:345972   3rd Qu.: 7.125   
##  Max.   :638574   Max.   :13.100

Find is there have missing value

missing_count <- sum(is.na(hlda))

Select the disease and measurement to do subgroup analysis

  1. Mental disorders
# Filter the data for Eating disorders
eating_disorders_df <- dplyr::filter(hlda, cause_name == "Eating disorders")
  1. Chronic respiratory diseases
# Filter the data for Asthma
asthma_df <- dplyr::filter(hlda, cause_name == "Asthma")

# Filter the data for Interstitial lung disease and pulmonary sarcoidosis
interstitial_lung_disease_df <- dplyr::filter(hlda, cause_name == "Interstitial lung disease and pulmonary sarcoidosis")

#Filter the data for Other chronic respiratory diseases
other_respiratory_diseases_df <- dplyr::filter(hlda, cause_name == "Other chronic respiratory diseases")
  1. Cardiovascular diseases
# Filter the data for Ischemic heart disease
ischemic_heart_disease_df <- dplyr::filter(hlda, cause_name == "Ischemic heart disease")
  1. Substance use disorders
# Filter the data for Alcohol use disorders
alcohol_use_disorders_df <- dplyr::filter(hlda, cause_name == "Alcohol use disorders")

Trend analysis - asthma

# For asthma_md5
asthma_md5 <- asthma_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 1)

# Create the line plot for Asthma with detailed data points
ggplot(asthma_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  geom_point() +  # Display data points
  geom_vline(xintercept = 2014, linetype = "dashed", color = "black") +  # Support line at year = 2014
  geom_text(aes(label = val), vjust = -0.5, hjust = -0.5, size = 3) +  # Display data point values
  labs(title = "Comparison of 'val' metric for Asthma through years (metric_id == 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# For asthma_md6
asthma_md6 <- asthma_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 3)

# Create the line plot for Asthma with detailed data points
ggplot(asthma_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  geom_point() +  # Display data points
  geom_vline(xintercept = 2014, linetype = "dashed", color = "black") +  # Support line at year = 2014
  geom_text(aes(label = val), vjust = -0.5, hjust = -0.5, size = 3) +  # Display data point values
  labs(title = "Comparison of 'val' metric for Asthma through years (metric_id == 3)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - asthma

# For asthma_md5
asthma_md5_diff <- asthma_md5 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for asthma_md5
ggplot(asthma_md5_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For asthma_md6
asthma_md6_diff <- asthma_md6 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]),
            unedifference = mean(unemployment_rate[location_name == "Michigan"]) - mean(unemployment_rate[location_name == "Wisconsin"]))

# Plot the difference values over the years for asthma_md6
ggplot(asthma_md6_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

Interrupted Time Series Analysis

difference=β0​+β1​×time_t+β2​×intervention_dummy+β3​×time_t×intervention_dummy+ϵ

Where:

This formula describes how the ‘difference’ variable is predicted based on time, intervention status, and the interaction between time and intervention status, with each coefficient indicating the strength and direction of their respective effects.

# Fit a linear regression model using lm() with ‘difference’ as the dependent variable

# ‘time_t’ as the continuous predictor representing time

# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)

# Interaction between ‘time_t’ and ‘intervention_dummy’ to capture the change in slope after 2014

# Convert the 'year' variable to a numeric factor and create a new variable 'time_t'
asthma_md6_diff$time_t <- as.numeric(factor(asthma_md6_diff$year, levels = unique(asthma_md6_diff$year)))

# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
asthma_md6_diff$intervention_dummy <- ifelse(asthma_md6_diff$year >= 2014, 1, 0)

#Modeling
ymodel <- lm(difference ~ time_t + intervention_dummy + time_t:intervention_dummy, data = asthma_md6_diff)

# Display the summary of the linear regression model
summary(ymodel)
## 
## Call:
## lm(formula = difference ~ time_t + intervention_dummy + time_t:intervention_dummy, 
##     data = asthma_md6_diff)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.000  -7.008   2.670   8.646  27.333 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 43.935      8.545   5.142 9.84e-05 ***
## time_t                      11.180      1.004  11.141 5.99e-09 ***
## intervention_dummy         205.609     64.191   3.203  0.00554 ** 
## time_t:intervention_dummy  -13.656      3.755  -3.637  0.00222 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.14 on 16 degrees of freedom
## Multiple R-squared:  0.9369, Adjusted R-squared:  0.925 
## F-statistic: 79.13 on 3 and 16 DF,  p-value: 8.2e-10

DiD

val=β0​+β1​×torc+β2​×intervention_dummy+β3​×unemployment_rate+β4​×year_factor+β5​×torc×intervention_dummy+ϵ

Where:

# Fit a linear regression model using lm() with ‘val’ as the dependent variable

# ‘torc’ as the binary predictor indicating Michigan location

# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)

# Interaction between ‘torc’ and ‘intervention_dummy’ to capture the differential effect of the intervention in Michigan

# ‘unemployment_rate’ as a continuous predictor

# ‘year’ as a factor variable

DiD - asthma

measure_id == 6(Incidence),

metric_id == 3(Rate)

# Create a binary variable 'torc' indicating whether the location is Michigan (1) or not (0)
asthma_md6$torc <- ifelse(asthma_md6$location_name == "Michigan", 1, 0)

# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
asthma_md6$intervention_dummy <- ifelse(asthma_md6$year >= 2014, 1, 0)

#Modeling
ymodel <- lm(val ~ torc * intervention_dummy + unemployment_rate + factor(year), data = asthma_md6)

# Display the summary of the linear regression model
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate + 
##     factor(year), data = asthma_md6)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.649  -7.023   0.000   7.023  38.649 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              770.938     27.867  27.665 1.41e-15 ***
## torc                      70.582     20.628   3.422 0.003252 ** 
## intervention_dummy       292.813     26.510  11.045 3.54e-09 ***
## unemployment_rate         27.056      8.711   3.106 0.006420 ** 
## factor(year)2001         -36.175     27.066  -1.337 0.198996    
## factor(year)2002         -61.207     31.440  -1.947 0.068277 .  
## factor(year)2003         -75.548     34.938  -2.162 0.045138 *  
## factor(year)2004         -62.151     32.842  -1.892 0.075591 .  
## factor(year)2005         -50.939     31.714  -1.606 0.126641    
## factor(year)2006         -28.927     31.990  -0.904 0.378506    
## factor(year)2007          16.040     32.555   0.493 0.628531    
## factor(year)2008          57.797     36.197   1.597 0.128739    
## factor(year)2009         -10.165     70.210  -0.145 0.886592    
## factor(year)2010          44.118     64.534   0.684 0.503415    
## factor(year)2011         109.802     52.304   2.099 0.051029 .  
## factor(year)2012         159.188     45.901   3.468 0.002941 ** 
## factor(year)2013         190.766     44.077   4.328 0.000457 ***
## factor(year)2014         -95.098     33.426  -2.845 0.011191 *  
## factor(year)2015         -49.951     26.887  -1.858 0.080606 .  
## factor(year)2016         -46.388     25.552  -1.815 0.087140 .  
## factor(year)2017         -42.047     24.722  -1.701 0.107205    
## factor(year)2018         -22.860     24.588  -0.930 0.365518    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy  102.270     18.619   5.493 3.96e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 24.58 on 17 degrees of freedom
## Multiple R-squared:   0.99,  Adjusted R-squared:  0.9771 
## F-statistic: 76.48 on 22 and 17 DF,  p-value: 5.869e-13

measure_id == 6(Incidence),

metric_id == 1(Number)

asthma_md5$torc <- ifelse(asthma_md5$location_name == "Michigan", 1, 0)
asthma_md5$intervention_dummy <- ifelse(asthma_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy +unemployment_rate+ factor(year), data = asthma_md5)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate + 
##     factor(year), data = asthma_md5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5832.0  -969.6     0.0   969.6  5832.0 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              33225.3     4216.5   7.880 4.48e-07 ***
## torc                     45343.6     3121.2  14.528 5.14e-11 ***
## intervention_dummy       21852.6     4011.2   5.448 4.34e-05 ***
## unemployment_rate         3925.9     1318.0   2.979  0.00843 ** 
## factor(year)2001         -4936.9     4095.4  -1.205  0.24454    
## factor(year)2002         -8399.1     4757.2  -1.766  0.09542 .  
## factor(year)2003        -10411.6     5286.4  -1.970  0.06541 .  
## factor(year)2004         -8550.8     4969.2  -1.721  0.10345    
## factor(year)2005         -7159.6     4798.5  -1.492  0.15401    
## factor(year)2006         -5310.5     4840.4  -1.097  0.28789    
## factor(year)2007         -1644.4     4925.8  -0.334  0.74259    
## factor(year)2008           891.4     5476.9   0.163  0.87263    
## factor(year)2009        -12262.5    10623.4  -1.154  0.26435    
## factor(year)2010         -6573.7     9764.5  -0.673  0.50985    
## factor(year)2011          1454.3     7914.0   0.184  0.85637    
## factor(year)2012          6941.7     6945.2   1.000  0.33157    
## factor(year)2013          9940.8     6669.3   1.491  0.15440    
## factor(year)2014        -11794.1     5057.6  -2.332  0.03226 *  
## factor(year)2015         -5793.1     4068.3  -1.424  0.17255    
## factor(year)2016         -4697.5     3866.3  -1.215  0.24098    
## factor(year)2017         -3480.5     3740.7  -0.930  0.36517    
## factor(year)2018         -1469.8     3720.3  -0.395  0.69770    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy  14213.8     2817.3   5.045 9.97e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3720 on 17 degrees of freedom
## Multiple R-squared:  0.9939, Adjusted R-squared:  0.986 
## F-statistic: 125.5 on 22 and 17 DF,  p-value: 9.316e-15

Trend analysis - Interstitial lung disease

# Filtered data for measure_id == 6 (Interstitial lung disease and pulmonary sarcoidosis)
interstitial_lung_disease_md5 <- interstitial_lung_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 1)

# Create the line plot for Interstitial lung disease and pulmonary sarcoidosis measure_id == 5
ggplot(interstitial_lung_disease_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Interstitial lung disease and pulmonary sarcoidosis through years (metric_id == 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 6 (Interstitial lung disease and pulmonary sarcoidosis)
interstitial_lung_disease_md6 <- interstitial_lung_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 3)

# Create the line plot for Interstitial lung disease and pulmonary sarcoidosis measure_id == 6
ggplot(interstitial_lung_disease_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Interstitial lung disease and pulmonary sarcoidosis through years (metric_id == 3)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Interstitial lung disease

# For interstitial_lung_disease_md5
interstitial_lung_disease_md5_diff <- interstitial_lung_disease_md5 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for interstitial_lung_disease_md5
ggplot(interstitial_lung_disease_md5_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For interstitial_lung_disease_md6
interstitial_lung_disease_md6_diff <- interstitial_lung_disease_md6 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for interstitial_lung_disease_md6
ggplot(interstitial_lung_disease_md6_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - Interstitial lung disease

measure_id == 6(Incidence),

metric_id == 3(Rate)

interstitial_lung_disease_md6$torc <- ifelse(interstitial_lung_disease_md6$location_name == "Michigan", 1, 0)
interstitial_lung_disease_md6$intervention_dummy <- ifelse(interstitial_lung_disease_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = interstitial_lung_disease_md6)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) + 
##     unemployment_rate, data = interstitial_lung_disease_md6)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -30.72 -10.64   0.00  10.64  30.72 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              436.944     25.011  17.470 2.69e-12 ***
## torc                      21.580     18.514   1.166 0.259856    
## intervention_dummy       694.683     23.793  29.197 5.74e-16 ***
## factor(year)2001         -23.990     24.292  -0.988 0.337216    
## factor(year)2002         -43.840     28.218  -1.554 0.138691    
## factor(year)2003         -56.821     31.357  -1.812 0.087676 .  
## factor(year)2004         -47.881     29.476  -1.624 0.122682    
## factor(year)2005         -35.144     28.463  -1.235 0.233729    
## factor(year)2006          22.887     28.712   0.797 0.436360    
## factor(year)2007         147.250     29.218   5.040 0.000101 ***
## factor(year)2008         289.706     32.487   8.918 8.07e-08 ***
## factor(year)2009         339.471     63.014   5.387 4.91e-05 ***
## factor(year)2010         440.404     57.920   7.604 7.24e-07 ***
## factor(year)2011         511.217     46.943  10.890 4.37e-09 ***
## factor(year)2012         562.732     41.196  13.660 1.35e-10 ***
## factor(year)2013         600.131     39.560  15.170 2.59e-11 ***
## factor(year)2014         -87.561     30.000  -2.919 0.009575 ** 
## factor(year)2015         -23.055     24.131  -0.955 0.352773    
## factor(year)2016          51.182     22.933   2.232 0.039379 *  
## factor(year)2017         119.073     22.188   5.366 5.13e-05 ***
## factor(year)2018         102.762     22.068   4.657 0.000226 ***
## factor(year)2019              NA         NA      NA       NA    
## unemployment_rate         20.352      7.818   2.603 0.018554 *  
## torc:intervention_dummy  108.807     16.711   6.511 5.33e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 22.06 on 17 degrees of freedom
## Multiple R-squared:  0.9982, Adjusted R-squared:  0.9959 
## F-statistic: 435.8 on 22 and 17 DF,  p-value: < 2.2e-16

measure_id == 6(Incidence),

metric_id == 1(Number)

interstitial_lung_disease_md5$torc <- ifelse(interstitial_lung_disease_md5$location_name == "Michigan", 1, 0)
interstitial_lung_disease_md5$intervention_dummy <- ifelse(interstitial_lung_disease_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = interstitial_lung_disease_md5)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) + 
##     unemployment_rate, data = interstitial_lung_disease_md5)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -10529  -2216      0   2216  10529 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               1972.1     8089.3   0.244  0.81031    
## torc                     22413.5     5988.0   3.743  0.00162 ** 
## intervention_dummy       43254.9     7695.4   5.621 3.06e-05 ***
## factor(year)2001         -9556.9     7857.0  -1.216  0.24047    
## factor(year)2002        -16697.8     9126.6  -1.830  0.08491 .  
## factor(year)2003        -21205.8    10142.0  -2.091  0.05187 .  
## factor(year)2004        -18281.5     9533.5  -1.918  0.07212 .  
## factor(year)2005        -15987.3     9205.9  -1.737  0.10054    
## factor(year)2006        -11645.5     9286.3  -1.254  0.22680    
## factor(year)2007         -2358.8     9450.1  -0.250  0.80589    
## factor(year)2008          5369.1    10507.3   0.511  0.61593    
## factor(year)2009        -17680.8    20380.9  -0.868  0.39774    
## factor(year)2010         -5405.7    18733.2  -0.289  0.77641    
## factor(year)2011          9576.6    15183.0   0.631  0.53660    
## factor(year)2012         18808.8    13324.3   1.412  0.17611    
## factor(year)2013         23313.5    12794.9   1.822  0.08608 .  
## factor(year)2014        -22313.5     9703.0  -2.300  0.03441 *  
## factor(year)2015         -9015.4     7804.9  -1.155  0.26403    
## factor(year)2016          -236.2     7417.5  -0.032  0.97497    
## factor(year)2017          8272.3     7176.5   1.153  0.26499    
## factor(year)2018          8890.1     7137.4   1.246  0.22982    
## factor(year)2019              NA         NA      NA       NA    
## unemployment_rate         7625.8     2528.6   3.016  0.00779 ** 
## torc:intervention_dummy  32009.6     5404.9   5.922 1.67e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7136 on 17 degrees of freedom
## Multiple R-squared:  0.9838, Adjusted R-squared:  0.9627 
## F-statistic: 46.81 on 22 and 17 DF,  p-value: 3.413e-11

Trend analysis - Other chronic respiratory diseases

# Filter the data for measure_id == 2
other_respiratory_diseases_md1 <- other_respiratory_diseases_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 1)

# Create the line plot for other_respiratory_diseases_md1
ggplot(other_respiratory_diseases_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Other chronic respiratory diseases through years(metric_id == 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 2 (Other chronic respiratory diseases)
other_respiratory_diseases_md2 <- other_respiratory_diseases_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 3)

# Create the line plot for Other chronic respiratory diseases measure_id == 2
ggplot(other_respiratory_diseases_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Other chronic respiratory diseases through years (metric_id == 3)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Other chronic respiratory diseases

# For other_respiratory_diseases_md1
other_respiratory_diseases_md1_diff <- other_respiratory_diseases_md1 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for other_respiratory_diseases_md1
ggplot(other_respiratory_diseases_md1_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For other_respiratory_diseases_md2
other_respiratory_diseases_md2_diff <- other_respiratory_diseases_md2 %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for other_respiratory_diseases_md2
ggplot(other_respiratory_diseases_md2_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 2)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - Other chronic respiratory diseases

measure_id == 2(DALY),

metric_id == 1(Number)

other_respiratory_diseases_md2$torc <- ifelse(other_respiratory_diseases_md2$location_name == "Michigan", 1, 0)
other_respiratory_diseases_md2$intervention_dummy <- ifelse(other_respiratory_diseases_md2$year >= 2014, 1, 0)

ymodel <- lm(val ~ torc + intervention_dummy + torc :intervention_dummy+factor(year)+unemployment_rate, data = other_respiratory_diseases_md2)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc + intervention_dummy + torc:intervention_dummy + 
##     factor(year) + unemployment_rate, data = other_respiratory_diseases_md2)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27598 -0.09002  0.00000  0.09002  0.27598 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             25.24161    0.22606 111.661  < 2e-16 ***
## torc                     1.51034    0.16734   9.026 6.80e-08 ***
## intervention_dummy       9.07727    0.21505  42.210  < 2e-16 ***
## factor(year)2001         0.39315    0.21956   1.791 0.091186 .  
## factor(year)2002         1.00606    0.25504   3.945 0.001045 ** 
## factor(year)2003         1.25417    0.28342   4.425 0.000371 ***
## factor(year)2004         1.85323    0.26641   6.956 2.32e-06 ***
## factor(year)2005         3.11516    0.25726  12.109 8.76e-10 ***
## factor(year)2006         3.85157    0.25951  14.842 3.66e-11 ***
## factor(year)2007         4.81369    0.26408  18.228 1.36e-12 ***
## factor(year)2008         5.11066    0.29363  17.405 2.86e-12 ***
## factor(year)2009         3.97222    0.56955   6.974 2.24e-06 ***
## factor(year)2010         4.90640    0.52350   9.372 3.97e-08 ***
## factor(year)2011         6.07190    0.42429  14.311 6.52e-11 ***
## factor(year)2012         6.52057    0.37235  17.512 2.59e-12 ***
## factor(year)2013         7.26161    0.35756  20.309 2.33e-13 ***
## factor(year)2014        -0.78762    0.27115  -2.905 0.009863 ** 
## factor(year)2015        -0.02940    0.21811  -0.135 0.894368    
## factor(year)2016         0.39881    0.20728   1.924 0.071263 .  
## factor(year)2017         0.15203    0.20055   0.758 0.458795    
## factor(year)2018         0.12162    0.19946   0.610 0.550095    
## factor(year)2019              NA         NA      NA       NA    
## unemployment_rate        0.34346    0.07066   4.861 0.000147 ***
## torc:intervention_dummy -0.39950    0.15104  -2.645 0.017016 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1994 on 17 degrees of freedom
## Multiple R-squared:  0.9984, Adjusted R-squared:  0.9963 
## F-statistic: 483.1 on 22 and 17 DF,  p-value: < 2.2e-16

measure_id == 2(DALY),

metric_id == 3(Rate)

other_respiratory_diseases_md1$torc <- ifelse(other_respiratory_diseases_md1$location_name == "Michigan", 1, 0)
other_respiratory_diseases_md1$intervention_dummy <- ifelse(other_respiratory_diseases_md1$year >= 2014, 1, 0)

ymodel <- lm(val ~ torc + intervention_dummy + torc :intervention_dummy+factor(year)+unemployment_rate, data = other_respiratory_diseases_md1)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc + intervention_dummy + torc:intervention_dummy + 
##     factor(year) + unemployment_rate, data = other_respiratory_diseases_md1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -40.71 -18.26   0.00  18.26  40.71 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             1111.878     36.274  30.652 2.55e-16 ***
## torc                    1384.045     26.851  51.545  < 2e-16 ***
## intervention_dummy       693.690     34.507  20.103 2.75e-13 ***
## factor(year)2001         -31.946     35.232  -0.907 0.377222    
## factor(year)2002         -28.518     40.925  -0.697 0.495330    
## factor(year)2003         -32.289     45.478  -0.710 0.487342    
## factor(year)2004          43.413     42.750   1.016 0.324095    
## factor(year)2005         156.348     41.281   3.787 0.001470 ** 
## factor(year)2006         218.305     41.641   5.243 6.62e-05 ***
## factor(year)2007         286.968     42.376   6.772 3.26e-06 ***
## factor(year)2008         280.942     47.116   5.963 1.54e-05 ***
## factor(year)2009         -48.139     91.391  -0.527 0.605185    
## factor(year)2010          61.671     84.003   0.734 0.472856    
## factor(year)2011         233.917     68.083   3.436 0.003154 ** 
## factor(year)2012         314.629     59.748   5.266 6.31e-05 ***
## factor(year)2013         385.627     57.375   6.721 3.59e-06 ***
## factor(year)2014        -196.425     43.510  -4.515 0.000306 ***
## factor(year)2015         -60.469     34.999  -1.728 0.102150    
## factor(year)2016          -8.223     33.261  -0.247 0.807688    
## factor(year)2017          -1.250     32.181  -0.039 0.969469    
## factor(year)2018          14.054     32.005   0.439 0.666112    
## factor(year)2019              NA         NA      NA       NA    
## unemployment_rate         81.724     11.338   7.208 1.46e-06 ***
## torc:intervention_dummy   66.066     24.236   2.726 0.014376 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 32 on 17 degrees of freedom
## Multiple R-squared:  0.9994, Adjusted R-squared:  0.9985 
## F-statistic:  1190 on 22 and 17 DF,  p-value: < 2.2e-16

Trend analysis - Eating disorders

# Filtered data for measure_id == 1
eating_disorders_md1 <- eating_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 3)

# Create the line plot for Eating disorders measure_id == 1
ggplot(eating_disorders_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Eating disorders through years (Measure ID = 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

eating_disorders_md2 <- eating_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 1)

# Create the line plot for Eating disorders measure_id == 2
ggplot(eating_disorders_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Eating disorders through years (Measure ID = 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Eating disorders

# For eating_disorders_md1
eating_disorders_md1_diff <- eating_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 1) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for eating_disorders_md1
ggplot(eating_disorders_md1_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For eating_disorders_md2
eating_disorders_md2_diff <- eating_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 3) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for eating_disorders_md2
ggplot(eating_disorders_md2_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - Eating disorders

measure_id == 1(Death),

metric_id == 1(Number)

eating_disorders_md1 $torc <- ifelse(eating_disorders_md1 $location_name == "Michigan", 1, 0)
eating_disorders_md1 $intervention_dummy <- ifelse(eating_disorders_md1 $year >= 2014, 1, 0)

ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = eating_disorders_md1 )
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) + 
##     unemployment_rate, data = eating_disorders_md1)
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -1.853e-04 -7.078e-05  0.000e+00  7.078e-05  1.853e-04 
## 
## Coefficients: (1 not defined because of singularities)
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              1.469e-02  1.621e-04  90.629  < 2e-16 ***
## torc                    -1.544e-03  1.200e-04 -12.870 3.42e-10 ***
## intervention_dummy       5.889e-04  1.542e-04   3.820 0.001370 ** 
## factor(year)2001        -1.176e-05  1.574e-04  -0.075 0.941321    
## factor(year)2002         1.521e-03  1.828e-04   8.320 2.13e-07 ***
## factor(year)2003         1.853e-03  2.032e-04   9.119 5.88e-08 ***
## factor(year)2004         1.516e-03  1.910e-04   7.939 4.05e-07 ***
## factor(year)2005         7.885e-04  1.844e-04   4.276 0.000511 ***
## factor(year)2006         6.605e-04  1.860e-04   3.550 0.002460 ** 
## factor(year)2007         1.020e-03  1.893e-04   5.389 4.89e-05 ***
## factor(year)2008         9.317e-04  2.105e-04   4.426 0.000370 ***
## factor(year)2009         4.534e-04  4.083e-04   1.111 0.282230    
## factor(year)2010         1.257e-03  3.753e-04   3.350 0.003794 ** 
## factor(year)2011         1.101e-03  3.042e-04   3.618 0.002123 ** 
## factor(year)2012         1.019e-03  2.669e-04   3.819 0.001373 ** 
## factor(year)2013         9.385e-04  2.563e-04   3.662 0.001933 ** 
## factor(year)2014         8.092e-04  1.944e-04   4.163 0.000651 ***
## factor(year)2015         6.017e-04  1.564e-04   3.849 0.001288 ** 
## factor(year)2016         6.688e-04  1.486e-04   4.501 0.000315 ***
## factor(year)2017         5.441e-04  1.438e-04   3.784 0.001480 ** 
## factor(year)2018         2.451e-04  1.430e-04   1.714 0.104686    
## factor(year)2019                NA         NA      NA       NA    
## unemployment_rate        2.556e-05  5.065e-05   0.505 0.620333    
## torc:intervention_dummy -7.679e-05  1.083e-04  -0.709 0.487816    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.000143 on 17 degrees of freedom
## Multiple R-squared:  0.9893, Adjusted R-squared:  0.9754 
## F-statistic: 71.41 on 22 and 17 DF,  p-value: 1.039e-12

measure_id == 1(Death),

metric_id == 3(Rate)

eating_disorders_md2 $torc <- ifelse(eating_disorders_md2 $location_name == "Michigan", 1, 0)
eating_disorders_md2 $intervention_dummy <- ifelse(eating_disorders_md2 $year >= 2014, 1, 0)

ymodel <- lm(val ~ torc*intervention_dummy+factor(year)+unemployment_rate, data = eating_disorders_md2 )
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year) + 
##     unemployment_rate, data = eating_disorders_md2)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.022956 -0.007447  0.000000  0.007447  0.022956 
## 
## Coefficients: (1 not defined because of singularities)
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              0.798925   0.022524  35.469  < 2e-16 ***
## torc                     0.548483   0.016673  32.896  < 2e-16 ***
## intervention_dummy       0.101363   0.021428   4.731 0.000193 ***
## factor(year)2001         0.011800   0.021878   0.539 0.596630    
## factor(year)2002         0.134850   0.025413   5.306 5.80e-05 ***
## factor(year)2003         0.167366   0.028240   5.927 1.66e-05 ***
## factor(year)2004         0.142138   0.026546   5.354 5.26e-05 ***
## factor(year)2005         0.088482   0.025634   3.452 0.003046 ** 
## factor(year)2006         0.082430   0.025857   3.188 0.005387 ** 
## factor(year)2007         0.111439   0.026314   4.235 0.000558 ***
## factor(year)2008         0.112870   0.029257   3.858 0.001262 ** 
## factor(year)2009         0.112420   0.056750   1.981 0.064010 .  
## factor(year)2010         0.171404   0.052162   3.286 0.004360 ** 
## factor(year)2011         0.146694   0.042277   3.470 0.002929 ** 
## factor(year)2012         0.134272   0.037101   3.619 0.002119 ** 
## factor(year)2013         0.125873   0.035627   3.533 0.002554 ** 
## factor(year)2014         0.084965   0.027018   3.145 0.005909 ** 
## factor(year)2015         0.061044   0.021733   2.809 0.012078 *  
## factor(year)2016         0.059968   0.020654   2.903 0.009889 ** 
## factor(year)2017         0.045251   0.019983   2.264 0.036907 *  
## factor(year)2018         0.019040   0.019874   0.958 0.351483    
## factor(year)2019               NA         NA      NA       NA    
## unemployment_rate       -0.005317   0.007041  -0.755 0.460457    
## torc:intervention_dummy -0.073433   0.015050  -4.879 0.000141 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01987 on 17 degrees of freedom
## Multiple R-squared:  0.9976, Adjusted R-squared:  0.9944 
## F-statistic: 315.4 on 22 and 17 DF,  p-value: < 2.2e-16

Trend analysis - Ischemic heart disease

# Filtered data for measure_id == 6 (Ischemic heart disease)
ischemic_heart_disease_md5 <- ischemic_heart_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 1)

# Create the line plot for Ischemic heart disease 
ggplot(ischemic_heart_disease_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Ischemic heart disease through years (Measure ID = 6)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 6 (Ischemic heart disease)
ischemic_heart_disease_md6 <- ischemic_heart_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 3)

# Create the line plot for Ischemic heart disease measure_id == 6
ggplot(ischemic_heart_disease_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Ischemic heart disease through years (Measure ID = 6)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Ischemic heart disease

# For ischemic_heart_disease_md5
ischemic_heart_disease_md5_diff <- ischemic_heart_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 1) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for ischemic_heart_disease_md5
ggplot(ischemic_heart_disease_md5_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For ischemic_heart_disease_md6
ischemic_heart_disease_md6_diff <- ischemic_heart_disease_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 6,
         metric_id == 3) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for ischemic_heart_disease_md6
ggplot(ischemic_heart_disease_md6_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - Ischemic heart disease

measure_id == 6(Incidence),

metric_id == 1(Number)

ischemic_heart_disease_md5$torc <- ifelse(ischemic_heart_disease_md5$location_name == "Michigan", 1, 0)
ischemic_heart_disease_md5$intervention_dummy <- ifelse(ischemic_heart_disease_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy+ unemployment_rate + factor(year), data = ischemic_heart_disease_md5)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate + 
##     factor(year), data = ischemic_heart_disease_md5)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -588.6 -230.4    0.0  230.4  588.6 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             20391.84     560.59  36.375  < 2e-16 ***
## torc                    22443.10     414.97  54.083  < 2e-16 ***
## intervention_dummy      -6580.36     533.29 -12.339 6.56e-10 ***
## unemployment_rate         703.07     175.23   4.012 0.000903 ***
## factor(year)2001        -1372.19     544.49  -2.520 0.022021 *  
## factor(year)2002        -2656.84     632.48  -4.201 0.000601 ***
## factor(year)2003        -3743.77     702.84  -5.327 5.57e-05 ***
## factor(year)2004        -4152.74     660.68  -6.286 8.21e-06 ***
## factor(year)2005        -4465.80     637.98  -7.000 2.14e-06 ***
## factor(year)2006        -4920.78     643.55  -7.646 6.72e-07 ***
## factor(year)2007        -5442.14     654.90  -8.310 2.17e-07 ***
## factor(year)2008        -6294.96     728.16  -8.645 1.25e-07 ***
## factor(year)2009        -9853.04    1412.41  -6.976 2.23e-06 ***
## factor(year)2010        -9558.63    1298.22  -7.363 1.11e-06 ***
## factor(year)2011        -8636.83    1052.19  -8.208 2.57e-07 ***
## factor(year)2012        -8213.23     923.38  -8.895 8.37e-08 ***
## factor(year)2013        -8205.06     886.70  -9.253 4.77e-08 ***
## factor(year)2014         -949.44     672.42  -1.412 0.176002    
## factor(year)2015          -98.92     540.89  -0.183 0.857051    
## factor(year)2016          119.82     514.04   0.233 0.818467    
## factor(year)2017          363.24     497.34   0.730 0.475105    
## factor(year)2018          409.81     494.63   0.829 0.418859    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy   287.10     374.56   0.766 0.453900    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 494.6 on 17 degrees of freedom
## Multiple R-squared:  0.9993, Adjusted R-squared:  0.9984 
## F-statistic:  1082 on 22 and 17 DF,  p-value: < 2.2e-16

measure_id == 6(Incidence)

metric_id == 3(Rate)

ischemic_heart_disease_md6$torc <- ifelse(ischemic_heart_disease_md6$location_name == "Michigan", 1, 0)
ischemic_heart_disease_md6$intervention_dummy <- ifelse(ischemic_heart_disease_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy+ unemployment_rate + factor(year), data = ischemic_heart_disease_md6)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + unemployment_rate + 
##     factor(year), data = ischemic_heart_disease_md6)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.443  -5.166   0.000   5.166  23.443 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              333.745     17.125  19.489 4.57e-13 ***
## torc                      37.325     12.676   2.944 0.009065 ** 
## intervention_dummy      -133.544     16.291  -8.198 2.61e-07 ***
## unemployment_rate         25.419      5.353   4.749 0.000186 ***
## factor(year)2001         -41.550     16.633  -2.498 0.023039 *  
## factor(year)2002         -75.986     19.321  -3.933 0.001072 ** 
## factor(year)2003        -102.048     21.470  -4.753 0.000184 ***
## factor(year)2004        -103.805     20.182  -5.143 8.13e-05 ***
## factor(year)2005        -106.819     19.489  -5.481 4.06e-05 ***
## factor(year)2006        -115.846     19.659  -5.893 1.77e-05 ***
## factor(year)2007        -126.549     20.006  -6.326 7.60e-06 ***
## factor(year)2008        -149.452     22.244  -6.719 3.60e-06 ***
## factor(year)2009        -270.560     43.146  -6.271 8.45e-06 ***
## factor(year)2010        -256.401     39.657  -6.465 5.82e-06 ***
## factor(year)2011        -219.655     32.142  -6.834 2.91e-06 ***
## factor(year)2012        -200.478     28.207  -7.107 1.76e-06 ***
## factor(year)2013        -196.236     27.086  -7.245 1.37e-06 ***
## factor(year)2014         -57.694     20.541  -2.809 0.012081 *  
## factor(year)2015         -24.352     16.523  -1.474 0.158801    
## factor(year)2016         -13.723     15.702  -0.874 0.394345    
## factor(year)2017          -1.836     15.192  -0.121 0.905238    
## factor(year)2018           5.188     15.110   0.343 0.735526    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy   59.059     11.442   5.162 7.82e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.11 on 17 degrees of freedom
## Multiple R-squared:  0.9749, Adjusted R-squared:  0.9423 
## F-statistic: 29.96 on 22 and 17 DF,  p-value: 1.291e-09

Trend analysis - Alcohol use disorders

# Filtered data for measure_id == 1 (Alcohol use disorders)
alcohol_use_disorders_md1 <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 3)

# Create the line plot for Alcohol use disorders measure_id == 1
ggplot(alcohol_use_disorders_md1, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 1)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 1 (Alcohol use disorders)
alcohol_use_disorders_md2 <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 1)

# Create the line plot for Alcohol use disorders 
ggplot(alcohol_use_disorders_md2, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 2)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 2 (Alcohol use disorders)
alcohol_use_disorders_md5 <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 3)

# Create the line plot for Alcohol use disorders measure_id == 2
ggplot(alcohol_use_disorders_md5, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 5)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

# Filtered data for measure_id == 2 (Alcohol use disorders)
alcohol_use_disorders_md6 <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 1)

# Create the line plot for Alcohol use disorders measure_id == 6
ggplot(alcohol_use_disorders_md6, aes(x = year, y = val, color = location_name, group = location_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for Alcohol use disorders through years (Measure ID = 6)",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Alcohol use disorders

# For alcohol_use_disorders_md1
alcohol_use_disorders_md1_diff <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 3) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for alcohol_use_disorders_md1
ggplot(alcohol_use_disorders_md1_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For alcohol_use_disorders_md2
alcohol_use_disorders_md2_diff <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 1,
         metric_id == 1) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for alcohol_use_disorders_md2
ggplot(alcohol_use_disorders_md2_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 2)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For alcohol_use_disorders_md5
alcohol_use_disorders_md5_diff <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 3) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for alcohol_use_disorders_md5
ggplot(alcohol_use_disorders_md5_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For alcohol_use_disorders_md6
alcohol_use_disorders_md6_diff <- alcohol_use_disorders_df %>%
  filter(location_name %in% c("Michigan", "Wisconsin"),
         measure_id == 2,
         metric_id == 1) %>%
  group_by(year) %>%
  summarise(difference = mean(val[location_name == "Michigan"]) - mean(val[location_name == "Wisconsin"]))

# Plot the difference values over the years for alcohol_use_disorders_md6
ggplot(alcohol_use_disorders_md6_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - Alcohol use disorders

measure_id == 1(Death),

metric_id == 3(Rate)

alcohol_use_disorders_md1$torc <- ifelse(alcohol_use_disorders_md1$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md1$intervention_dummy <- ifelse(alcohol_use_disorders_md1$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md1)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year), 
##     data = alcohol_use_disorders_md1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.07100 -0.02373  0.00000  0.02373  0.07100 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              3.48168    0.03876  89.826  < 2e-16 ***
## torc                    -0.58067    0.02002 -29.011  < 2e-16 ***
## intervention_dummy       1.64138    0.05602  29.300  < 2e-16 ***
## factor(year)2001         0.14331    0.05296   2.706 0.014463 *  
## factor(year)2002         0.25367    0.05296   4.790 0.000147 ***
## factor(year)2003         0.31741    0.05296   5.994 1.14e-05 ***
## factor(year)2004         0.37269    0.05296   7.038 1.45e-06 ***
## factor(year)2005         0.50455    0.05296   9.528 1.87e-08 ***
## factor(year)2006         0.55431    0.05296  10.467 4.40e-09 ***
## factor(year)2007         0.66532    0.05296  12.564 2.40e-10 ***
## factor(year)2008         0.73669    0.05296  13.911 4.52e-11 ***
## factor(year)2009         0.82080    0.05296  15.499 7.44e-12 ***
## factor(year)2010         0.97771    0.05296  18.463 3.81e-13 ***
## factor(year)2011         1.11023    0.05296  20.965 4.26e-14 ***
## factor(year)2012         1.19162    0.05296  22.502 1.25e-14 ***
## factor(year)2013         1.35840    0.05296  25.651 1.26e-15 ***
## factor(year)2014        -0.02394    0.05296  -0.452 0.656597    
## factor(year)2015         0.11402    0.05296   2.153 0.045118 *  
## factor(year)2016         0.24878    0.05296   4.698 0.000179 ***
## factor(year)2017         0.16494    0.05296   3.115 0.005984 ** 
## factor(year)2018         0.08728    0.05296   1.648 0.116684    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy -0.26363    0.03654  -7.214 1.03e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.05296 on 18 degrees of freedom
## Multiple R-squared:  0.997,  Adjusted R-squared:  0.9935 
## F-statistic: 285.8 on 21 and 18 DF,  p-value: < 2.2e-16

measure_id == 1(Death),

metric_id == 1(Number)

alcohol_use_disorders_md2$torc <- ifelse(alcohol_use_disorders_md2$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md2$intervention_dummy <- ifelse(alcohol_use_disorders_md2$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md2)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year), 
##     data = alcohol_use_disorders_md2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.336  -4.206   0.000   4.206  10.336 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             176.0832     6.4565  27.272 4.30e-16 ***
## torc                    121.0568     3.3341  36.309  < 2e-16 ***
## intervention_dummy      120.2046     9.3315  12.882 1.60e-10 ***
## factor(year)2001         11.5136     8.8212   1.305  0.20826    
## factor(year)2002         20.9308     8.8212   2.373  0.02900 *  
## factor(year)2003         26.5242     8.8212   3.007  0.00757 ** 
## factor(year)2004         31.8001     8.8212   3.605  0.00202 ** 
## factor(year)2005         42.1669     8.8212   4.780  0.00015 ***
## factor(year)2006         47.3682     8.8212   5.370 4.20e-05 ***
## factor(year)2007         56.0065     8.8212   6.349 5.56e-06 ***
## factor(year)2008         63.2615     8.8212   7.172 1.12e-06 ***
## factor(year)2009         70.9364     8.8212   8.042 2.28e-07 ***
## factor(year)2010         83.5093     8.8212   9.467 2.06e-08 ***
## factor(year)2011         93.4474     8.8212  10.593 3.65e-09 ***
## factor(year)2012         98.8016     8.8212  11.200 1.52e-09 ***
## factor(year)2013        110.9533     8.8212  12.578 2.36e-10 ***
## factor(year)2014         -0.4215     8.8212  -0.048  0.96242    
## factor(year)2015         10.1948     8.8212   1.156  0.26291    
## factor(year)2016         20.1592     8.8212   2.285  0.03464 *  
## factor(year)2017         13.3461     8.8212   1.513  0.14765    
## factor(year)2018          7.0281     8.8212   0.797  0.43599    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy   1.8569     6.0872   0.305  0.76383    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.821 on 18 degrees of freedom
## Multiple R-squared:  0.9939, Adjusted R-squared:  0.9867 
## F-statistic: 138.9 on 21 and 18 DF,  p-value: 7.866e-16

measure_id == 2(DALY),

metric_id == 3(Rate)

alcohol_use_disorders_md5$torc <- ifelse(alcohol_use_disorders_md5$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md5$intervention_dummy <- ifelse(alcohol_use_disorders_md5$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md5)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year), 
##     data = alcohol_use_disorders_md5)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.9415 -0.9172  0.0000  0.9172  2.9415 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             368.9525     1.5283 241.421  < 2e-16 ***
## torc                    -12.6790     0.7892 -16.066 4.06e-12 ***
## intervention_dummy       49.9864     2.2088  22.631 1.13e-14 ***
## factor(year)2001          6.3210     2.0880   3.027 0.007242 ** 
## factor(year)2002         12.0451     2.0880   5.769 1.82e-05 ***
## factor(year)2003         15.8617     2.0880   7.597 5.08e-07 ***
## factor(year)2004         18.9949     2.0880   9.097 3.75e-08 ***
## factor(year)2005         24.6329     2.0880  11.797 6.63e-10 ***
## factor(year)2006         26.6471     2.0880  12.762 1.86e-10 ***
## factor(year)2007         30.2521     2.0880  14.489 2.30e-11 ***
## factor(year)2008         32.0852     2.0880  15.367 8.60e-12 ***
## factor(year)2009         34.5255     2.0880  16.535 2.50e-12 ***
## factor(year)2010         39.6908     2.0880  19.009 2.31e-13 ***
## factor(year)2011         44.1026     2.0880  21.122 3.75e-14 ***
## factor(year)2012         46.1160     2.0880  22.086 1.73e-14 ***
## factor(year)2013         51.0898     2.0880  24.468 2.89e-15 ***
## factor(year)2014          8.2103     2.0880   3.932 0.000977 ***
## factor(year)2015         11.1097     2.0880   5.321 4.66e-05 ***
## factor(year)2016         12.7107     2.0880   6.088 9.43e-06 ***
## factor(year)2017          6.4916     2.0880   3.109 0.006059 ** 
## factor(year)2018          2.4146     2.0880   1.156 0.262620    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy  -7.8084     1.4408  -5.419 3.78e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.088 on 18 degrees of freedom
## Multiple R-squared:  0.9945, Adjusted R-squared:  0.9881 
## F-statistic: 155.7 on 21 and 18 DF,  p-value: 2.836e-16

measure_id == 2(DALY),

metric_id == 1(Number)

alcohol_use_disorders_md6$torc <- ifelse(alcohol_use_disorders_md6$location_name == "Michigan", 1, 0)
alcohol_use_disorders_md6$intervention_dummy <- ifelse(alcohol_use_disorders_md6$year >= 2014, 1, 0)
ymodel <- lm(val ~ torc * intervention_dummy + factor(year), data = alcohol_use_disorders_md6)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ torc * intervention_dummy + factor(year), 
##     data = alcohol_use_disorders_md6)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -403.4 -122.5    0.0  122.5  403.4 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)              19546.7      202.6  96.497  < 2e-16 ***
## torc                     15938.1      104.6 152.367  < 2e-16 ***
## intervention_dummy        4725.1      292.8  16.140 3.76e-12 ***
## factor(year)2001           569.8      276.8   2.059 0.054271 .  
## factor(year)2002          1088.8      276.8   3.934 0.000972 ***
## factor(year)2003          1458.2      276.8   5.269 5.20e-05 ***
## factor(year)2004          1779.0      276.8   6.428 4.75e-06 ***
## factor(year)2005          2258.9      276.8   8.162 1.84e-07 ***
## factor(year)2006          2505.5      276.8   9.053 4.03e-08 ***
## factor(year)2007          2818.6      276.8  10.185 6.73e-09 ***
## factor(year)2008          3055.0      276.8  11.039 1.91e-09 ***
## factor(year)2009          3318.9      276.8  11.992 5.10e-10 ***
## factor(year)2010          3748.6      276.8  13.545 7.02e-11 ***
## factor(year)2011          4090.4      276.8  14.780 1.65e-11 ***
## factor(year)2012          4213.3      276.8  15.224 1.01e-11 ***
## factor(year)2013          4581.7      276.8  16.555 2.45e-12 ***
## factor(year)2014           703.3      276.8   2.541 0.020467 *  
## factor(year)2015           936.3      276.8   3.383 0.003312 ** 
## factor(year)2016          1040.7      276.8   3.760 0.001433 ** 
## factor(year)2017           533.0      276.8   1.926 0.070055 .  
## factor(year)2018           202.1      276.8   0.730 0.474673    
## factor(year)2019              NA         NA      NA       NA    
## torc:intervention_dummy  -1190.0      191.0  -6.231 7.04e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 276.8 on 18 degrees of freedom
## Multiple R-squared:  0.9995, Adjusted R-squared:  0.9988 
## F-statistic:  1569 on 21 and 18 DF,  p-value: < 2.2e-16

Further

Load data : health data (sex analysis)

hbdata <- read.csv("hdd.csv")

Select the disease and measurement we want to focus on

hbdata_md1 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 2,
         metric_id == 3)
hbdata_md2 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 2,
         metric_id == 1)
hbdata_md3 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 5,
         metric_id == 3)
hbdata_md4 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 5,
         metric_id == 1)
hbdata_md5 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 6,
         metric_id == 3)
hbdata_md6 <- hbdata %>%
  filter(location_name %in% c("Michigan"),
         measure_id == 6,
         metric_id == 1)

Trend analysis - Total burden related to hepatitis B

ggplot(hbdata_md1, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

ggplot(hbdata_md2, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

ggplot(hbdata_md3, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

ggplot(hbdata_md4, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

ggplot(hbdata_md5, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

ggplot(hbdata_md6, aes(x = year, y = val, color = sex_name, group = sex_name)) +
  geom_line() +
  labs(title = "Comparison of 'val' metric for BB through years",
       x = "Year",
       y = "Value",
       color = "Location") +
  theme_bw()

Difference analysis - Total burden related to hepatitis B

hbdata_md1_diff <- hbdata_md1 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md1
ggplot(hbdata_md1_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

hbdata_md2_diff <- hbdata_md2 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md1
ggplot(hbdata_md2_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 1)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For hbdata_md3
hbdata_md3_diff <- hbdata_md3 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md3
ggplot(hbdata_md3_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 3)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For hbdata_md4
hbdata_md4_diff <- hbdata_md4 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md4
ggplot(hbdata_md4_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 4)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For hbdata_md5
hbdata_md5_diff <- hbdata_md5 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md5
ggplot(hbdata_md5_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 5)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

# For hbdata_md6
hbdata_md6_diff <- hbdata_md6 %>%
  group_by(year) %>%
  summarise(difference = mean(val[sex_name == "Male"]) - mean(val[sex_name == "Female"]))

# Plot the difference values over the years for hbdata_md6
ggplot(hbdata_md6_diff, aes(x = year, y = difference)) +
  geom_line() +
  geom_vline(xintercept = 2014, linetype = "dashed", color = "red") +  
  labs(title = "Difference in 'val' metric between Michigan and Wisconsin (Measure ID = 6)",
       x = "Year",
       y = "Difference",
       color = "Location") +
  theme_bw()

DiD - sex - Michigan

val=β0​+β1​×sex+β2​×intervention_dummy+β3​×year_factor+β4​×(sex×intervention_dummy)+ϵ

Where:

# Fit a linear regression model using lm() with ‘val’ as the dependent variable

# ‘sex’ as the binary predictor representing Male

# ‘intervention_dummy’ as the binary predictor representing intervention period (after 2014)

# Interaction between ‘sex’ and ‘intervention_dummy’ to capture the differential effect of intervention for Males

# ‘year’ as a factor variable

DiD - Total burden related to hepatitis B- sex

measure_id == 2(DALY),

metric_id == 3(Rate)

# Create a binary variable 'sex' indicating whether the sex is Male (1) or not (0)
hbdata_md1$sex <- ifelse(hbdata_md1$sex_name == "Male", 1, 0)

# Create a binary variable 'intervention_dummy' indicating whether the year is greater than or equal to 2014
hbdata_md1$intervention_dummy <- ifelse(hbdata_md1$year >= 2014, 1, 0)

#Modeling
ymodel <- lm(val ~ sex * intervention_dummy + factor(year), data = hbdata_md1)

# Display the summary of the linear regression model
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.4388 -0.1772  0.0000  0.1772  0.4388 
## 
## Coefficients: (1 not defined because of singularities)
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            14.34110    0.26948  53.218  < 2e-16 ***
## sex                    22.85500    0.13916 164.238  < 2e-16 ***
## intervention_dummy     -1.05045    0.38948  -2.697   0.0147 *  
## factor(year)2001       -0.06825    0.36818  -0.185   0.8550    
## factor(year)2002        0.06340    0.36818   0.172   0.8652    
## factor(year)2003       -0.67466    0.36818  -1.832   0.0835 .  
## factor(year)2004       -0.94794    0.36818  -2.575   0.0191 *  
## factor(year)2005       -0.47071    0.36818  -1.278   0.2173    
## factor(year)2006       -0.47206    0.36818  -1.282   0.2161    
## factor(year)2007       -0.59184    0.36818  -1.607   0.1253    
## factor(year)2008       -0.50993    0.36818  -1.385   0.1830    
## factor(year)2009       -0.66234    0.36818  -1.799   0.0888 .  
## factor(year)2010       -0.09127    0.36818  -0.248   0.8070    
## factor(year)2011        0.20625    0.36818   0.560   0.5823    
## factor(year)2012        0.11561    0.36818   0.314   0.7571    
## factor(year)2013        0.98474    0.36818   2.675   0.0155 *  
## factor(year)2014        2.65186    0.36818   7.203 1.06e-06 ***
## factor(year)2015        3.10777    0.36818   8.441 1.13e-07 ***
## factor(year)2016        1.94611    0.36818   5.286 5.02e-05 ***
## factor(year)2017       -0.03174    0.36818  -0.086   0.9323    
## factor(year)2018       -0.09007    0.36818  -0.245   0.8095    
## factor(year)2019             NA         NA      NA       NA    
## sex:intervention_dummy -0.09975    0.25407  -0.393   0.6992    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3682 on 18 degrees of freedom
## Multiple R-squared:  0.9995, Adjusted R-squared:  0.999 
## F-statistic:  1841 on 21 and 18 DF,  p-value: < 2.2e-16

measure_id == 2(DALY),

metric_id == 1(Number)

hbdata_md2$sex <- ifelse(hbdata_md2$sex_name == "Male", 1, 0)
hbdata_md2$intervention_dummy <- ifelse(hbdata_md2$year >= 2014, 1, 0)

ymodel <- lm(val ~ sex*intervention_dummy+factor(year), data = hbdata_md2)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -21.980  -9.126   0.000   9.126  21.980 
## 
## Coefficients: (1 not defined because of singularities)
##                          Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             721.09444   13.63350  52.891  < 2e-16 ***
## sex                    1088.39547    7.04031 154.595  < 2e-16 ***
## intervention_dummy      -65.57082   19.70448  -3.328  0.00374 ** 
## factor(year)2001         -0.10306   18.62691  -0.006  0.99565    
## factor(year)2002          8.29753   18.62691   0.445  0.66130    
## factor(year)2003        -27.05767   18.62691  -1.453  0.16354    
## factor(year)2004        -40.04614   18.62691  -2.150  0.04541 *  
## factor(year)2005        -16.53151   18.62691  -0.888  0.38651    
## factor(year)2006        -17.45172   18.62691  -0.937  0.36121    
## factor(year)2007        -24.25653   18.62691  -1.302  0.20926    
## factor(year)2008        -21.64332   18.62691  -1.162  0.26044    
## factor(year)2009        -30.99888   18.62691  -1.664  0.11338    
## factor(year)2010         -5.25081   18.62691  -0.282  0.78124    
## factor(year)2011          6.77967   18.62691   0.364  0.72012    
## factor(year)2012          0.01649   18.62691   0.001  0.99930    
## factor(year)2013         40.08647   18.62691   2.152  0.04521 *  
## factor(year)2014        143.71347   18.62691   7.715 4.09e-07 ***
## factor(year)2015        163.47784   18.62691   8.776 6.39e-08 ***
## factor(year)2016        103.93741   18.62691   5.580 2.69e-05 ***
## factor(year)2017          4.83927   18.62691   0.260  0.79797    
## factor(year)2018         -1.13131   18.62691  -0.061  0.95224    
## factor(year)2019               NA         NA      NA       NA    
## sex:intervention_dummy  -25.55646   12.85379  -1.988  0.06221 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 18.63 on 18 degrees of freedom
## Multiple R-squared:  0.9995, Adjusted R-squared:  0.9988 
## F-statistic:  1613 on 21 and 18 DF,  p-value: < 2.2e-16

measure_id == 5(Prevalence),

metric_id == 3(Rate)

# For hbdata_md3
hbdata_md3$sex <- ifelse(hbdata_md3$sex_name == "Male", 1, 0)
hbdata_md3$intervention_dummy <- ifelse(hbdata_md3$year >= 2014, 1, 0)

ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md3)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.941  -9.433   0.000   9.433  22.941 
## 
## Coefficients: (1 not defined because of singularities)
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             304.117     14.318  21.240  3.4e-14 ***
## sex                     285.494      7.394  38.613  < 2e-16 ***
## intervention_dummy      -96.556     20.694  -4.666 0.000192 ***
## factor(year)2001         -2.966     19.562  -0.152 0.881154    
## factor(year)2002         -8.025     19.562  -0.410 0.686461    
## factor(year)2003        -13.882     19.562  -0.710 0.487015    
## factor(year)2004        -19.076     19.562  -0.975 0.342398    
## factor(year)2005        -22.245     19.562  -1.137 0.270376    
## factor(year)2006        -27.823     19.562  -1.422 0.172035    
## factor(year)2007        -38.743     19.562  -1.981 0.063134 .  
## factor(year)2008        -51.524     19.562  -2.634 0.016852 *  
## factor(year)2009        -62.611     19.562  -3.201 0.004955 ** 
## factor(year)2010        -68.501     19.562  -3.502 0.002547 ** 
## factor(year)2011        -72.588     19.562  -3.711 0.001600 ** 
## factor(year)2012        -79.361     19.562  -4.057 0.000740 ***
## factor(year)2013        -87.150     19.562  -4.455 0.000306 ***
## factor(year)2014         29.507     19.562   1.508 0.148803    
## factor(year)2015         24.948     19.562   1.275 0.218408    
## factor(year)2016         14.151     19.562   0.723 0.478746    
## factor(year)2017          3.664     19.562   0.187 0.853534    
## factor(year)2018          1.483     19.562   0.076 0.940411    
## factor(year)2019             NA         NA      NA       NA    
## sex:intervention_dummy  -54.374     13.499  -4.028 0.000789 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 19.56 on 18 degrees of freedom
## Multiple R-squared:  0.9915, Adjusted R-squared:  0.9815 
## F-statistic: 99.49 on 21 and 18 DF,  p-value: 1.517e-14

measure_id == 5(Prevalence),

metric_id == 1(Number)

# For hbdata_md4
hbdata_md4$sex <- ifelse(hbdata_md4$sex_name == "Male", 1, 0)
hbdata_md4$intervention_dummy <- ifelse(hbdata_md4$year >= 2014, 1, 0)

ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md4)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md4)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -1072   -490      0    490   1072 
## 
## Coefficients: (1 not defined because of singularities)
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            15254.02     704.89  21.640 2.46e-14 ***
## sex                    13446.72     364.00  36.941  < 2e-16 ***
## intervention_dummy     -4997.23    1018.77  -4.905 0.000114 ***
## factor(year)2001         -93.32     963.06  -0.097 0.923881    
## factor(year)2002        -308.95     963.06  -0.321 0.752055    
## factor(year)2003        -577.34     963.06  -0.599 0.556325    
## factor(year)2004        -823.64     963.06  -0.855 0.403662    
## factor(year)2005        -982.85     963.06  -1.021 0.320989    
## factor(year)2006       -1268.92     963.06  -1.318 0.204168    
## factor(year)2007       -1823.31     963.06  -1.893 0.074516 .  
## factor(year)2008       -2476.08     963.06  -2.571 0.019233 *  
## factor(year)2009       -3050.88     963.06  -3.168 0.005325 ** 
## factor(year)2010       -3374.99     963.06  -3.504 0.002532 ** 
## factor(year)2011       -3613.01     963.06  -3.752 0.001461 ** 
## factor(year)2012       -3981.40     963.06  -4.134 0.000623 ***
## factor(year)2013       -4397.78     963.06  -4.566 0.000239 ***
## factor(year)2014        1612.01     963.06   1.674 0.111450    
## factor(year)2015        1356.65     963.06   1.409 0.175967    
## factor(year)2016         797.48     963.06   0.828 0.418479    
## factor(year)2017         252.35     963.06   0.262 0.796271    
## factor(year)2018         109.22     963.06   0.113 0.910964    
## factor(year)2019             NA         NA      NA       NA    
## sex:intervention_dummy -2785.55     664.58  -4.191 0.000549 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 963.1 on 18 degrees of freedom
## Multiple R-squared:  0.9908, Adjusted R-squared:   0.98 
## F-statistic: 92.09 on 21 and 18 DF,  p-value: 3.003e-14

measure_id == 6(Incidence),

metric_id == 3(Rate)

# For hbdata_md5
hbdata_md5$sex <- ifelse(hbdata_md5$sex_name == "Male", 1, 0)
hbdata_md5$intervention_dummy <- ifelse(hbdata_md5$year >= 2014, 1, 0)

ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md5)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md5)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.093 -3.536  0.000  3.536  6.093 
## 
## Coefficients: (1 not defined because of singularities)
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             66.9728     4.4038  15.208 1.02e-11 ***
## sex                     56.6121     2.2741  24.894 2.14e-15 ***
## intervention_dummy     -34.5190     6.3649  -5.423 3.75e-05 ***
## factor(year)2001        -1.5912     6.0168  -0.264 0.794434    
## factor(year)2002        -3.9425     6.0168  -0.655 0.520597    
## factor(year)2003        -6.5535     6.0168  -1.089 0.290432    
## factor(year)2004        -8.8915     6.0168  -1.478 0.156749    
## factor(year)2005       -10.4929     6.0168  -1.744 0.098223 .  
## factor(year)2006       -12.5781     6.0168  -2.090 0.051031 .  
## factor(year)2007       -16.1239     6.0168  -2.680 0.015293 *  
## factor(year)2008       -20.1571     6.0168  -3.350 0.003564 ** 
## factor(year)2009       -23.6987     6.0168  -3.939 0.000963 ***
## factor(year)2010       -25.8136     6.0168  -4.290 0.000441 ***
## factor(year)2011       -27.2333     6.0168  -4.526 0.000261 ***
## factor(year)2012       -29.1726     6.0168  -4.849 0.000129 ***
## factor(year)2013       -31.3383     6.0168  -5.208 5.93e-05 ***
## factor(year)2014         9.7734     6.0168   1.624 0.121683    
## factor(year)2015         8.0636     6.0168   1.340 0.196858    
## factor(year)2016         4.1145     6.0168   0.684 0.502792    
## factor(year)2017         0.3563     6.0168   0.059 0.953428    
## factor(year)2018        -0.3195     6.0168  -0.053 0.958241    
## factor(year)2019             NA         NA      NA       NA    
## sex:intervention_dummy -17.3500     4.1520  -4.179 0.000564 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.017 on 18 degrees of freedom
## Multiple R-squared:  0.9818, Adjusted R-squared:  0.9605 
## F-statistic: 46.14 on 21 and 18 DF,  p-value: 1.286e-11

measure_id == 6(Incidence),

metric_id == 1(Number)

# For hbdata_md6
hbdata_md6$sex <- ifelse(hbdata_md6$sex_name == "Male", 1, 0)
hbdata_md6$intervention_dummy <- ifelse(hbdata_md6$year >= 2014, 1, 0)

ymodel <- lm(val ~ sex*intervention_dummy + factor(year), data = hbdata_md6)
summary(ymodel)
## 
## Call:
## lm(formula = val ~ sex * intervention_dummy + factor(year), data = hbdata_md6)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -299.1 -177.8    0.0  177.8  299.1 
## 
## Coefficients: (1 not defined because of singularities)
##                         Estimate Std. Error t value Pr(>|t|)    
## (Intercept)             3352.901    213.903  15.675 6.16e-12 ***
## sex                     2668.770    110.459  24.161 3.61e-15 ***
## intervention_dummy     -1747.163    309.154  -5.651 2.32e-05 ***
## factor(year)2001         -67.672    292.247  -0.232 0.819493    
## factor(year)2002        -177.315    292.247  -0.607 0.551606    
## factor(year)2003        -302.749    292.247  -1.036 0.313950    
## factor(year)2004        -416.807    292.247  -1.426 0.170920    
## factor(year)2005        -496.751    292.247  -1.700 0.106390    
## factor(year)2006        -601.742    292.247  -2.059 0.054262 .  
## factor(year)2007        -779.436    292.247  -2.667 0.015712 *  
## factor(year)2008        -982.199    292.247  -3.361 0.003480 ** 
## factor(year)2009       -1161.611    292.247  -3.975 0.000888 ***
## factor(year)2010       -1271.710    292.247  -4.351 0.000385 ***
## factor(year)2011       -1348.193    292.247  -4.613 0.000216 ***
## factor(year)2012       -1449.925    292.247  -4.961 0.000101 ***
## factor(year)2013       -1562.401    292.247  -5.346 4.42e-05 ***
## factor(year)2014         504.270    292.247   1.725 0.101565    
## factor(year)2015         414.899    292.247   1.420 0.172787    
## factor(year)2016         216.962    292.247   0.742 0.467428    
## factor(year)2017          28.701    292.247   0.098 0.922853    
## factor(year)2018          -9.826    292.247  -0.034 0.973548    
## factor(year)2019              NA         NA      NA       NA    
## sex:intervention_dummy  -854.978    201.670  -4.239 0.000493 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 292.2 on 18 degrees of freedom
## Multiple R-squared:  0.9812, Adjusted R-squared:  0.9593 
## F-statistic: 44.78 on 21 and 18 DF,  p-value: 1.667e-11