STAT9210

data prep

nhanes_data = read.csv("nhanesi_class_dataset.csv", header=TRUE)
library(DescTools) 
library(caret)

## Loading required package: ggplot2

## Loading required package: lattice

## 
## Attaching package: 'caret'

## The following objects are masked from 'package:DescTools':
## 
##     MAE, RMSE

library(OSTools)   
library(ggplot2)   

nhanes_data = read.csv("nhanesi_class_dataset.csv", header=TRUE)


datatemp = nhanes_data
datatemp$treatment = as.numeric(datatemp$physically.inactive)
datatemp$outcome = datatemp$years.lived.since.1971.up.to.1992

datatemp$working.during.last.three.months <- as.numeric(datatemp$working.last.three.months)
datatemp$marital.status <- as.numeric(datatemp$married)

ipw.est <- function(data, propscore.formula){
    psmodel <- glm(propscore.formula, family=binomial, data=data) 
    propscore <- predict(psmodel, type="response")
    treatment <- data$treatment
    outcome <- data$outcome
    
    weight <- (1/propscore)*treatment + (1/(1-propscore))*(1-treatment)

    weight.norm <- weight
    weight.norm[treatment==1] <- weight[treatment==1] / sum(weight[treatment==1])
    weight.norm[treatment==0] <- weight[treatment==0] / sum(weight[treatment==0]*(1-treatment)) 
    
    effect.norm <- sum(weight.norm*outcome*treatment) -
                   sum(weight.norm*outcome*(1-treatment))
    return(effect.norm)
}

(a) explanatory data analysis: smoking status and physical inactivity

datatemp_eda <- datatemp
datatemp_eda$Activity_Status <- factor(datatemp_eda$treatment, 
                                     levels = c(0, 1), 
                                     labels = c("Active (Control)", "Inactive (Treated)"))
datatemp_eda$Smoking <- datatemp_eda$smoking.status

plot_a <- ggplot(datatemp_eda, aes(x = Activity_Status, fill = Smoking)) +
  geom_bar(position = "fill") + 
  labs(title = "Smoking Status Distribution by Physical Activity Level",
       x = "Physical Activity Status",
       y = "Proportion",
       fill = "Smoking Status") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal()

print(plot_a)

This chart shows that while the proportion of individuals who have never smoked is nearly identical between the two groups, physically inactive individuals have a slightly higher proportion of current smokers and a lower proportion of former smokers compared to those who are physically active.

(b) missing values

datatemp$income.poverty.ratio.mis <- is.na(datatemp$income.poverty.ratio)
datatemp$dietary.adequacy.mis <- is.na(datatemp$dietary.adequacy)

mean_income <- mean(datatemp$income.poverty.ratio, na.rm=TRUE)
mean_dietary <- mean(datatemp$dietary.adequacy, na.rm=TRUE)

datatemp$income.poverty.ratio[is.na(datatemp$income.poverty.ratio)] <- mean_income
datatemp$dietary.adequacy[is.na(datatemp$dietary.adequacy)] <- mean_dietary

categorical_vars <- c("sex", "smoking.status", "race", "education", "alcohol.consumption")
for (v in categorical_vars) {
    datatemp[[v]] <- factor(datatemp[[v]])
}
datatemp$working.during.last.three.months <- factor(datatemp$working.during.last.three.months)
datatemp$marital.status <- factor(datatemp$marital.status)
datatemp$income.poverty.ratio.mis <- factor(datatemp$income.poverty.ratio.mis)
datatemp$dietary.adequacy.mis <- factor(datatemp$dietary.adequacy.mis)

(c) fit a propensity score model

covariates <- formula(treatment ~ sex + smoking.status + income.poverty.ratio + age.at.interview + 
                      race + education + working.during.last.three.months + marital.status + 
                      alcohol.consumption + dietary.adequacy + 
                      income.poverty.ratio.mis + dietary.adequacy.mis)

psmodel <- glm(covariates, family=binomial, data=datatemp, x=TRUE, y=TRUE)
datatemp$logit.ps <- predict(psmodel) 

res <- overlap(datatemp$logit.ps, datatemp$treatment)
which.remove <- res$index.lack.overlap 
no_excluded <- res$no.treated.lack.overlap + res$no.control.lack.overlap

print(paste("Number of treated units lacking overlap:", res$no.treated.lack.overlap))

## [1] "Number of treated units lacking overlap: 1"

print(paste("Number of control units lacking overlap:", res$no.control.lack.overlap))

## [1] "Number of control units lacking overlap: 0"

print(paste("Total units excluded:", no_excluded))

## [1] "Total units excluded: 1"

dmy.original <- dummyVars(covariates, data=datatemp)
Xmat.original <- data.frame(predict(dmy.original, newdata=datatemp))

if (no_excluded > 0) {
    datatemp_overlap <- datatemp[-which.remove,]
    Xmat <- Xmat.original[-which.remove,]
    datatemp.full <- rbind(datatemp_overlap, datatemp[which.remove,]) 
    Xmat.full <- rbind(Xmat, Xmat.original[which.remove,])
} else {
    datatemp_overlap <- datatemp
    Xmat <- Xmat.original
    datatemp.full <- datatemp
    Xmat.full <- Xmat.original
}

rownames(datatemp_overlap) <- 1:nrow(datatemp_overlap)
rownames(Xmat) <- 1:nrow(Xmat)

(d) assess the balance on the confounders

btb_smd_before <- balance.check(Xmat.full, Xmat.full, 
                                datatemp.full$treatment, datatemp.full$treatment)

smd_before <- abs(btb_smd_before[, "Before SMD"])

imbalanced_vars <- names(smd_before[smd_before > 0.1])
print("Confounders with Absolute Standardized Differences > 0.1 (Before Adjustment):")

## [1] "Confounders with Absolute Standardized Differences > 0.1 (Before Adjustment):"

print(imbalanced_vars)

##  [1] "sex.Female"                             
##  [2] "sex.Male"                               
##  [3] "smoking.status.Current"                 
##  [4] "smoking.status.Former"                  
##  [5] "income.poverty.ratio"                   
##  [6] "age.at.interview"                       
##  [7] "race.Nonwhite"                          
##  [8] "race.White"                             
##  [9] "education.0.8"                          
## [10] "education.12"                           
## [11] "education.Some.College"                 
## [12] "working.during.last.three.months.0"     
## [13] "working.during.last.three.months.1"     
## [14] "marital.status.0"                       
## [15] "marital.status.1"                       
## [16] "alcohol.consumption.1.4.times.per.month"
## [17] "alcohol.consumption.Never"              
## [18] "dietary.adequacy"

(e) Inverse Probability Weighting

propscore_overlap <- predict(psmodel, newdata = datatemp_overlap, type = "response")

weight <- ifelse(datatemp_overlap$treatment == 1, 
                 1 / propscore_overlap, 
                 1 / (1 - propscore_overlap))

lm_ipw <- lm(outcome ~ treatment, data = datatemp_overlap, weights = weight)
summary(lm_ipw)

## 
## Call:
## lm(formula = outcome ~ treatment, data = datatemp_overlap, weights = weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -39.881  -4.181   3.237   3.549  21.339 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  17.9036     0.1757 101.927  < 2e-16 ***
## treatment    -1.6147     0.2465  -6.551 7.31e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.748 on 1948 degrees of freedom
## Multiple R-squared:  0.02155,    Adjusted R-squared:  0.02105 
## F-statistic: 42.91 on 1 and 1948 DF,  p-value: 7.31e-11

ate_estimate_ipw <- coef(lm_ipw)["treatment"]
conf_interval_ipw <- confint(lm_ipw)["treatment", ]

print(paste("IPW Point Estimate (from WLS):", round(ate_estimate_ipw, 4)))

## [1] "IPW Point Estimate (from WLS): -1.6147"

print("95% Confidence Interval for IPW Estimate:")

## [1] "95% Confidence Interval for IPW Estimate:"

print(conf_interval_ipw)

##     2.5 %    97.5 % 
## -2.098185 -1.131313

(f) propensity score subclassification

propscore <- predict(psmodel, type="response", newdata=datatemp_overlap) 
datatemp_overlap$propscore <- propscore

no.subclasses <- 5
cutoffs <- quantile(propscore, seq(1/no.subclasses, (no.subclasses-1)/no.subclasses, 1/no.subclasses))
subclass <- rep(1, length(propscore))
for (i in 1:(no.subclasses-1)){
    subclass <- subclass + 1*(propscore >= cutoffs[i])
}
datatemp_overlap$subclass <- subclass

psmodel.overlap <- glm(covariates, family=binomial, data=datatemp_overlap, x=TRUE)

nvar <- ncol(psmodel.overlap$x) - 1
smd.before.subclass <- numeric(nvar)
smd.after.subclass <- numeric(nvar)

for (i in 2:(nvar+1)){
    xi <- psmodel.overlap$x[, i]
    poolsd <- sqrt((var(xi[datatemp_overlap$treatment==1]) + var(xi[datatemp_overlap$treatment==0])) / 2)
    smd.before.subclass[i-1] <- (mean(xi[datatemp_overlap$treatment==1]) - mean(xi[datatemp_overlap$treatment==0])) / poolsd
    
    tempdf <- matrix(nrow=no.subclasses, ncol=2)
    for (s in 1:(no.subclasses)){
        tempdf[s, 1] <- mean(xi[datatemp_overlap$treatment==1 & datatemp_overlap$subclass==s], na.rm=TRUE)
        tempdf[s, 2] <- mean(xi[datatemp_overlap$treatment==0 & datatemp_overlap$subclass==s], na.rm=TRUE)
    }
    smd.after.subclass[i-1] <- sum((tempdf[, 1] - tempdf[, 2]) * table(subclass) / nrow(datatemp_overlap), na.rm=TRUE) / poolsd
}

love.plot(abs(smd.before.subclass), abs(smd.after.subclass), colnames(psmodel.overlap$x)[2:(nvar+1)])

Based on the Love plot, the adjustment is acceptable.

max_smd_after <- max(abs(smd.after.subclass), na.rm=TRUE)
print(paste("Maximum Adjusted Absolute SMD:", round(max_smd_after, 4)))

## [1] "Maximum Adjusted Absolute SMD: 0.0665"

if (max_smd_after < 0.2) {
    print("Balance after subclassification is acceptable.")
} else {
    print("Balance after subclassification is NOT acceptable.")
}

## [1] "Balance after subclassification is acceptable."

subclass.treatment.effect <- rep(0, no.subclasses)
subclass.treatment.effect.se <- rep(0, no.subclasses)
subclass.no.subjects <- rep(0, no.subclasses)

for (i in 1:no.subclasses){
    tempmodel <- lm(outcome ~ treatment, subset=(subclass==i), data=datatemp_overlap)
    subclass.treatment.effect[i] <- coef(tempmodel)[2]
    subclass.treatment.effect.se[i] <- sqrt(vcov(tempmodel)[2, 2])
    subclass.no.subjects[i] <- sum(datatemp_overlap$subclass == i, na.rm=TRUE)
}

subclass.weight <- subclass.no.subjects / sum(subclass.no.subjects)
avg.treat.effect.est <- sum(subclass.treatment.effect * subclass.weight, na.rm=TRUE)
avg.treat.effect.se <- sqrt(sum(subclass.weight^2 * subclass.treatment.effect.se^2, na.rm=TRUE))
avg.treat.effect.lci <- avg.treat.effect.est - 1.96 * avg.treat.effect.se
avg.treat.effect.uci <- avg.treat.effect.est + 1.96 * avg.treat.effect.se

print(paste("Subclassification ATE Estimate:", round(avg.treat.effect.est, 4)))

## [1] "Subclassification ATE Estimate: -1.7295"

print(paste0("95% CI: (", round(avg.treat.effect.lci, 4), ", ", round(avg.treat.effect.uci, 4), ")"))

## [1] "95% CI: (-2.3118, -1.1472)"

(g) Optimal Matched Pairs

age_var <- "age.at.interview"
sex_vars <- grep("^sex", colnames(Xmat), value = TRUE)
smoking_vars <- grep("^smoking.status", colnames(Xmat), value = TRUE)
Xmatmahal_vars <- c(age_var, sex_vars, smoking_vars)
Xmatmahal <- Xmat[, Xmatmahal_vars]

distmat <- maha.dense(datatemp_overlap$treatment, Xmatmahal, matrix=TRUE)

datatemp_overlap$logit.ps <- predict(psmodel, newdata=datatemp_overlap)
distmat <- add.caliper(distmat, datatemp_overlap$treatment, datatemp_overlap$logit.ps, 
                      rg=.5, stdev=TRUE, penalty=1000, constant=FALSE)

rownames(distmat) <- rownames(datatemp_overlap)[as.numeric(rownames(distmat))]
colnames(distmat) <- rownames(datatemp_overlap)[as.numeric(colnames(distmat))]
result1 <- match.os(datatemp_overlap$treatment, distmat, datatemp_overlap, ncontrol=1)
matcheddata1 <- result1$data

Xmat.matched1 <- Xmat[as.numeric(rownames(matcheddata1)), ]
btb_smd1 <- balance.check(Xmat.full, Xmat.matched1, 
                         datatemp.full$treatment, matcheddata1$treatment)

print("Standardized Differences After Pair Matching (n=1):")

## [1] "Standardized Differences After Pair Matching (n=1):"

print(round(btb_smd1[, c("After SMD", "Before SMD")], 3))

##                                                  After SMD Before SMD
## sex.Female                                          -0.004      0.244
## sex.Male                                             0.004     -0.244
## smoking.status.Current                               0.009      0.141
## smoking.status.Former                               -0.005     -0.142
## smoking.status.Never                                -0.004     -0.018
## income.poverty.ratio                                -0.122     -0.420
## age.at.interview                                    -0.002      0.282
## race.Nonwhite                                        0.093      0.251
## race.White                                          -0.093     -0.251
## education.0.8                                        0.127      0.310
## education.12                                        -0.069     -0.194
## education.College.Grad                              -0.055      0.037
## education.Missing                                   -0.054      0.004
## education.Ninth.11th.grade                           0.021     -0.098
## education.Some.College                              -0.083     -0.158
## working.during.last.three.months.0                   0.046      0.588
## working.during.last.three.months.1                  -0.046     -0.588
## marital.status.0                                     0.071      0.350
## marital.status.1                                    -0.071     -0.350
## alcohol.consumption..1.time.per.month               -0.054      0.015
## alcohol.consumption.1.4.times.per.month              0.000     -0.125
## alcohol.consumption.2..times.per.week                0.036     -0.069
## alcohol.consumption.Just.about.everyday.everyday    -0.054     -0.073
## alcohol.consumption.Never                            0.062      0.189
## dietary.adequacy                                    -0.100     -0.305
## income.poverty.ratio.mis.FALSE                       0.205      0.013
## income.poverty.ratio.mis.TRUE                       -0.205     -0.013
## dietary.adequacy.mis.FALSE                           0.245      0.068
## dietary.adequacy.mis.TRUE                           -0.245     -0.068

# Love Plot [cite: 275]
love.plot(abs(btb_smd1[, "Before SMD"]), abs(btb_smd1[, "After SMD"]), rownames(btb_smd1))

## (h) Matching with Multiple Controls

# n control=2
result2 <- match.os(datatemp_overlap$treatment, distmat, datatemp_overlap, ncontrol=2)
matcheddata2 <- result2$data
Xmat.matched2 <- Xmat[as.numeric(rownames(matcheddata2)), ]
btb_smd2 <- balance.check(Xmat.full, Xmat.matched2, 
                         datatemp.full$treatment, matcheddata2$treatment)
smd_after2 <- abs(btb_smd2[, "After SMD"])
print("Maximum Absolute SMD for n=2 controls:")

## [1] "Maximum Absolute SMD for n=2 controls:"

print(round(max(smd_after2), 4))

## [1] 0.2124

Looks like n=2 is not the most balanced option with a lot of SMD > 0.2.

# n control=3
result3 <- match.os(datatemp_overlap$treatment, distmat, datatemp_overlap, ncontrol=3)
matcheddata3 <- result3$data
Xmat.matched3 <- Xmat[as.numeric(rownames(matcheddata3)), ]
btb_smd3 <- balance.check(Xmat.full, Xmat.matched3, 
                         datatemp.full$treatment, matcheddata3$treatment)
smd_after3 <- abs(btb_smd3[, "After SMD"])
print("Maximum Absolute SMD for n=3 controls:")

## [1] "Maximum Absolute SMD for n=3 controls:"

print(round(max(smd_after3), 4))

## [1] 0.5343

n=3 still does not provide a good balance with many SMD > 0.2. ## (i) Full Matching

result_full <- match.os.full(datatemp_overlap$treatment, distmat, datatemp_overlap)
matcheddata_full <- result_full$data

print(paste("Full Match Effective Sample Size:", round(result_full$effsize, 2)))

## [1] "Full Match Effective Sample Size: 572.28"

missing.mat <- matrix(0, nrow=nrow(Xmat.full), ncol=ncol(Xmat.full))
colnames(missing.mat) <- colnames(Xmat.full)
missing.mat[, "income.poverty.ratio"] <- as.numeric(datatemp.full$income.poverty.ratio.mis == "TRUE")
missing.mat[, "dietary.adequacy"] <- as.numeric(datatemp.full$dietary.adequacy.mis == "TRUE")

Xmat.without.missing.full <- Xmat.full
Xmat.without.missing.full[missing.mat[, "income.poverty.ratio"] == 1, "income.poverty.ratio"] <- NA
Xmat.without.missing.full[missing.mat[, "dietary.adequacy"] == 1, "dietary.adequacy"] <- NA

Xmat.matched_full <- Xmat.full[as.numeric(rownames(matcheddata_full)), ]
missing.matched <- missing.mat[as.numeric(rownames(matcheddata_full)), ]

Xmat.without.missing.matched <- Xmat.matched_full
Xmat.without.missing.matched[missing.matched[, "income.poverty.ratio"] == 1, "income.poverty.ratio"] <- NA
Xmat.without.missing.matched[missing.matched[, "dietary.adequacy"] == 1, "dietary.adequacy"] <- NA

smd.full <- balance.check.full(
    Xmat.without.missing.full, 
    Xmat.without.missing.matched, 
    datatemp.full$treatment, 
    matcheddata_full$treatment,
    matcheddata_full$mset, 
    missing.mat, 
    missing.matched, 
    'ate' 
)

print("Standardized Differences After Full Matching:")

## [1] "Standardized Differences After Full Matching:"

print(round(smd.full, 3))

##                                                  smd.before smd.after
## sex.Female                                            0.244     0.004
## sex.Male                                             -0.244    -0.004
## smoking.status.Current                                0.141    -0.001
## smoking.status.Former                                -0.142    -0.002
## smoking.status.Never                                 -0.018     0.002
## income.poverty.ratio                                 -0.499    -0.049
## age.at.interview                                      0.282    -0.004
## race.Nonwhite                                         0.251     0.018
## race.White                                           -0.251    -0.018
## education.0.8                                         0.310     0.036
## education.12                                         -0.194    -0.019
## education.College.Grad                                0.037     0.011
## education.Missing                                     0.004    -0.012
## education.Ninth.11th.grade                           -0.098    -0.008
## education.Some.College                               -0.158    -0.030
## working.during.last.three.months.0                    0.588     0.070
## working.during.last.three.months.1                   -0.588    -0.070
## marital.status.0                                      0.350     0.020
## marital.status.1                                     -0.350    -0.020
## alcohol.consumption..1.time.per.month                 0.015    -0.012
## alcohol.consumption.1.4.times.per.month              -0.125    -0.047
## alcohol.consumption.2..times.per.week                -0.069     0.002
## alcohol.consumption.Just.about.everyday.everyday     -0.073    -0.019
## alcohol.consumption.Never                             0.189     0.062
## dietary.adequacy                                     -0.348    -0.041
## income.poverty.ratio.mis.FALSE                        0.013     0.032
## income.poverty.ratio.mis.TRUE                        -0.013    -0.032
## dietary.adequacy.mis.FALSE                            0.068     0.032
## dietary.adequacy.mis.TRUE                            -0.068    -0.032

max_smd_full <- max(abs(smd.full[, "smd.after"]))
print(paste("Maximum Absolute SMD after full matching:", round(max_smd_full, 4)))

## [1] "Maximum Absolute SMD after full matching: 0.0701"

if (max_smd_full < 0.2) {
    print("Balance for full matching is ACCEPTABLE.")
} else {
    print("Balance for full matching is NOT acceptable.")
}

## [1] "Balance for full matching is ACCEPTABLE."

Much more balanced with SMD < 0.2 for all covariates. ## (j) Best Matching Justification Based on the results of questions (g), (h), and (i), the SMDs from full matching are all ≤ 0.2, and it also makes use of as many data points as possible. Therefore, it is the best choice, as it minimizes bias caused by observable confounding to the greatest extent.

(k) Treatment Effect Estimate and CI

treated.y1 <- matcheddata1$outcome[matcheddata1$treatment==1]
control.y1 <- matcheddata1$outcome[matcheddata1$treatment==0]
y1 <- cbind(treated.y1, control.y1)

ci1 <- sens.ci(y1, alternative='two-sided')

print("Pair Matching (n=1) Estimate (ATT):")

## [1] "Pair Matching (n=1) Estimate (ATT):"

print(paste0("Point Estimate: ", round(ci1$PointEstimate[1], 4)))

## [1] "Point Estimate: -2.4615"

print(paste0("95% CI: (", round(ci1$Confidence.Interval[1], 4), ", ", round(ci1$Confidence.Interval[2], 4), ")"))

## [1] "95% CI: (-3.1544, -1.8105)"

ci_full <- sens.ci.full(matcheddata_full$outcome, matcheddata_full$treatment, matcheddata_full$mset)

print("Chosen Match (Full Matching) Estimate (ATE):")

## [1] "Chosen Match (Full Matching) Estimate (ATE):"

print(paste0("Point Estimate: ", round(ci_full$PointEstimates[1], 4)))

## [1] "Point Estimate: -1.6456"

print(paste0("95% CI: (", round(ci_full$ConfidenceInterval[1], 4), ", ", round(ci_full$ConfidenceInterval[2], 4), ")"))

## [1] "95% CI: (-2.4584, -1.2105)"

(l) Potential Unmeasured Confounders

Variables that influence both physical inactivity and longevity but were not measured, such as genetic factors and unmeasured aspects of diet quality.

(m) Alternative Treatment Effect Estimation

datatemp_m <- datatemp.full 

correct_current_smoker_label <- "Current" 
datatemp_m$treatment_smoking <- as.numeric(datatemp_m$smoking.status == correct_current_smoker_label)

smoking_covariates <- formula(treatment_smoking ~ sex + income.poverty.ratio + age.at.interview + 
                             race + education + working.during.last.three.months + marital.status + 
                             alcohol.consumption + dietary.adequacy + physically.inactive + 
                             income.poverty.ratio.mis + dietary.adequacy.mis)

psmodel_smoking <- glm(smoking_covariates, family=binomial, data=datatemp_m, x=TRUE, y=TRUE)
datatemp_m$logit.ps_smoking <- predict(psmodel_smoking)

res_smoking <- overlap(datatemp_m$logit.ps_smoking, datatemp_m$treatment_smoking)
which.remove_smoking <- res_smoking$index.lack.overlap
datatemp_overlap_smoking <- datatemp_m[-which.remove_smoking, ]

dmy_smoking <- dummyVars(smoking_covariates, data=datatemp_overlap_smoking)
Xmat_smoking <- data.frame(predict(dmy_smoking, newdata=datatemp_overlap_smoking))
datatemp_overlap_smoking$treatment <- datatemp_overlap_smoking$treatment_smoking 

age_var_m <- "age.at.interview" 
sex_vars_m <- grep("^sex", colnames(Xmat_smoking), value = TRUE)
Xmatmahal_smoking <- Xmat_smoking[, c(age_var_m, sex_vars_m)]

distmat_smoking <- maha.dense(datatemp_overlap_smoking$treatment, Xmatmahal_smoking, matrix=TRUE)
distmat_smoking <- add.caliper(distmat_smoking, datatemp_overlap_smoking$treatment, 
                              datatemp_overlap_smoking$logit.ps_smoking, rg=.5, 
                              stdev=TRUE, penalty=1000, constant=FALSE)

result_smoking <- match.os(datatemp_overlap_smoking$treatment, distmat_smoking, 
                           datatemp_overlap_smoking, ncontrol=1)
matcheddata_smoking <- result_smoking$data

treated.y_smoking <- matcheddata_smoking$outcome[matcheddata_smoking$treatment==1]
control.y_smoking <- matcheddata_smoking$outcome[matcheddata_smoking$treatment==0]
y_smoking <- cbind(treated.y_smoking, control.y_smoking)

ci_smoking <- sens.ci(y_smoking, alternative='two-sided')

print("Smoking Treatment Estimate (ATT):")

## [1] "Smoking Treatment Estimate (ATT):"

print(paste0("Point Estimate: ", round(ci_smoking$PointEstimate[1], 4)))

## [1] "Point Estimate: -1.7886"

print(paste0("95% CI: (", round(ci_smoking$Confidence.Interval[1], 4), ", ", round(ci_smoking$Confidence.Interval[2], 4), ")"))

## [1] "95% CI: (-2.3117, -1.3474)"

STAT9210_HW2

Ying Zhang

2025-10-15