Results
library(haven)
# Read the SPSS file with haven. Labelled SPSS columns (PAIS and tratamiento
# in the preview below) arrive as <dbl+lbl> vectors.
datatratamiento <- read_sav("datatratamiento.sav")

# View() opens a GUI data viewer, so only call it when a user is present.
# This keeps the script runnable under Rscript / knitr.
if (interactive()) {
  View(datatratamiento)
}

# Print the tibble preview (dimensions, column types, first rows).
datatratamiento
## # A tibble: 53,385 × 17
## PAIS HH26C HHAGE HHSEX Helevel Wscore windex5 windex10 HC7A HC11 HC12
## <dbl+lbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 [ARGEN… NA 37 1 1 0.72 4 8 1 1 1
## 2 1 [ARGEN… NA 37 1 1 0.72 4 8 1 1 1
## 3 1 [ARGEN… 3 40 1 2 0.27 3 6 1 2 1
## 4 1 [ARGEN… 3 44 1 1 111. 5 9 1 1 1
## 5 1 [ARGEN… NA 32 1 1 115. 5 10 1 1 1
## 6 1 [ARGEN… NA 28 1 0 125. 5 10 1 1 1
## 7 1 [ARGEN… 2 42 1 1 114. 5 10 1 1 1
## 8 1 [ARGEN… NA 68 1 1 0.79 4 8 1 2 1
## 9 1 [ARGEN… NA 38 1 1 0.99 5 9 1 9 1
## 10 1 [ARGEN… NA 29 1 2 0.82 4 8 1 1 1
## # ℹ 53,375 more rows
## # ℹ 6 more variables: HC12A <dbl>, HC13 <dbl>, HC14 <dbl>, datebirth <dbl>,
## # tratamiento <dbl+lbl>, VAR00001 <chr>
# Count missing country codes before PAIS is expanded into dummy columns.
sum(is.na(datatratamiento[["PAIS"]]))
## [1] 0
# Strip the SPSS value labels from every column and convert the result to a
# base data.frame.
datatratamiento <- as.data.frame(lapply(datatratamiento, haven::zap_labels))

# Add one indicator column per country code (PAIS_1 ... PAIS_6).
# An earlier call with `remove_first_dummy = TRUE` was removed: its result was
# overwritten by this call straight away, so it never had any effect.
# Because the full set of dummies is kept, any model with an intercept has to
# leave one of them out as the reference category.
dummiesdata <- fastDummies::dummy_cols(
  datatratamiento,
  select_columns = "PAIS"
)
names(dummiesdata)
## [1] "PAIS" "HH26C" "HHAGE" "HHSEX" "Helevel"
## [6] "Wscore" "windex5" "windex10" "HC7A" "HC11"
## [11] "HC12" "HC12A" "HC13" "HC14" "datebirth"
## [16] "tratamiento" "VAR00001" "PAIS_1" "PAIS_2" "PAIS_3"
## [21] "PAIS_4" "PAIS_5" "PAIS_6"
# Linear model of Wscore on Helevel, windex5 and the country indicators.
#
# PAIS_6 is left out on purpose. PAIS has no missing values, so the six
# country dummies sum to one and are perfectly collinear with the intercept.
# The original formula listed all six and lm() silently dropped PAIS_6
# (reported as NA, "1 not defined because of singularities"). Omitting it
# explicitly gives the same estimates for every other term and makes PAIS_6
# the visible reference country.
# NOTE: the output recorded below comes from the original all-dummies call;
# re-run to refresh the Call and coefficient table.
modelolineal <- lm(
  Wscore ~ Helevel + windex5 + PAIS_1 + PAIS_2 + PAIS_3 + PAIS_4 + PAIS_5,
  data = dummiesdata
)
summary(modelolineal)
##
## Call:
## lm(formula = Wscore ~ Helevel + windex5 + PAIS_1 + PAIS_2 + PAIS_3 +
## PAIS_4 + PAIS_5 + PAIS_6, data = dummiesdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -727.25 -39.05 9.47 43.00 267.16
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -165.2192 1.1545 -143.113 <2e-16 ***
## Helevel 3.8454 0.2197 17.505 <2e-16 ***
## windex5 45.4332 0.2046 222.051 <2e-16 ***
## PAIS_1 15.5970 1.2802 12.183 <2e-16 ***
## PAIS_2 0.7372 1.2473 0.591 0.555
## PAIS_3 24.4629 1.0942 22.356 <2e-16 ***
## PAIS_4 1.3778 1.4473 0.952 0.341
## PAIS_5 31.2004 1.0728 29.084 <2e-16 ***
## PAIS_6 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 63.46 on 53377 degrees of freedom
## Multiple R-squared: 0.5276, Adjusted R-squared: 0.5276
## F-statistic: 8518 on 7 and 53377 DF, p-value: < 2.2e-16
# Recode tratamiento in place: every value equal to 2 becomes 0, so the
# column can serve as the 0/1 response of the binomial model.
dummiesdata$tratamiento[dummiesdata$tratamiento == 2] <- 0
# Logistic regression of the treatment indicator on Helevel, windex5 and the
# country indicators; its fitted values are used as propensity scores.
#
# PAIS_6 is left out on purpose: the six country dummies plus the intercept
# are perfectly collinear, and the original formula relied on glm() silently
# dropping PAIS_6 (reported as NA, "1 not defined because of singularities").
# Omitting it explicitly spans the same design, so the other coefficients and
# the fitted probabilities are unchanged, and PAIS_6 becomes the visible
# reference country.
# NOTE: the output recorded below comes from the original all-dummies call;
# re-run to refresh the Call and coefficient table.
Logistic_model <- glm(
  tratamiento ~ Helevel + windex5 + PAIS_1 + PAIS_2 + PAIS_3 + PAIS_4 + PAIS_5,
  family = binomial(),
  data = dummiesdata
)
summary(Logistic_model)
##
## Call:
## glm(formula = tratamiento ~ Helevel + windex5 + PAIS_1 + PAIS_2 +
## PAIS_3 + PAIS_4 + PAIS_5 + PAIS_6, family = binomial(), data = dummiesdata)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.81209 0.18312 -26.279 < 2e-16 ***
## Helevel 0.20925 0.01447 14.465 < 2e-16 ***
## windex5 -0.24851 0.01934 -12.849 < 2e-16 ***
## PAIS_1 0.55005 0.22128 2.486 0.012928 *
## PAIS_2 0.53878 0.20960 2.571 0.010154 *
## PAIS_3 2.15897 0.17912 12.053 < 2e-16 ***
## PAIS_4 0.77265 0.22320 3.462 0.000537 ***
## PAIS_5 1.72255 0.17970 9.586 < 2e-16 ***
## PAIS_6 NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 14694 on 53384 degrees of freedom
## Residual deviance: 13782 on 53377 degrees of freedom
## AIC: 13798
##
## Number of Fisher Scoring iterations: 7
# Frequency of each treatment code after the recode (sanity check on group
# sizes).
table(dummiesdata[["tratamiento"]])
##
## 0 1
## 51739 1646
# Propensity scores: the fitted probabilities of the logistic model, pulled
# out with the standard accessor instead of reaching into the object.
pscore <- fitted(Logistic_model)

# Five-number summary plus mean of the scores.
summary(pscore)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002341 0.010873 0.025936 0.030833 0.041934 0.265337
library(MatchIt)
# Nearest-neighbour matching of treated to control units on the `distance`
# measure reported in the summary (MatchIt's default distance; presumably a
# logistic-regression propensity score -- confirm for the installed version).
# All six country dummies are listed so the balance table reports every
# country.
match.out <- matchit(
  formula = tratamiento ~ Helevel + windex5 + PAIS_1 + PAIS_2 + PAIS_3 +
    PAIS_4 + PAIS_5 + PAIS_6,
  data = dummiesdata,
  method = "nearest"
)

# Covariate balance before and after matching, plus matched sample sizes.
summary(match.out)
##
## Call:
## matchit(formula = tratamiento ~ Helevel + windex5 + PAIS_1 +
## PAIS_2 + PAIS_3 + PAIS_4 + PAIS_5 + PAIS_6, data = dummiesdata,
## method = "nearest")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.0476 0.0303 0.6367 1.3448 0.1806
## Helevel 2.2497 1.8661 0.2778 0.8936 0.0539
## windex5 2.3372 2.7418 -0.3043 0.8457 0.0809
## PAIS_1 0.0352 0.1397 -0.5665 . 0.1044
## PAIS_2 0.0462 0.1167 -0.3359 . 0.0705
## PAIS_3 0.4587 0.2525 0.4138 . 0.2062
## PAIS_4 0.0322 0.0640 -0.1801 . 0.0318
## PAIS_5 0.4077 0.3410 0.1356 . 0.0666
## PAIS_6 0.0200 0.0861 -0.4714 . 0.0661
## eCDF Max
## distance 0.3319
## Helevel 0.1992
## windex5 0.1170
## PAIS_1 0.1044
## PAIS_2 0.0705
## PAIS_3 0.2062
## PAIS_4 0.0318
## PAIS_5 0.0666
## PAIS_6 0.0661
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.0476 0.0476 0 1 0
## Helevel 2.2497 2.2497 0 1 0
## windex5 2.3372 2.3372 0 1 0
## PAIS_1 0.0352 0.0352 0 . 0
## PAIS_2 0.0462 0.0462 0 . 0
## PAIS_3 0.4587 0.4587 0 . 0
## PAIS_4 0.0322 0.0322 0 . 0
## PAIS_5 0.4077 0.4077 0 . 0
## PAIS_6 0.0200 0.0200 0 . 0
## eCDF Max Std. Pair Dist.
## distance 0 0
## Helevel 0 0
## windex5 0 0
## PAIS_1 0 0
## PAIS_2 0 0
## PAIS_3 0 0
## PAIS_4 0 0
## PAIS_5 0 0
## PAIS_6 0 0
##
## Sample Sizes:
## Control Treated
## All 51739 1646
## Matched 1646 1646
## Unmatched 50093 0
## Discarded 0 0
# Jitter plot of the matchit object. As the recorded message below shows,
# this plot offers point-and-click identification of units.
plot(x = match.out, type = "jitter")

## To identify the units, use first mouse button; to stop, use second.
# Histogram view of the same matchit object; col.axis = 4 is presumably
# forwarded to the underlying graphics call to colour the axis annotation.
plot(x = match.out, type = "hist", col.axis = 4)
# Extract the matched sample, one data frame per group.
# `group` is spelled out in full ("treated") instead of relying on partial
# matching of the abbreviated value "treat".
# Per the MatchIt documentation, match.data() also appends matching columns
# such as `subclass` (pair membership) -- confirm for the installed version.
match.data2 <- match.data(match.out, group = "treated")
match.data3 <- match.data(match.out, group = "control")
library(psych)

# Descriptive statistics of Wscore among the matched treated units.
describe(match.data2[["Wscore"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 1646 -32.12 93.78 -0.25 -26.7 1.17 -480.05 195.5 675.55 -0.82
## kurtosis se
## X1 0.81 2.31
# Descriptive statistics of Wscore among the matched control units.
describe(match.data3[["Wscore"]])
## vars n mean sd median trimmed mad min max range skew
## X1 1 1646 -30.48 91.28 -0.23 -25.77 1.16 -427.13 182.05 609.18 -0.75
## kurtosis se
## X1 0.89 2.25
# Paired t-test of Wscore between matched treated and control units.
#
# match.data() keeps each group in the row order of the original data, so
# row i of match.data2 is not, in general, the matched partner of row i of
# match.data3. A paired test on the raw vectors therefore pairs arbitrary
# units. Sort both groups by the pair identifier (`subclass`) so that each
# position holds one matched pair.
stopifnot(
  "subclass" %in% names(match.data2),
  "subclass" %in% names(match.data3)
)
treated_by_pair <- match.data2[order(match.data2$subclass), ]
control_by_pair <- match.data3[order(match.data3$subclass), ]

# Every position must now refer to the same pair in both groups.
stopifnot(
  identical(
    as.character(treated_by_pair$subclass),
    as.character(control_by_pair$subclass)
  )
)

# NOTE: the output recorded below was produced with the unaligned vectors.
# The mean difference does not depend on the ordering, but t, the confidence
# interval and the p-value do; re-run to refresh them.
t.test(treated_by_pair$Wscore, control_by_pair$Wscore, paired = TRUE)
##
## Paired t-test
##
## data: match.data2$Wscore and match.data3$Wscore
## t = -0.66306, df = 1645, p-value = 0.5074
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -6.512124 3.221589
## sample estimates:
## mean difference
## -1.645267