library(nlme)
library(ggplot2)
Read in the data
urisurvey = read.csv("../datafiles/urisurvey.csv", na.strings = c("", "NA"))
uri = urisurvey[complete.cases(urisurvey), ]
summary(uri)
## lat long phone countyfips
## Min. :-106.62 Min. :26.10 Min. :2.023e+09 Min. :48013
## 1st Qu.: -98.38 1st Qu.:29.60 1st Qu.:2.556e+09 1st Qu.:48113
## Median : -97.24 Median :30.11 Median :5.129e+09 Median :48201
## Mean : -97.64 Mean :30.67 Mean :5.692e+09 Mean :48206
## 3rd Qu.: -95.66 3rd Qu.:32.64 3rd Qu.:8.324e+09 3rd Qu.:48303
## Max. : -93.75 Max. :33.64 Max. :9.800e+09 Max. :48497
## zcta msa serveyversion hud
## Min. :75006 Length:780 Min. :1.000 Length:780
## 1st Qu.:76232 Class :character 1st Qu.:2.000 Class :character
## Median :77545 Mode :character Median :2.000 Mode :character
## Mean :77439 Mean :2.379
## 3rd Qu.:78259 3rd Qu.:3.000
## Max. :79938 Max. :3.100
## housetype coping preparedness pipesburst
## Length:780 Min. : 1.000 Length:780 Length:780
## Class :character 1st Qu.: 3.000 Class :character Class :character
## Mode :character Median : 6.000 Mode :character Mode :character
## Mean : 5.563
## 3rd Qu.: 8.000
## Max. :10.000
## pipeflooding watermainbreak heatingfailure foundationdamage
## Length:780 Length:780 Length:780 Length:780
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## winddamage roofdamage otherdamage hoursnoelectric
## Length:780 Length:780 Length:780 Min. : 0.00
## Class :character Class :character Class :character 1st Qu.: 0.00
## Mode :character Mode :character Mode :character Median : 24.00
## Mean : 41.27
## 3rd Qu.: 72.00
## Max. :504.00
## hoursnopipedwater daysmaxunder32 sumdamage
## Min. : 0.00 Min. :0.000 Min. :0.000
## 1st Qu.: 0.00 1st Qu.:1.000 1st Qu.:0.000
## Median : 0.00 Median :3.000 Median :1.000
## Mean : 32.34 Mean :3.694 Mean :1.806
## 3rd Qu.: 48.00 3rd Qu.:7.000 3rd Qu.:3.000
## Max. :672.00 Max. :9.000 Max. :8.000
Set categorical data as factors
uri$fmsa = factor(uri$msa)
uri$hud = factor(uri$hud, level = c("No", "Yes"))
uri$preparedness = factor(uri$preparedness, level = c("well prepared", "somewhat prepared", "not prepared at all"))
Exploratory analysis
hist(uri$coping)
plot(coping ~ preparedness, data = uri, ylab = "Coping", xlab = "Disaster preparedness")
plot(coping ~ fmsa, data = uri, ylab = "Coping", xlab = "Metropolitan Statistical Area")
qplot(preparedness,
coping,
data = uri,
geom = 'boxplot',
fill = preparedness,
xlab = "Preparedness",
ylab = "Coping ability")
msarespondents = ggplot(uri, aes(fmsa)) + labs(x = "Metropolitan Statistical Area", y ="Respondents")
msarespondents + geom_bar(aes(fill=hud), )
msa = c("Austin", "Beaumont", "Dallas", "El Paso", "Houston", "Lubbock", "McAllen", "San Antonio")
meanmsa = c(5.6, 0.95, 7.77, 0.24, 0.56, 7.33, 0.00, 3.28)
maxtempavgbymsa = data.frame(msa, meanmsa)
barplot(meanmsa~msa, data = maxtempavgbymsa, ylim = c(0,10), xlab = "Metropolitan Statistical Area", ylab = "Mean # days where max temp was below 32F", col = "darkblue")
uri$fcoping = factor(uri$coping)
plot(uri$fcoping, xlab = "Coping", ylab = "Number of Occurrences", ylim = c(0,130))
damage = c(0,1,2,3,4,5,6,7,8)
meancoping = c(3.90783410138249,5.39175257731959,5.66891891891892,6.4367816091954,7.234375,8.23684210526316,8.26666666666667,8.64285714285714,8.33333333333333)
meancopingbydamagetable = data.frame(damage, meancoping)
ggplot(meancopingbydamagetable, aes(x=damage, y=meancoping)) +
geom_bar(stat="identity") +
labs(x = "# of occurrences of home damage", y = "Mean coping ability")
Statistical Modeling
coping as a function of sumdamage
copingdamage.lm1 = lm(coping~sumdamage, data = uri)
summary(copingdamage.lm1)
##
## Call:
## lm(formula = coping ~ sumdamage, data = uri)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.1737 -2.2363 0.0294 2.0294 5.7637
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.23631 0.13085 32.38 <2e-16 ***
## sumdamage 0.73434 0.05188 14.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.55 on 778 degrees of freedom
## Multiple R-squared: 0.2048, Adjusted R-squared: 0.2037
## F-statistic: 200.3 on 1 and 778 DF, p-value: < 2.2e-16
AIC(logLik(copingdamage.lm1))
## [1] 3677.89
coping as a function of maxtempbelow32
copingtemp.lm3 = lm(coping ~ daysmaxunder32, data = uri)
summary(copingtemp.lm3)
##
## Call:
## lm(formula = coping ~ daysmaxunder32, data = uri)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.609 -2.546 0.391 2.404 4.503
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.60898 0.15886 35.31 <2e-16 ***
## daysmaxunder32 -0.01250 0.03289 -0.38 0.704
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.859 on 778 degrees of freedom
## Multiple R-squared: 0.0001856, Adjusted R-squared: -0.001099
## F-statistic: 0.1444 on 1 and 778 DF, p-value: 0.704
coping as a function of sumdamage and preparedness
copingdamage.lm2 = lm(coping ~ sumdamage + preparedness, data=uri)
summary(copingdamage.lm2)
##
## Call:
## lm(formula = coping ~ sumdamage + preparedness, data = uri)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.9040 -1.8977 -0.1195 1.8523 6.6992
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.30083 0.17725 18.623 < 2e-16 ***
## sumdamage 0.59481 0.05163 11.521 < 2e-16 ***
## preparednesssomewhat prepared 1.22389 0.21531 5.684 1.86e-08 ***
## preparednessnot prepared at all 2.25207 0.24540 9.177 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.425 on 776 degrees of freedom
## Multiple R-squared: 0.2827, Adjusted R-squared: 0.28
## F-statistic: 102 on 3 and 776 DF, p-value: < 2.2e-16
AIC(logLik(copingdamage.lm2))
## [1] 3601.393
coping w/o slope coef. to compare between fixed intercept and random intercept (varying by MSA)
coping.m0.lm = glm(coping ~ 1, data=uri)
coping.m1.lme = lme(coping ~ 1, random = ~1|fmsa, data=uri)
AIC(logLik(coping.m0.lm))
## [1] 3854.602
AIC(logLik(coping.m1.lme))
## [1] 3831.901
coping as a function of sumdamage (fixed effect) w/ random intercept for each MSA
coping.m2.lme = lme(coping ~ sumdamage,
random = ~1|fmsa,
data = uri)
summary(coping.m2.lme)
## Linear mixed-effects model fit by REML
## Data: uri
## AIC BIC logLik
## 3662.498 3681.125 -1827.249
##
## Random effects:
## Formula: ~1 | fmsa
## (Intercept) Residual
## StdDev: 0.7334925 2.489064
##
## Fixed effects: coping ~ sumdamage
## Value Std.Error DF t-value p-value
## (Intercept) 4.085400 0.30166261 771 13.54294 0
## sumdamage 0.713426 0.05087294 771 14.02369 0
## Correlation:
## (Intr)
## sumdamage -0.291
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.36692334 -0.81625119 -0.04055097 0.78836132 2.39539133
##
## Number of Observations: 780
## Number of Groups: 8
coping as a function of sumdamage (fixed effect) and preparednes w/ random intercept for each MSA
coping.m3.lme = lme(coping ~ sumdamage + preparedness,
random = ~1|fmsa,
data = uri)
summary(coping.m3.lme)
## Linear mixed-effects model fit by REML
## Data: uri
## AIC BIC logLik
## 3583.804 3611.729 -1785.902
##
## Random effects:
## Formula: ~1 | fmsa
## (Intercept) Residual
## StdDev: 0.744501 2.357949
##
## Fixed effects: coping ~ sumdamage + preparedness
## Value Std.Error DF t-value p-value
## (Intercept) 3.1526577 0.3244228 769 9.717743 0
## sumdamage 0.5728216 0.0504191 769 11.361204 0
## preparednesssomewhat prepared 1.2724718 0.2099175 769 6.061771 0
## preparednessnot prepared at all 2.2737884 0.2400587 769 9.471803 0
## Correlation:
## (Intr) sumdmg prprdp
## sumdamage -0.160
## preparednesssomewhat prepared -0.358 -0.174
## preparednessnot prepared at all -0.288 -0.293 0.587
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.50590703 -0.81146609 -0.02406378 0.72003385 2.62838787
##
## Number of Observations: 780
## Number of Groups: 8
coping as a function of sumdamage (random effect) and preparedness (fixed) w/ a random intercept for each MSA
coping.m5.lme = lme(coping ~ sumdamage + preparedness,
random = ~ sumdamage|fmsa,
data = uri)
summary(coping.m5.lme)
## Linear mixed-effects model fit by REML
## Data: uri
## AIC BIC logLik
## 3586.272 3623.506 -1785.136
##
## Random effects:
## Formula: ~sumdamage | fmsa
## Structure: General positive-definite, Log-Cholesky parametrization
## StdDev Corr
## (Intercept) 0.8643602 (Intr)
## sumdamage 0.1182895 -0.803
## Residual 2.3530288
##
## Fixed effects: coping ~ sumdamage + preparedness
## Value Std.Error DF t-value p-value
## (Intercept) 3.1135667 0.3633175 769 8.569823 0
## sumdamage 0.5949991 0.0691214 769 8.608029 0
## preparednesssomewhat prepared 1.2675615 0.2096221 769 6.046889 0
## preparednessnot prepared at all 2.2613497 0.2395286 769 9.440835 0
## Correlation:
## (Intr) sumdmg prprdp
## sumdamage -0.568
## preparednesssomewhat prepared -0.316 -0.133
## preparednessnot prepared at all -0.255 -0.220 0.587
##
## Standardized Within-Group Residuals:
## Min Q1 Med Q3 Max
## -2.52746794 -0.79388104 0.02742585 0.72041444 2.60633106
##
## Number of Observations: 780
## Number of Groups: 8
use ANOVA to compare the models with sumdamage as a fixed effect, and sumdamage as a random effect
anova(coping.m3.lme, coping.m5.lme)
## Model df AIC BIC logLik Test L.Ratio p-value
## coping.m3.lme 1 6 3583.804 3611.729 -1785.902
## coping.m5.lme 2 8 3586.272 3623.506 -1785.136 1 vs 2 1.531371 0.465