EricGoodwin_GEOG6000

library(nlme)
library(ggplot2)

Read in the data

urisurvey = read.csv("../datafiles/urisurvey.csv", na.strings = c("", "NA"))
uri = urisurvey[complete.cases(urisurvey), ]
summary(uri)

##       lat               long           phone             countyfips   
##  Min.   :-106.62   Min.   :26.10   Min.   :2.023e+09   Min.   :48013  
##  1st Qu.: -98.38   1st Qu.:29.60   1st Qu.:2.556e+09   1st Qu.:48113  
##  Median : -97.24   Median :30.11   Median :5.129e+09   Median :48201  
##  Mean   : -97.64   Mean   :30.67   Mean   :5.692e+09   Mean   :48206  
##  3rd Qu.: -95.66   3rd Qu.:32.64   3rd Qu.:8.324e+09   3rd Qu.:48303  
##  Max.   : -93.75   Max.   :33.64   Max.   :9.800e+09   Max.   :48497  
##       zcta           msa            serveyversion       hud           
##  Min.   :75006   Length:780         Min.   :1.000   Length:780        
##  1st Qu.:76232   Class :character   1st Qu.:2.000   Class :character  
##  Median :77545   Mode  :character   Median :2.000   Mode  :character  
##  Mean   :77439                      Mean   :2.379                     
##  3rd Qu.:78259                      3rd Qu.:3.000                     
##  Max.   :79938                      Max.   :3.100                     
##   housetype             coping       preparedness        pipesburst       
##  Length:780         Min.   : 1.000   Length:780         Length:780        
##  Class :character   1st Qu.: 3.000   Class :character   Class :character  
##  Mode  :character   Median : 6.000   Mode  :character   Mode  :character  
##                     Mean   : 5.563                                        
##                     3rd Qu.: 8.000                                        
##                     Max.   :10.000                                        
##  pipeflooding       watermainbreak     heatingfailure     foundationdamage  
##  Length:780         Length:780         Length:780         Length:780        
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   winddamage         roofdamage        otherdamage        hoursnoelectric 
##  Length:780         Length:780         Length:780         Min.   :  0.00  
##  Class :character   Class :character   Class :character   1st Qu.:  0.00  
##  Mode  :character   Mode  :character   Mode  :character   Median : 24.00  
##                                                           Mean   : 41.27  
##                                                           3rd Qu.: 72.00  
##                                                           Max.   :504.00  
##  hoursnopipedwater daysmaxunder32    sumdamage    
##  Min.   :  0.00    Min.   :0.000   Min.   :0.000  
##  1st Qu.:  0.00    1st Qu.:1.000   1st Qu.:0.000  
##  Median :  0.00    Median :3.000   Median :1.000  
##  Mean   : 32.34    Mean   :3.694   Mean   :1.806  
##  3rd Qu.: 48.00    3rd Qu.:7.000   3rd Qu.:3.000  
##  Max.   :672.00    Max.   :9.000   Max.   :8.000

Set categorical data as factors

uri$fmsa = factor(uri$msa)
uri$hud = factor(uri$hud, level = c("No", "Yes"))
uri$preparedness = factor(uri$preparedness, level = c("well prepared", "somewhat prepared", "not prepared at all"))

Exploratory analysis

hist(uri$coping)

plot(coping ~ preparedness, data = uri, ylab = "Coping", xlab = "Disaster preparedness")

plot(coping ~ fmsa, data = uri, ylab = "Coping", xlab = "Metropolitan Statistical Area")

qplot(preparedness,
      coping, 
      data = uri,
      geom = 'boxplot',
      fill = preparedness,
      xlab = "Preparedness",
      ylab = "Coping ability")

msarespondents = ggplot(uri, aes(fmsa)) + labs(x = "Metropolitan Statistical Area", y ="Respondents")
msarespondents + geom_bar(aes(fill=hud), )

msa = c("Austin", "Beaumont", "Dallas", "El Paso", "Houston", "Lubbock", "McAllen", "San Antonio")
meanmsa = c(5.6, 0.95, 7.77, 0.24, 0.56, 7.33, 0.00, 3.28)
maxtempavgbymsa = data.frame(msa, meanmsa)
barplot(meanmsa~msa, data = maxtempavgbymsa, ylim = c(0,10), xlab = "Metropolitan Statistical Area", ylab = "Mean # days where max temp was below 32F", col = "darkblue")

uri$fcoping = factor(uri$coping)
plot(uri$fcoping, xlab = "Coping", ylab = "Number of Occurrences", ylim = c(0,130))

damage = c(0,1,2,3,4,5,6,7,8)
meancoping = c(3.90783410138249,5.39175257731959,5.66891891891892,6.4367816091954,7.234375,8.23684210526316,8.26666666666667,8.64285714285714,8.33333333333333)
meancopingbydamagetable = data.frame(damage, meancoping)
ggplot(meancopingbydamagetable, aes(x=damage, y=meancoping)) +
  geom_bar(stat="identity") +
  labs(x = "# of occurrences of home damage", y = "Mean coping ability")

Statistical Modeling

coping as a function of sumdamage

copingdamage.lm1 = lm(coping~sumdamage, data = uri)
summary(copingdamage.lm1)

## 
## Call:
## lm(formula = coping ~ sumdamage, data = uri)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.1737 -2.2363  0.0294  2.0294  5.7637 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  4.23631    0.13085   32.38   <2e-16 ***
## sumdamage    0.73434    0.05188   14.15   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.55 on 778 degrees of freedom
## Multiple R-squared:  0.2048, Adjusted R-squared:  0.2037 
## F-statistic: 200.3 on 1 and 778 DF,  p-value: < 2.2e-16

AIC(logLik(copingdamage.lm1))

## [1] 3677.89

coping as a function of maxtempbelow32

copingtemp.lm3 = lm(coping ~ daysmaxunder32, data = uri)
summary(copingtemp.lm3)

## 
## Call:
## lm(formula = coping ~ daysmaxunder32, data = uri)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.609 -2.546  0.391  2.404  4.503 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     5.60898    0.15886   35.31   <2e-16 ***
## daysmaxunder32 -0.01250    0.03289   -0.38    0.704    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.859 on 778 degrees of freedom
## Multiple R-squared:  0.0001856,  Adjusted R-squared:  -0.001099 
## F-statistic: 0.1444 on 1 and 778 DF,  p-value: 0.704

coping as a function of sumdamage and preparedness

copingdamage.lm2 = lm(coping ~ sumdamage + preparedness, data=uri)
summary(copingdamage.lm2)

## 
## Call:
## lm(formula = coping ~ sumdamage + preparedness, data = uri)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.9040 -1.8977 -0.1195  1.8523  6.6992 
## 
## Coefficients:
##                                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                      3.30083    0.17725  18.623  < 2e-16 ***
## sumdamage                        0.59481    0.05163  11.521  < 2e-16 ***
## preparednesssomewhat prepared    1.22389    0.21531   5.684 1.86e-08 ***
## preparednessnot prepared at all  2.25207    0.24540   9.177  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.425 on 776 degrees of freedom
## Multiple R-squared:  0.2827, Adjusted R-squared:   0.28 
## F-statistic:   102 on 3 and 776 DF,  p-value: < 2.2e-16

AIC(logLik(copingdamage.lm2))

## [1] 3601.393

coping w/o slope coef. to compare between fixed intercept and random intercept (varying by MSA)

coping.m0.lm = glm(coping ~ 1, data=uri)
coping.m1.lme = lme(coping ~ 1, random = ~1|fmsa, data=uri)
AIC(logLik(coping.m0.lm))

## [1] 3854.602

AIC(logLik(coping.m1.lme))

## [1] 3831.901

coping as a function of sumdamage (fixed effect) w/ random intercept for each MSA

coping.m2.lme = lme(coping ~ sumdamage,
                    random = ~1|fmsa,
                    data = uri)
summary(coping.m2.lme)

## Linear mixed-effects model fit by REML
##   Data: uri 
##        AIC      BIC    logLik
##   3662.498 3681.125 -1827.249
## 
## Random effects:
##  Formula: ~1 | fmsa
##         (Intercept) Residual
## StdDev:   0.7334925 2.489064
## 
## Fixed effects:  coping ~ sumdamage 
##                Value  Std.Error  DF  t-value p-value
## (Intercept) 4.085400 0.30166261 771 13.54294       0
## sumdamage   0.713426 0.05087294 771 14.02369       0
##  Correlation: 
##           (Intr)
## sumdamage -0.291
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.36692334 -0.81625119 -0.04055097  0.78836132  2.39539133 
## 
## Number of Observations: 780
## Number of Groups: 8

coping as a function of sumdamage (fixed effect) and preparednes w/ random intercept for each MSA

coping.m3.lme = lme(coping ~ sumdamage + preparedness,
                    random = ~1|fmsa,
                    data = uri)
summary(coping.m3.lme)

## Linear mixed-effects model fit by REML
##   Data: uri 
##        AIC      BIC    logLik
##   3583.804 3611.729 -1785.902
## 
## Random effects:
##  Formula: ~1 | fmsa
##         (Intercept) Residual
## StdDev:    0.744501 2.357949
## 
## Fixed effects:  coping ~ sumdamage + preparedness 
##                                     Value Std.Error  DF   t-value p-value
## (Intercept)                     3.1526577 0.3244228 769  9.717743       0
## sumdamage                       0.5728216 0.0504191 769 11.361204       0
## preparednesssomewhat prepared   1.2724718 0.2099175 769  6.061771       0
## preparednessnot prepared at all 2.2737884 0.2400587 769  9.471803       0
##  Correlation: 
##                                 (Intr) sumdmg prprdp
## sumdamage                       -0.160              
## preparednesssomewhat prepared   -0.358 -0.174       
## preparednessnot prepared at all -0.288 -0.293  0.587
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.50590703 -0.81146609 -0.02406378  0.72003385  2.62838787 
## 
## Number of Observations: 780
## Number of Groups: 8

coping as a function of sumdamage (random effect) and preparedness (fixed) w/ a random intercept for each MSA

coping.m5.lme = lme(coping ~ sumdamage + preparedness, 
               random = ~ sumdamage|fmsa,
               data = uri)

summary(coping.m5.lme)

## Linear mixed-effects model fit by REML
##   Data: uri 
##        AIC      BIC    logLik
##   3586.272 3623.506 -1785.136
## 
## Random effects:
##  Formula: ~sumdamage | fmsa
##  Structure: General positive-definite, Log-Cholesky parametrization
##             StdDev    Corr  
## (Intercept) 0.8643602 (Intr)
## sumdamage   0.1182895 -0.803
## Residual    2.3530288       
## 
## Fixed effects:  coping ~ sumdamage + preparedness 
##                                     Value Std.Error  DF  t-value p-value
## (Intercept)                     3.1135667 0.3633175 769 8.569823       0
## sumdamage                       0.5949991 0.0691214 769 8.608029       0
## preparednesssomewhat prepared   1.2675615 0.2096221 769 6.046889       0
## preparednessnot prepared at all 2.2613497 0.2395286 769 9.440835       0
##  Correlation: 
##                                 (Intr) sumdmg prprdp
## sumdamage                       -0.568              
## preparednesssomewhat prepared   -0.316 -0.133       
## preparednessnot prepared at all -0.255 -0.220  0.587
## 
## Standardized Within-Group Residuals:
##         Min          Q1         Med          Q3         Max 
## -2.52746794 -0.79388104  0.02742585  0.72041444  2.60633106 
## 
## Number of Observations: 780
## Number of Groups: 8

use ANOVA to compare the models with sumdamage as a fixed effect, and sumdamage as a random effect

anova(coping.m3.lme, coping.m5.lme)

##               Model df      AIC      BIC    logLik   Test  L.Ratio p-value
## coping.m3.lme     1  6 3583.804 3611.729 -1785.902                        
## coping.m5.lme     2  8 3586.272 3623.506 -1785.136 1 vs 2 1.531371   0.465

EricGoodwin_GEOG6000_Final