setwd ("C:/doug/classes/geog6000/labs")
statedata <- read.csv("statedata.csv", row.names=1, na.strings="")
mycol <- heat.colors(5)
summary(statedata)
## Population Income Illiteracy Life.Exp
## Min. : 365 Min. :3098 Min. :0.500 Min. :67.96
## 1st Qu.: 1080 1st Qu.:3993 1st Qu.:0.625 1st Qu.:70.12
## Median : 2838 Median :4519 Median :0.950 Median :70.67
## Mean : 4246 Mean :4436 Mean :1.170 Mean :70.88
## 3rd Qu.: 4968 3rd Qu.:4814 3rd Qu.:1.575 3rd Qu.:71.89
## Max. :21198 Max. :6315 Max. :2.800 Max. :73.60
## Murder HS.Grad Frost Area
## Min. : 1.400 Min. :37.80 Min. : 0.00 Min. : 1049
## 1st Qu.: 4.350 1st Qu.:48.05 1st Qu.: 66.25 1st Qu.: 36985
## Median : 6.850 Median :53.25 Median :114.50 Median : 54277
## Mean : 7.378 Mean :53.11 Mean :104.46 Mean : 70736
## 3rd Qu.:10.675 3rd Qu.:59.15 3rd Qu.:139.75 3rd Qu.: 81163
## Max. :15.100 Max. :67.30 Max. :188.00 Max. :566432
statedata2 <- statedata[,-1]
nullmodel <- lm(Life.Exp ~ 1, data = statedata2)
summary(nullmodel)
##
## Call:
## lm(formula = Life.Exp ~ 1, data = statedata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9186 -0.7611 -0.2036 1.0139 2.7214
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.8786 0.1898 373.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.342 on 49 degrees of freedom
fullmodel <- lm(Life.Exp ~ ., data = statedata2)
summary(fullmodel)
##
## Call:
## lm(formula = Life.Exp ~ ., data = statedata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39934 -0.53722 0.08628 0.53270 1.28452
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.112e+01 1.788e+00 39.771 < 2e-16 ***
## Income 1.219e-04 2.363e-04 0.516 0.6088
## Illiteracy -1.399e-01 3.617e-01 -0.387 0.7008
## Murder -2.742e-01 4.517e-02 -6.070 2.89e-07 ***
## HS.Grad 4.155e-02 2.352e-02 1.767 0.0843 .
## Frost -7.495e-03 3.056e-03 -2.452 0.0183 *
## Area -2.625e-07 1.706e-06 -0.154 0.8784
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7632 on 43 degrees of freedom
## Multiple R-squared: 0.7164, Adjusted R-squared: 0.6768
## F-statistic: 18.1 on 6 and 43 DF, p-value: 2.41e-10
stepmodelfit <- step(nullmodel, scope=formula(fullmodel))
## Start: AIC=30.44
## Life.Exp ~ 1
##
## Df Sum of Sq RSS AIC
## + Murder 1 53.838 34.461 -14.609
## + Illiteracy 1 30.578 57.721 11.179
## + HS.Grad 1 29.931 58.368 11.737
## + Income 1 10.223 78.076 26.283
## + Frost 1 6.064 82.235 28.878
## <none> 88.299 30.435
## + Area 1 1.017 87.282 31.856
##
## Step: AIC=-14.61
## Life.Exp ~ Murder
##
## Df Sum of Sq RSS AIC
## + HS.Grad 1 4.691 29.770 -19.925
## + Frost 1 3.135 31.327 -17.378
## + Income 1 2.405 32.057 -16.226
## <none> 34.461 -14.609
## + Area 1 0.470 33.992 -13.295
## + Illiteracy 1 0.273 34.188 -13.007
## - Murder 1 53.838 88.299 30.435
##
## Step: AIC=-19.93
## Life.Exp ~ Murder + HS.Grad
##
## Df Sum of Sq RSS AIC
## + Frost 1 4.3987 25.372 -25.920
## <none> 29.770 -19.925
## + Illiteracy 1 0.4419 29.328 -18.673
## + Area 1 0.2775 29.493 -18.394
## + Income 1 0.1022 29.668 -18.097
## - HS.Grad 1 4.6910 34.461 -14.609
## - Murder 1 28.5974 58.368 11.737
##
## Step: AIC=-25.92
## Life.Exp ~ Murder + HS.Grad + Frost
##
## Df Sum of Sq RSS AIC
## <none> 25.372 -25.920
## + Income 1 0.182 25.189 -24.280
## + Illiteracy 1 0.172 25.200 -24.259
## + Area 1 0.026 25.346 -23.970
## - Frost 1 4.399 29.770 -19.925
## - HS.Grad 1 5.955 31.327 -17.378
## - Murder 1 32.756 58.128 13.531
finalstepfit <- lm(Life.Exp ~ Frost + HS.Grad + Murder, data = statedata2)
summary(finalstepfit)
##
## Call:
## lm(formula = Life.Exp ~ Frost + HS.Grad + Murder, data = statedata2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5015 -0.5391 0.1014 0.5921 1.2268
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.036379 0.983262 72.246 < 2e-16 ***
## Frost -0.006912 0.002447 -2.824 0.00699 **
## HS.Grad 0.049949 0.015201 3.286 0.00195 **
## Murder -0.283065 0.036731 -7.706 8.04e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7427 on 46 degrees of freedom
## Multiple R-squared: 0.7127, Adjusted R-squared: 0.6939
## F-statistic: 38.03 on 3 and 46 DF, p-value: 1.634e-12
finalstepfit <- lm(Life.Exp ~ Frost + HS.Grad + Murder, data = statedata2)
print (statedata2, which = "UT" )
## Income Illiteracy Life.Exp Murder HS.Grad Frost Area
## AL 3624 2.1 69.05 15.1 41.3 20 50708
## AK 6315 1.5 69.31 11.3 66.7 152 566432
## AZ 4530 1.8 70.55 7.8 58.1 15 113417
## AR 3378 1.9 70.66 10.1 39.9 65 51945
## CA 5114 1.1 71.71 10.3 62.6 20 156361
## CO 4884 0.7 72.06 6.8 63.9 166 103766
## CT 5348 1.1 72.48 3.1 56.0 139 4862
## DE 4809 0.9 70.06 6.2 54.6 103 1982
## FL 4815 1.3 70.66 10.7 52.6 11 54090
## GA 4091 2.0 68.54 13.9 40.6 60 58073
## HI 4963 1.9 73.60 6.2 61.9 0 6425
## ID 4119 0.6 71.87 5.3 59.5 126 82677
## IL 5107 0.9 70.14 10.3 52.6 127 55748
## IN 4458 0.7 70.88 7.1 52.9 122 36097
## IA 4628 0.5 72.56 2.3 59.0 140 55941
## KS 4669 0.6 72.58 4.5 59.9 114 81787
## KY 3712 1.6 70.10 10.6 38.5 95 39650
## LA 3545 2.8 68.76 13.2 42.2 12 44930
## ME 3694 0.7 70.39 2.7 54.7 161 30920
## MD 5299 0.9 70.22 8.5 52.3 101 9891
## MA 4755 1.1 71.83 3.3 58.5 103 7826
## MI 4751 0.9 70.63 11.1 52.8 125 56817
## MN 4675 0.6 72.96 2.3 57.6 160 79289
## MS 3098 2.4 68.09 12.5 41.0 50 47296
## MO 4254 0.8 70.69 9.3 48.8 108 68995
## MT 4347 0.6 70.56 5.0 59.2 155 145587
## NE 4508 0.6 72.60 2.9 59.3 139 76483
## NV 5149 0.5 69.03 11.5 65.2 188 109889
## NH 4281 0.7 71.23 3.3 57.6 174 9027
## NJ 5237 1.1 70.93 5.2 52.5 115 7521
## NM 3601 2.2 70.32 9.7 55.2 120 121412
## NY 4903 1.4 70.55 10.9 52.7 82 47831
## NC 3875 1.8 69.21 11.1 38.5 80 48798
## ND 5087 0.8 72.78 1.4 50.3 186 69273
## OH 4561 0.8 70.82 7.4 53.2 124 40975
## OK 3983 1.1 71.42 6.4 51.6 82 68782
## OR 4660 0.6 72.13 4.2 60.0 44 96184
## PA 4449 1.0 70.43 6.1 50.2 126 44966
## RI 4558 1.3 71.90 2.4 46.4 127 1049
## SC 3635 2.3 67.96 11.6 37.8 65 30225
## SD 4167 0.5 72.08 1.7 53.3 172 75955
## TN 3821 1.7 70.11 11.0 41.8 70 41328
## TX 4188 2.2 70.90 12.2 47.4 35 262134
## UT 4022 0.6 72.90 4.5 67.3 137 82096
## VT 3907 0.6 71.64 5.5 57.1 168 9267
## VA 4701 1.4 70.08 9.5 47.8 85 39780
## WA 4864 0.6 71.72 4.3 63.5 32 66570
## WV 3617 1.4 69.48 6.7 41.6 100 24070
## WI 4468 0.7 72.48 3.0 54.5 149 54464
## WY 4566 0.6 70.29 6.9 62.9 173 97203
utah = 71.036379 + 4.5 * -0.283065 + 67.3 * 0.049949 + 137 * -0.006912
print (utah)
## [1] 72.17721
# 72.17721
predict(finalstepfit, level = 0.95, interval = "conf", statedata2[44,])
## fit lwr upr
## UT 72.17723 71.74608 72.60837
plot(finalstepfit)
# more stuff out of curiousity
plot (Life.Exp ~ log(Population), data = statedata, main = "life exp x log population, red = full model derived abline, blue = nullmodel")
abline(fullmodel, col = "red")
## Warning in abline(fullmodel, col = "red"): only using the first two of 7
## regression coefficients
abline(nullmodel, col = "blue", h=5, lty = 3)
summary(statedata)
## Population Income Illiteracy Life.Exp
## Min. : 365 Min. :3098 Min. :0.500 Min. :67.96
## 1st Qu.: 1080 1st Qu.:3993 1st Qu.:0.625 1st Qu.:70.12
## Median : 2838 Median :4519 Median :0.950 Median :70.67
## Mean : 4246 Mean :4436 Mean :1.170 Mean :70.88
## 3rd Qu.: 4968 3rd Qu.:4814 3rd Qu.:1.575 3rd Qu.:71.89
## Max. :21198 Max. :6315 Max. :2.800 Max. :73.60
## Murder HS.Grad Frost Area
## Min. : 1.400 Min. :37.80 Min. : 0.00 Min. : 1049
## 1st Qu.: 4.350 1st Qu.:48.05 1st Qu.: 66.25 1st Qu.: 36985
## Median : 6.850 Median :53.25 Median :114.50 Median : 54277
## Mean : 7.378 Mean :53.11 Mean :104.46 Mean : 70736
## 3rd Qu.:10.675 3rd Qu.:59.15 3rd Qu.:139.75 3rd Qu.: 81163
## Max. :15.100 Max. :67.30 Max. :188.00 Max. :566432
brewers_desired_nullmodel <- lm(Life.Exp ~1, data = statedata)
brewers_desired_fullmodel <- lm(Life.Exp ~., data = statedata)
brewers_stepmodel <- step(brewers_desired_nullmodel, scope = formula(brewers_desired_fullmodel))
## Start: AIC=30.44
## Life.Exp ~ 1
##
## Df Sum of Sq RSS AIC
## + Murder 1 53.838 34.461 -14.609
## + Illiteracy 1 30.578 57.721 11.179
## + HS.Grad 1 29.931 58.368 11.737
## + Income 1 10.223 78.076 26.283
## + Frost 1 6.064 82.235 28.878
## <none> 88.299 30.435
## + Area 1 1.017 87.282 31.856
## + Population 1 0.409 87.890 32.203
##
## Step: AIC=-14.61
## Life.Exp ~ Murder
##
## Df Sum of Sq RSS AIC
## + HS.Grad 1 4.691 29.770 -19.925
## + Population 1 4.016 30.445 -18.805
## + Frost 1 3.135 31.327 -17.378
## + Income 1 2.405 32.057 -16.226
## <none> 34.461 -14.609
## + Area 1 0.470 33.992 -13.295
## + Illiteracy 1 0.273 34.188 -13.007
## - Murder 1 53.838 88.299 30.435
##
## Step: AIC=-19.93
## Life.Exp ~ Murder + HS.Grad
##
## Df Sum of Sq RSS AIC
## + Frost 1 4.3987 25.372 -25.920
## + Population 1 3.3405 26.430 -23.877
## <none> 29.770 -19.925
## + Illiteracy 1 0.4419 29.328 -18.673
## + Area 1 0.2775 29.493 -18.394
## + Income 1 0.1022 29.668 -18.097
## - HS.Grad 1 4.6910 34.461 -14.609
## - Murder 1 28.5974 58.368 11.737
##
## Step: AIC=-25.92
## Life.Exp ~ Murder + HS.Grad + Frost
##
## Df Sum of Sq RSS AIC
## + Population 1 2.064 23.308 -28.161
## <none> 25.372 -25.920
## + Income 1 0.182 25.189 -24.280
## + Illiteracy 1 0.172 25.200 -24.259
## + Area 1 0.026 25.346 -23.970
## - Frost 1 4.399 29.770 -19.925
## - HS.Grad 1 5.955 31.327 -17.378
## - Murder 1 32.756 58.128 13.531
##
## Step: AIC=-28.16
## Life.Exp ~ Murder + HS.Grad + Frost + Population
##
## Df Sum of Sq RSS AIC
## <none> 23.308 -28.161
## + Income 1 0.006 23.302 -26.174
## + Illiteracy 1 0.004 23.304 -26.170
## + Area 1 0.001 23.307 -26.163
## - Population 1 2.064 25.372 -25.920
## - Frost 1 3.122 26.430 -23.877
## - HS.Grad 1 5.112 28.420 -20.246
## - Murder 1 34.816 58.124 15.528
summary(brewers_stepmodel)
##
## Call:
## lm(formula = Life.Exp ~ Murder + HS.Grad + Frost + Population,
## data = statedata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.47095 -0.53464 -0.03701 0.57621 1.50683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.103e+01 9.529e-01 74.542 < 2e-16 ***
## Murder -3.001e-01 3.661e-02 -8.199 1.77e-10 ***
## HS.Grad 4.658e-02 1.483e-02 3.142 0.00297 **
## Frost -5.943e-03 2.421e-03 -2.455 0.01802 *
## Population 5.014e-05 2.512e-05 1.996 0.05201 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7197 on 45 degrees of freedom
## Multiple R-squared: 0.736, Adjusted R-squared: 0.7126
## F-statistic: 31.37 on 4 and 45 DF, p-value: 1.696e-12
predict(brewers_stepmodel, level = 0.95, interval = "conf", statedata[44,])
## fit lwr upr
## UT 72.05753 71.62238 72.49269
plot(brewers_stepmodel, main = "Brewers Desired Step Model")
newstate <- statedata[44,]
summary(newstate)
## Population Income Illiteracy Life.Exp Murder
## Min. :1203 Min. :4022 Min. :0.6 Min. :72.9 Min. :4.5
## 1st Qu.:1203 1st Qu.:4022 1st Qu.:0.6 1st Qu.:72.9 1st Qu.:4.5
## Median :1203 Median :4022 Median :0.6 Median :72.9 Median :4.5
## Mean :1203 Mean :4022 Mean :0.6 Mean :72.9 Mean :4.5
## 3rd Qu.:1203 3rd Qu.:4022 3rd Qu.:0.6 3rd Qu.:72.9 3rd Qu.:4.5
## Max. :1203 Max. :4022 Max. :0.6 Max. :72.9 Max. :4.5
## HS.Grad Frost Area
## Min. :67.3 Min. :137 Min. :82096
## 1st Qu.:67.3 1st Qu.:137 1st Qu.:82096
## Median :67.3 Median :137 Median :82096
## Mean :67.3 Mean :137 Mean :82096
## 3rd Qu.:67.3 3rd Qu.:137 3rd Qu.:82096
## Max. :67.3 Max. :137 Max. :82096
newstate[1] <- 2785
newstate[6] <- 75
newstate[5] <- 1.3
summary(newstate)
## Population Income Illiteracy Life.Exp Murder
## Min. :2785 Min. :4022 Min. :0.6 Min. :72.9 Min. :1.3
## 1st Qu.:2785 1st Qu.:4022 1st Qu.:0.6 1st Qu.:72.9 1st Qu.:1.3
## Median :2785 Median :4022 Median :0.6 Median :72.9 Median :1.3
## Mean :2785 Mean :4022 Mean :0.6 Mean :72.9 Mean :1.3
## 3rd Qu.:2785 3rd Qu.:4022 3rd Qu.:0.6 3rd Qu.:72.9 3rd Qu.:1.3
## Max. :2785 Max. :4022 Max. :0.6 Max. :72.9 Max. :1.3
## HS.Grad Frost Area
## Min. :75 Min. :137 Min. :82096
## 1st Qu.:75 1st Qu.:137 1st Qu.:82096
## Median :75 Median :137 Median :82096
## Mean :75 Mean :137 Mean :82096
## 3rd Qu.:75 3rd Qu.:137 3rd Qu.:82096
## Max. :75 Max. :137 Max. :82096
UTpredict <- predict(brewers_stepmodel, level = 0.95, interval = "conf", newstate[1,])
summary(UTpredict)
## fit lwr upr
## Min. :73.46 Min. :72.84 Min. :74.07
## 1st Qu.:73.46 1st Qu.:72.84 1st Qu.:74.07
## Median :73.46 Median :72.84 Median :74.07
## Mean :73.46 Mean :72.84 Mean :74.07
## 3rd Qu.:73.46 3rd Qu.:72.84 3rd Qu.:74.07
## Max. :73.46 Max. :72.84 Max. :74.07
print(statedata, which = “CA”) #I counted down to find out which was CA, I’ll learn eventually how to do a which statement
statedata5 <- statedata[5,]
statedata5[1] <- 36962
statedata5[6] <- 68.3
statedata5[5] <- 5.3
CApredict <- predict(brewers_stepmodel, level = 0.95, interval = "conf", statedata5[1,])
summary(CApredict)
## fit lwr upr
## Min. :74.35 Min. :72.65 Min. :76.06
## 1st Qu.:74.35 1st Qu.:72.65 1st Qu.:76.06
## Median :74.35 Median :72.65 Median :76.06
## Mean :74.35 Mean :72.65 Mean :76.06
## 3rd Qu.:74.35 3rd Qu.:72.65 3rd Qu.:76.06
## Max. :74.35 Max. :72.65 Max. :76.06
setwd ("C:/doug/classes/geog6000")
temp<- read.csv("normtemp.csv", row.names=1, na.strings="")
temp$sex <- factor(temp$sex, labels=c("male","female"))
summary(temp)
## temp sex weight
## Min. : 96.30 male :65 Min. :57.00
## 1st Qu.: 97.80 female:65 1st Qu.:69.00
## Median : 98.30 Median :74.00
## Mean : 98.25 Mean :73.76
## 3rd Qu.: 98.70 3rd Qu.:79.00
## Max. :100.80 Max. :89.00
temp$wtc <- temp$weight - (mean(temp$weight))
summary(temp$wtc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -16.7615 -4.7615 0.2385 0.0000 5.2385 15.2385
cor(temp$wtc, temp$temp)
## [1] 0.2536564
tempmodel <- lm(temp$temp ~ temp$wtc + temp$sex)
summary(tempmodel)
##
## Call:
## lm(formula = temp$temp ~ temp$wtc + temp$sex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.86363 -0.45624 0.01841 0.47366 2.33424
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.114528 0.087102 1126.428 < 2e-16 ***
## temp$wtc 0.025267 0.008762 2.884 0.00462 **
## temp$sexfemale 0.269406 0.123277 2.185 0.03070 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7017 on 127 degrees of freedom
## Multiple R-squared: 0.09825, Adjusted R-squared: 0.08405
## F-statistic: 6.919 on 2 and 127 DF, p-value: 0.001406
attach(temp)
## The following object is masked _by_ .GlobalEnv:
##
## temp
subsetmodel <- lm(temp ~ wtc, data = temp)
summary (subsetmodel)
##
## Call:
## lm(formula = temp ~ wtc, data = temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.85017 -0.39999 0.01033 0.43915 2.46549
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.249231 0.062444 1573.402 < 2e-16 ***
## wtc 0.026335 0.008876 2.967 0.00359 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.712 on 128 degrees of freedom
## Multiple R-squared: 0.06434, Adjusted R-squared: 0.05703
## F-statistic: 8.802 on 1 and 128 DF, p-value: 0.003591
anova(subsetmodel)
## Analysis of Variance Table
##
## Response: temp
## Df Sum Sq Mean Sq F value Pr(>F)
## wtc 1 4.462 4.4618 8.8021 0.003591 **
## Residuals 128 64.883 0.5069
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1