#knitr::opts_chunk$set(echo=TRUE, warning = FALSE, message = FALSE, results = 'hide', include=TRUE, fig.keep = 'all')
## SET UP ##
#Setup for our analysis, including necessary packages.
# NOTE(review): rm(list = ls()) only removes objects from the global environment;
# it does not detach packages or reset options, so it is not a clean session.
rm(list = ls())
# NOTE(review): machine-specific working directory. The relative path passed to
# read.csv() further down ("Chester_Urban_Growth.csv") is resolved against it.
setwd("~/Documents/UPenn/Penn 2022 Spring/Planning by Numbers/Assignment 3")
library(tidyverse)
library(tidycensus)
library(ggplot2)
library(dplyr)
library(data.table)
require("ggrepel")
library(hrbrthemes)
library(viridis)
library(ggridges)
library(lubridate)
library(tigris)
library(gganimate)
library(riem)
library(gridExtra)
library(knitr)
library(kableExtra)
library(mapview)
library(ggcorrplot)
library(RColorBrewer)
library(stargazer)
library(correlationfunnel)
library(tidyquant)
library(plotly)
library(shiny)
library(bslib)
library(shinydashboard)
library(DT)
library(car)
library(AICcmodavg)
library(corrplot)
library(ggthemes)
library(mlogit)
require(mlogit)
Part A: Change in Land Use for Chester County, PA between 1992-2001
Utilizing 500-meter raster cell locations matched to corresponding land cover & use, transportation, and demographic data for 1992 and 2001, we evaluated factors that impacted changes in land usage in Chester County, PA. To study this data and identify possible determinants of land use change, in particular conversion of non-urban land to urban uses, we evaluated each potential determinant’s relationship or correlation with land use.
Taking the potential determinants (variables) that displayed a relationship to the land use changes data, we developed and tested a series of models, eventually determining that the percentage of white population, proximity within 100 meters of a SEPTA rail station, distance from regional rail stations, and distance from parks were the best predictors of the likelihood an area would be converted to urban uses or remain unconverted.
We determined that a unit increase in each of our identified determinants has the following impact on the odds that land would be converted to urban uses.
Being located within 100 meters of a SEPTA rail station corresponds to a 300% increase in the odds of land conversion.
A unit increase in the percentage of white population corresponds with a 4% decrease in the odds of land conversion.
Unit increases in both distance to the closest park and regional rail station correspond with a less than 1% decrease in the odds of land conversion.
From our model, we developed a series of scenarios to predict how changes within each variable impact land conversion probability. Largely confirming the values seen above, our scenario plot displayed trends indicating that as white population increases, the likelihood of conversion to urban land uses decreases. Our scenarios also indicated that being within 100 meters of a rail station corresponds to a very high (over 50%) predicted probability of urban land use conversion when distance to parks and to other regional rail stations were held at the middle (median) value of their respective data sets.
Given proximity to park space and regional rail stations have very low predicted odds ratios for land use change, these variables are likely not the most useful additions to the model, although their additions did increase the overall fit of the model to the data. Given the substantial impact of SEPTA rail infrastructure within the model it is likely that other variables relating to transit infrastructure, such as REGRAIL300 and FOURLNE300, would improve model performance.
Remove percent college graduate as numbers appear to be incorrect.
# Load the Chester County table; the path is relative to the working directory.
dat_chester <- read.csv(file = "Chester_Urban_Growth.csv", header = TRUE)
# head(dat_chester)
# summary(dat_chester)

# Drop PCT_COLGRD: its values appear to be incorrect.
dat_chester1 <- subset(dat_chester, select = -PCT_COLGRD)
# head(dat_chester1)
summary(dat_chester1)
## CHESCO X Y SLOPE
## Min. :1 Min. :1586501 Min. :152678 Min. : 0.050
## 1st Qu.:1 1st Qu.:1602001 1st Qu.:173178 1st Qu.: 1.344
## Median :1 Median :1610001 Median :186178 Median : 2.048
## Mean :1 Mean :1610704 Mean :185065 Mean : 2.407
## 3rd Qu.:1 3rd Qu.:1618502 3rd Qu.:197178 3rd Qu.: 3.036
## Max. :1 Max. :1638502 Max. :214179 Max. :12.042
## FOURLNE300 INTERST800 REGRAIL300 RAILSTN100
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.4971 Mean :0.3426 Mean :0.1193 Mean :0.01253
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## PARKS500M WATER100 CITBORO_10 DIST_WATER
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. : 0
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 1414
## Median :0.00000 Median :0.00000 Median :0.00000 Median : 3162
## Mean :0.02276 Mean :0.05733 Mean :0.07015 Mean : 3753
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 5701
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :13730
## DIST_RAILS DIST_REGRA DIST_PASSR DIST_4LNE_
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 6671 1st Qu.: 6265 1st Qu.: 1000 1st Qu.: 1414
## Median :12500 Median :12176 Median : 2236 Median : 3000
## Mean :13233 Mean :12918 Mean : 2763 Mean : 3416
## 3rd Qu.:18201 3rd Qu.:18000 3rd Qu.: 4123 3rd Qu.: 5315
## Max. :40771 Max. :40771 Max. :11181 Max. :11511
## DIST_INTER DIST_PARKS PAL_WETLND FARM92
## Min. : 0 Min. : 0 Min. :0.0000 Min. :0.000000
## 1st Qu.: 5590 1st Qu.: 2500 1st Qu.:0.0000 1st Qu.:0.000000
## Median :12530 Median : 4472 Median :0.0000 Median :0.000000
## Mean :15640 Mean : 5161 Mean :0.1052 Mean :0.009219
## 3rd Qu.:24076 3rd Qu.: 7106 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :49359 Max. :16808 Max. :1.0000 Max. :1.000000
## PASTURE92 FOREST92 URBAN01 URBAN92
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.4994 Mean :0.3518 Mean :0.0677 Mean :0.04883
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## POPDEN90 MEDINC90 MEDHSEVAL_ PCT_WHITE_
## Min. : 0 Min. : 0 Min. : 0 Min. : 34.00
## 1st Qu.: 21 1st Qu.: 37986 1st Qu.:128000 1st Qu.: 94.00
## Median : 61 Median : 45074 Median :150200 Median : 97.00
## Mean : 189 Mean : 48876 Mean :167526 Mean : 94.74
## 3rd Qu.: 154 3rd Qu.: 56394 3rd Qu.:214200 3rd Qu.: 98.00
## Max. :13000 Max. :103043 Max. :384000 Max. :100.00
## PCT_SFHOME PCT_POV_90 PCT_HSB_19
## Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 77.00 1st Qu.: 2.000 1st Qu.: 14.00
## Median : 86.00 Median : 4.000 Median : 19.00
## Mean : 82.67 Mean : 4.424 Mean : 23.11
## 3rd Qu.: 93.00 3rd Qu.: 6.000 3rd Qu.: 31.00
## Max. :100.00 Max. :29.000 Max. :100.00
#dim(dat_chester1)
#[1] 6942 31
Population Density in 1990
Values skewed, create binary variable.
summary(dat_chester1$POPDEN90)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 21 61 189 154 13000

# Shared text styling that is added to every plot in this report.
plotTheme <- theme(
  axis.title = element_text(size = 11, lineheight = .9, family = "Comic Sans MS"),
  plot.title = element_text(size = 16, face = "bold", hjust = 0.5, family = "Comic Sans MS"),
  plot.background = element_blank()
)

# Binary flag: 1 when a cell's POPDEN90 is above the sample mean, 0 otherwise.
dat_chester1$hi_POPDEN90 <- ifelse(dat_chester1$POPDEN90 > mean(dat_chester1$POPDEN90), 1, 0)

# Histogram of POPDEN90. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = POPDEN90)) +
  geom_histogram(
    binwidth = 157,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Population Density", x = "Average Pop Density") +
  plotTheme
Median Household Income in 1990
summary(dat_chester1$MEDINC90)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 37986 45074 48876 56394 103043
# number of bins: sqrt(6942)

# Histogram of MEDINC90. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = MEDINC90)) +
  geom_histogram(
    binwidth = 1241,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Median Household Income", x = "$Moolah$") +
  plotTheme
Median Home Value in 1990
summary(dat_chester1$MEDHSEVAL_)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 128000 150200 167526 214200 384000
# number of bins: sqrt(6942) = 83
# bin width: (max-min) / #of bins

# Histogram of MEDHSEVAL_. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = MEDHSEVAL_)) +
  geom_histogram(
    binwidth = 4627,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Median Home Value", x = "$Moolah$") +
  plotTheme
Percentage of White Pop in 1990
Values skewed, create binary variable.
summary(dat_chester1$PCT_WHITE_)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 34.00 94.00 97.00 94.74 98.00 100.00

# Binary flag: 1 when a cell's PCT_WHITE_ is above the sample mean, 0 otherwise.
dat_chester1$hi_PCT_WHITE_ <- ifelse(dat_chester1$PCT_WHITE_ > mean(dat_chester1$PCT_WHITE_), 1, 0)

# Histogram of PCT_WHITE_. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = PCT_WHITE_)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "White Population", x = "%Percent%") +
  plotTheme
Percentage of Single-Family Homes in 1990
Values skewed, create binary variable.
summary(dat_chester1$PCT_SFHOME)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 77.00 86.00 82.67 93.00 100.00

# Binary flag: 1 when a cell's PCT_SFHOME is above the sample mean, 0 otherwise.
dat_chester1$hi_PCT_SFHOME <- ifelse(dat_chester1$PCT_SFHOME > mean(dat_chester1$PCT_SFHOME), 1, 0)

# Histogram of PCT_SFHOME. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = PCT_SFHOME)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Single-Family Homes", x = "%Percent%") +
  plotTheme
Percentage of Households Below Poverty Line in 1990
summary(dat_chester1$PCT_POV_90)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 4.000 4.424 6.000 29.000

# Histogram of PCT_POV_90. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
# NOTE(review): the recorded range is 0-29, so binwidth = 10 yields only a
# handful of bars; consider a narrower bin width.
ggplot(dat_chester1, aes(x = PCT_POV_90)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Households Below Poverty Line", x = "%Percent%") +
  plotTheme
Percentage of Homes Built Before 1950 in 1990
summary(dat_chester1$PCT_HSB_19)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 14.00 19.00 23.11 31.00 100.00

# Histogram of PCT_HSB_19. Built with ggplot()/geom_histogram() because qplot()
# is deprecated (ggplot2 >= 3.4.0) and emits a deprecation warning.
ggplot(dat_chester1, aes(x = PCT_HSB_19)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen", colour = "lawngreen", alpha = 0.3
  ) +
  labs(title = "Homes Built Before 1950", x = "%Percent%") +
  plotTheme
Created a new data frame including a conditional binary variable column of raster cells that were either farmland, pasture or forest in 1992 and were converted to urban uses by 2001.
# Build the binary outcome CHNG_URB: 1 when a cell was farm, pasture or forest
# in 1992 (FARM92 / PASTURE92 / FOREST92) and is urban in 2001 (URBAN01),
# 0 otherwise.
# The land-use indicators are numeric 0/1 columns, so they are compared with the
# number 1 rather than the string "1" (which only worked through implicit
# numeric-to-character coercion). The logical result is converted straight to
# 0/1, so no pre-initialised column is needed.
dat_chester1 <- dat_chester1 %>%
  mutate(
    CHNG_URB = as.numeric(
      (FARM92 == 1 | PASTURE92 == 1 | FOREST92 == 1) & URBAN01 == 1
    )
  )
summary(dat_chester1)
## CHESCO X Y SLOPE
## Min. :1 Min. :1586501 Min. :152678 Min. : 0.050
## 1st Qu.:1 1st Qu.:1602001 1st Qu.:173178 1st Qu.: 1.344
## Median :1 Median :1610001 Median :186178 Median : 2.048
## Mean :1 Mean :1610704 Mean :185065 Mean : 2.407
## 3rd Qu.:1 3rd Qu.:1618502 3rd Qu.:197178 3rd Qu.: 3.036
## Max. :1 Max. :1638502 Max. :214179 Max. :12.042
## FOURLNE300 INTERST800 REGRAIL300 RAILSTN100
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.4971 Mean :0.3426 Mean :0.1193 Mean :0.01253
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## PARKS500M WATER100 CITBORO_10 DIST_WATER
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. : 0
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.: 1414
## Median :0.00000 Median :0.00000 Median :0.00000 Median : 3162
## Mean :0.02276 Mean :0.05733 Mean :0.07015 Mean : 3753
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.: 5701
## Max. :1.00000 Max. :1.00000 Max. :1.00000 Max. :13730
## DIST_RAILS DIST_REGRA DIST_PASSR DIST_4LNE_
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 6671 1st Qu.: 6265 1st Qu.: 1000 1st Qu.: 1414
## Median :12500 Median :12176 Median : 2236 Median : 3000
## Mean :13233 Mean :12918 Mean : 2763 Mean : 3416
## 3rd Qu.:18201 3rd Qu.:18000 3rd Qu.: 4123 3rd Qu.: 5315
## Max. :40771 Max. :40771 Max. :11181 Max. :11511
## DIST_INTER DIST_PARKS PAL_WETLND FARM92
## Min. : 0 Min. : 0 Min. :0.0000 Min. :0.000000
## 1st Qu.: 5590 1st Qu.: 2500 1st Qu.:0.0000 1st Qu.:0.000000
## Median :12530 Median : 4472 Median :0.0000 Median :0.000000
## Mean :15640 Mean : 5161 Mean :0.1052 Mean :0.009219
## 3rd Qu.:24076 3rd Qu.: 7106 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :49359 Max. :16808 Max. :1.0000 Max. :1.000000
## PASTURE92 FOREST92 URBAN01 URBAN92
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.4994 Mean :0.3518 Mean :0.0677 Mean :0.04883
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## POPDEN90 MEDINC90 MEDHSEVAL_ PCT_WHITE_
## Min. : 0 Min. : 0 Min. : 0 Min. : 34.00
## 1st Qu.: 21 1st Qu.: 37986 1st Qu.:128000 1st Qu.: 94.00
## Median : 61 Median : 45074 Median :150200 Median : 97.00
## Mean : 189 Mean : 48876 Mean :167526 Mean : 94.74
## 3rd Qu.: 154 3rd Qu.: 56394 3rd Qu.:214200 3rd Qu.: 98.00
## Max. :13000 Max. :103043 Max. :384000 Max. :100.00
## PCT_SFHOME PCT_POV_90 PCT_HSB_19 hi_POPDEN90
## Min. : 0.00 Min. : 0.000 Min. : 0.00 Min. :0.0000
## 1st Qu.: 77.00 1st Qu.: 2.000 1st Qu.: 14.00 1st Qu.:0.0000
## Median : 86.00 Median : 4.000 Median : 19.00 Median :0.0000
## Mean : 82.67 Mean : 4.424 Mean : 23.11 Mean :0.2025
## 3rd Qu.: 93.00 3rd Qu.: 6.000 3rd Qu.: 31.00 3rd Qu.:0.0000
## Max. :100.00 Max. :29.000 Max. :100.00 Max. :1.0000
## hi_PCT_WHITE_ hi_PCT_SFHOME CHNG_URB
## Min. :0.0000 Min. :0.000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.00000
## Median :1.0000 Median :1.000 Median :0.00000
## Mean :0.7151 Mean :0.574 Mean :0.03673
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.000 Max. :1.00000
#NEXT build binomial logit model of determinants for land use
Create Correlation Matrix of Potential Variables for Study
# identify determinants of agr/pasture/forest to urban use change ('92-'01)
# Candidate variables for the correlation screen. Names must match the column
# names of dat_chester1 exactly. The four-lane distance column is "DIST_4LNE_"
# (trailing underscore); it was previously written "DIST_4LNE" and was therefore
# silently left out of the correlation matrix.
colnamCleaning <- c(
  "CHNG_URB", "FOURLNE300", "PARKS500M", "RAILSTN100", "REGRAIL300",
  "WATER100", "CITBORO_10", "DIST_WATER", "DIST_RAILS", "DIST_REGRA",
  "DIST_PASSR", "DIST_4LNE_", "DIST_INTER", "DIST_PARKS", "PAL_WETLND",
  "POPDEN90", "MEDINC90", "MEDHSEVAL_", "PCT_WHITE_", "hi_PCT_WHITE_",
  "PCT_POV_90", "PCT_HSB_19"
)

# Fail loudly on a misspelt name instead of silently dropping the column.
missing_cols <- setdiff(colnamCleaning, names(dat_chester1))
if (length(missing_cols) > 0) {
  stop(
    "Columns not found in dat_chester1: ",
    paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# Columns are kept in data-frame order, as before.
clean_datchester1 <- dat_chester1[, names(dat_chester1) %in% colnamCleaning, drop = FALSE]
cortable <- cor(clean_datchester1)
# NOTE: the correlation table recorded below was produced before DIST_4LNE_ was
# included, so it has no DIST_4LNE_ row/column; re-knit to refresh it.
round(cortable, 2)
## FOURLNE300 REGRAIL300 RAILSTN100 PARKS500M WATER100 CITBORO_10
## FOURLNE300 1.00 0.30 0.11 0.02 -0.08 0.10
## REGRAIL300 0.30 1.00 0.31 -0.04 -0.05 0.06
## RAILSTN100 0.11 0.31 1.00 -0.02 -0.02 0.06
## PARKS500M 0.02 -0.04 -0.02 1.00 0.03 -0.01
## WATER100 -0.08 -0.05 -0.02 0.03 1.00 0.04
## CITBORO_10 0.10 0.06 0.06 -0.01 0.04 1.00
## DIST_WATER 0.18 0.20 0.06 -0.04 -0.32 -0.06
## DIST_RAILS -0.23 -0.50 -0.18 0.02 0.09 -0.08
## DIST_REGRA -0.24 -0.51 -0.17 0.03 0.09 -0.08
## DIST_PASSR -0.35 -0.27 -0.08 0.02 0.00 -0.25
## DIST_INTER -0.17 -0.32 -0.10 -0.05 0.08 -0.03
## DIST_PARKS -0.15 -0.11 -0.05 -0.22 0.00 -0.06
## PAL_WETLND 0.01 0.00 -0.03 0.00 0.02 -0.02
## POPDEN90 0.14 0.17 0.13 -0.04 -0.03 0.27
## MEDINC90 0.23 0.21 0.01 0.03 -0.07 -0.17
## MEDHSEVAL_ 0.18 0.16 0.02 0.04 -0.07 -0.18
## PCT_WHITE_ -0.20 -0.03 -0.03 0.05 -0.02 -0.23
## PCT_POV_90 -0.14 -0.16 -0.04 -0.06 0.05 0.18
## PCT_HSB_19 -0.15 -0.33 -0.02 -0.03 0.00 0.25
## hi_PCT_WHITE_ -0.14 -0.13 -0.11 0.07 0.03 -0.22
## CHNG_URB 0.16 0.21 0.16 -0.01 -0.02 0.10
## DIST_WATER DIST_RAILS DIST_REGRA DIST_PASSR DIST_INTER DIST_PARKS
## FOURLNE300 0.18 -0.23 -0.24 -0.35 -0.17 -0.15
## REGRAIL300 0.20 -0.50 -0.51 -0.27 -0.32 -0.11
## RAILSTN100 0.06 -0.18 -0.17 -0.08 -0.10 -0.05
## PARKS500M -0.04 0.02 0.03 0.02 -0.05 -0.22
## WATER100 -0.32 0.09 0.09 0.00 0.08 0.00
## CITBORO_10 -0.06 -0.08 -0.08 -0.25 -0.03 -0.06
## DIST_WATER 1.00 -0.28 -0.29 0.03 -0.25 0.06
## DIST_RAILS -0.28 1.00 1.00 0.31 0.80 0.17
## DIST_REGRA -0.29 1.00 1.00 0.31 0.81 0.17
## DIST_PASSR 0.03 0.31 0.31 1.00 0.20 0.14
## DIST_INTER -0.25 0.80 0.81 0.20 1.00 0.42
## DIST_PARKS 0.06 0.17 0.17 0.14 0.42 1.00
## PAL_WETLND 0.01 -0.07 -0.07 0.00 -0.08 -0.05
## POPDEN90 0.04 -0.18 -0.18 -0.15 -0.12 -0.10
## MEDINC90 0.24 -0.39 -0.39 0.05 -0.31 -0.23
## MEDHSEVAL_ 0.36 -0.37 -0.37 0.10 -0.35 -0.24
## PCT_WHITE_ 0.08 -0.06 -0.07 0.22 -0.24 -0.14
## PCT_POV_90 -0.03 0.21 0.22 0.05 0.21 0.18
## PCT_HSB_19 0.04 0.28 0.28 0.10 0.19 0.21
## hi_PCT_WHITE_ -0.08 0.03 0.03 0.27 -0.16 -0.22
## CHNG_URB 0.04 -0.15 -0.15 -0.13 -0.09 -0.08
## PAL_WETLND POPDEN90 MEDINC90 MEDHSEVAL_ PCT_WHITE_ PCT_POV_90
## FOURLNE300 0.01 0.14 0.23 0.18 -0.20 -0.14
## REGRAIL300 0.00 0.17 0.21 0.16 -0.03 -0.16
## RAILSTN100 -0.03 0.13 0.01 0.02 -0.03 -0.04
## PARKS500M 0.00 -0.04 0.03 0.04 0.05 -0.06
## WATER100 0.02 -0.03 -0.07 -0.07 -0.02 0.05
## CITBORO_10 -0.02 0.27 -0.17 -0.18 -0.23 0.18
## DIST_WATER 0.01 0.04 0.24 0.36 0.08 -0.03
## DIST_RAILS -0.07 -0.18 -0.39 -0.37 -0.06 0.21
## DIST_REGRA -0.07 -0.18 -0.39 -0.37 -0.07 0.22
## DIST_PASSR 0.00 -0.15 0.05 0.10 0.22 0.05
## DIST_INTER -0.08 -0.12 -0.31 -0.35 -0.24 0.21
## DIST_PARKS -0.05 -0.10 -0.23 -0.24 -0.14 0.18
## PAL_WETLND 1.00 0.00 0.03 0.03 0.01 -0.02
## POPDEN90 0.00 1.00 -0.02 0.00 -0.09 0.04
## MEDINC90 0.03 -0.02 1.00 0.80 0.22 -0.48
## MEDHSEVAL_ 0.03 0.00 0.80 1.00 0.28 -0.35
## PCT_WHITE_ 0.01 -0.09 0.22 0.28 1.00 -0.33
## PCT_POV_90 -0.02 0.04 -0.48 -0.35 -0.33 1.00
## PCT_HSB_19 -0.03 0.06 -0.48 -0.31 -0.14 0.37
## hi_PCT_WHITE_ 0.03 -0.11 0.22 0.18 0.62 -0.27
## CHNG_URB 0.00 0.12 0.04 0.03 -0.06 -0.03
## PCT_HSB_19 hi_PCT_WHITE_ CHNG_URB
## FOURLNE300 -0.15 -0.14 0.16
## REGRAIL300 -0.33 -0.13 0.21
## RAILSTN100 -0.02 -0.11 0.16
## PARKS500M -0.03 0.07 -0.01
## WATER100 0.00 0.03 -0.02
## CITBORO_10 0.25 -0.22 0.10
## DIST_WATER 0.04 -0.08 0.04
## DIST_RAILS 0.28 0.03 -0.15
## DIST_REGRA 0.28 0.03 -0.15
## DIST_PASSR 0.10 0.27 -0.13
## DIST_INTER 0.19 -0.16 -0.09
## DIST_PARKS 0.21 -0.22 -0.08
## PAL_WETLND -0.03 0.03 0.00
## POPDEN90 0.06 -0.11 0.12
## MEDINC90 -0.48 0.22 0.04
## MEDHSEVAL_ -0.31 0.18 0.03
## PCT_WHITE_ -0.14 0.62 -0.06
## PCT_POV_90 0.37 -0.27 -0.03
## PCT_HSB_19 1.00 -0.13 -0.05
## hi_PCT_WHITE_ -0.13 1.00 -0.09
## CHNG_URB -0.05 -0.09 1.00
# Correlation matrix of the screened variables, drawn as coloured squares and
# ordered with corrplot's "FPC" ordering.
N <- cor(clean_datchester1)
corrplot(N, method = "square", order = "FPC")
Variables with highest degree of correlation with CHNG_URB
| VARIABLE | DEGREE |
|---|---|
| REGRAIL300 | 0.21 |
| FOURLNE300 | 0.16 |
| RAILSTN100 | 0.16 |
| DIST_REGRA | 0.15 |
| DIST_RAILS | 0.15 |
| DIST_PASSR | 0.13 |
| DIST_INTER | 0.09 |
| DIST_PARKS | 0.08 |
| PCT_WHITE_ | 0.06 |
| PCT_HSB_19 | 0.05 |
Correlation Matrix of Potential Independent Variables
REGRAIL300 & DIST_REGRA & DIST_PASSR are highly correlated (>0.30) with almost all other independent variables and should not be used in the same model as any of the others.
This leaves independent variables with correlations < 0.30: PCT_WHITE_, RAILSTN100, DIST_PARKS & DIST_REGRA
# Candidate predictors only (no outcome column): check how strongly they are
# correlated with one another.
colnamCleaning2 <- c(
  "RAILSTN100", "REGRAIL300", "DIST_RAILS", "DIST_PASSR", "hi_POPDEN90",
  "FOURLNE300", "DIST_REGRA", "DIST_INTER", "DIST_PARKS", "PCT_WHITE_",
  "PCT_HSB_19"
)
clean_datchester2 <- dat_chester1[, names(dat_chester1) %in% colnamCleaning2]
# Overwrites the earlier cortable with the predictor-only correlations.
cortable <- cor(clean_datchester2)
round(cortable, 2)
## FOURLNE300 REGRAIL300 RAILSTN100 DIST_RAILS DIST_REGRA DIST_PASSR
## FOURLNE300 1.00 0.30 0.11 -0.23 -0.24 -0.35
## REGRAIL300 0.30 1.00 0.31 -0.50 -0.51 -0.27
## RAILSTN100 0.11 0.31 1.00 -0.18 -0.17 -0.08
## DIST_RAILS -0.23 -0.50 -0.18 1.00 1.00 0.31
## DIST_REGRA -0.24 -0.51 -0.17 1.00 1.00 0.31
## DIST_PASSR -0.35 -0.27 -0.08 0.31 0.31 1.00
## DIST_INTER -0.17 -0.32 -0.10 0.80 0.81 0.20
## DIST_PARKS -0.15 -0.11 -0.05 0.17 0.17 0.14
## PCT_WHITE_ -0.20 -0.03 -0.03 -0.06 -0.07 0.22
## PCT_HSB_19 -0.15 -0.33 -0.02 0.28 0.28 0.10
## hi_POPDEN90 0.23 0.31 0.14 -0.29 -0.30 -0.18
## DIST_INTER DIST_PARKS PCT_WHITE_ PCT_HSB_19 hi_POPDEN90
## FOURLNE300 -0.17 -0.15 -0.20 -0.15 0.23
## REGRAIL300 -0.32 -0.11 -0.03 -0.33 0.31
## RAILSTN100 -0.10 -0.05 -0.03 -0.02 0.14
## DIST_RAILS 0.80 0.17 -0.06 0.28 -0.29
## DIST_REGRA 0.81 0.17 -0.07 0.28 -0.30
## DIST_PASSR 0.20 0.14 0.22 0.10 -0.18
## DIST_INTER 1.00 0.42 -0.24 0.19 -0.19
## DIST_PARKS 0.42 1.00 -0.14 0.21 -0.16
## PCT_WHITE_ -0.24 -0.14 1.00 -0.14 -0.05
## PCT_HSB_19 0.19 0.21 -0.14 1.00 -0.16
## hi_POPDEN90 -0.19 -0.16 -0.05 -0.16 1.00
Using the variables identified as correlated with urban land use change, create binomial logit models for testing.
MODEL A: MOST HIGHLY CORRELATED VARIABLES WITH LAND USE CHANGE
Model A1: Change in Land Use vs Percent of White Pop
AIC: 2168.6
# Model A1: binomial GLM of CHNG_URB on PCT_WHITE_ alone.
mod_chester1 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester1)
##
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_, family = binomial, data = dat_chester1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6680 -0.2727 -0.2603 -0.2524 2.6431
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.317304 0.572871 -0.554 0.58
## PCT_WHITE_ -0.031448 0.006146 -5.117 3.11e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2185.6 on 6941 degrees of freedom
## Residual deviance: 2164.6 on 6940 degrees of freedom
## AIC: 2168.6
##
## Number of Fisher Scoring iterations: 6
Model A2: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail
AIC: 2095.1
# Model A2: adds the RAILSTN100 indicator to model A1.
mod_chester2 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester2)
##
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100, family = binomial,
## data = dat_chester1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0338 -0.2593 -0.2477 -0.2402 2.6796
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.452314 0.596422 -0.758 0.448
## PCT_WHITE_ -0.031099 0.006398 -4.861 1.17e-06 ***
## RAILSTN100 2.530466 0.241771 10.466 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2185.6 on 6941 degrees of freedom
## Residual deviance: 2089.1 on 6939 degrees of freedom
## AIC: 2095.1
##
## Number of Fisher Scoring iterations: 6
Model A3: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail vs Distance from Nearest Park
AIC: 2047.4
# Model A3: adds DIST_PARKS to model A2.
mod_chester3 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester3)
##
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS,
## family = binomial, data = dat_chester1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.1205 -0.2929 -0.2502 -0.1991 2.8468
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.031e+00 6.247e-01 1.651 0.0987 .
## PCT_WHITE_ -3.944e-02 6.411e-03 -6.152 7.66e-10 ***
## RAILSTN100 2.383e+00 2.437e-01 9.777 < 2e-16 ***
## DIST_PARKS -1.570e-04 2.424e-05 -6.477 9.36e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2185.6 on 6941 degrees of freedom
## Residual deviance: 2039.4 on 6938 degrees of freedom
## AIC: 2047.4
##
## Number of Fisher Scoring iterations: 6
Model A4: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail vs Distance from Nearest Park vs Distance to a Regional Rail Line
AIC: 1937.8
# Model A4: adds DIST_REGRA to model A3.
mod_chester4 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester4)
##
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS +
## DIST_REGRA, family = binomial, data = dat_chester1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0861 -0.2998 -0.2132 -0.1476 3.4005
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.888e+00 6.573e-01 2.873 0.00407 **
## PCT_WHITE_ -3.900e-02 6.875e-03 -5.672 1.41e-08 ***
## RAILSTN100 1.385e+00 2.569e-01 5.393 6.94e-08 ***
## DIST_PARKS -1.205e-04 2.628e-05 -4.585 4.55e-06 ***
## DIST_REGRA -1.051e-04 1.137e-05 -9.249 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2185.6 on 6941 degrees of freedom
## Residual deviance: 1927.8 on 6937 degrees of freedom
## AIC: 1937.8
##
## Number of Fisher Scoring iterations: 7
Analysis by Variance Tables
All models are statistically significant improvements to model 1.
# Sequential chi-square tests on the deviance: each model is compared with the
# one listed before it.
anova(
  mod_chester1, mod_chester2, mod_chester3, mod_chester4,
  test = "Chisq"
)
## Analysis of Deviance Table
##
## Model 1: CHNG_URB ~ PCT_WHITE_
## Model 2: CHNG_URB ~ PCT_WHITE_ + RAILSTN100
## Model 3: CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS
## Model 4: CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
## Resid. Df Resid. Dev Df Deviance Pr(>Chi)
## 1 6940 2164.6
## 2 6939 2089.1 1 75.496 < 2.2e-16 ***
## 3 6938 2039.4 1 49.734 1.76e-12 ***
## 4 6937 1927.8 1 111.595 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Order of Significance by Step Through
Backward Selection
Run backward selection for model A4. No variable is dropped.
AIC = 1937.78 Null Deviance: 2186 Residual Deviance: 1928
# Backward stepwise selection starting from the full four-variable model.
step(object = mod_chester4, direction = "backward")
## Start: AIC=1937.78
## CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
##
## Df Deviance AIC
## <none> 1927.8 1937.8
## - DIST_PARKS 1 1950.6 1958.6
## - RAILSTN100 1 1952.9 1960.9
## - PCT_WHITE_ 1 1953.7 1961.7
## - DIST_REGRA 1 2039.4 2047.4
##
## Call: glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS +
## DIST_REGRA, family = binomial, data = dat_chester1)
##
## Coefficients:
## (Intercept) PCT_WHITE_ RAILSTN100 DIST_PARKS DIST_REGRA
## 1.8882136 -0.0389976 1.3852463 -0.0001205 -0.0001051
##
## Degrees of Freedom: 6941 Total (i.e. Null); 6937 Residual
## Null Deviance: 2186
## Residual Deviance: 1928 AIC: 1938
#nothing is dropped
Forward Selection
Setting up the forward selection
# Upper bound of the search: the full four-variable model.
modUpper <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA,
  family = binomial,
  data = dat_chester1
)
# Lower bound and starting point: intercept-only model.
intonly <- glm(
  formula = CHNG_URB ~ 1,
  family = binomial,
  data = dat_chester1
)
# Forward stepwise selection between the two bounds.
step(
  object = intonly,
  scope = list(lower = intonly, upper = modUpper),
  direction = "forward"
)
## Start: AIC=2187.6
## CHNG_URB ~ 1
##
## Df Deviance AIC
## + DIST_REGRA 1 2001.9 2005.9
## + RAILSTN100 1 2108.1 2112.1
## + DIST_PARKS 1 2140.7 2144.7
## + PCT_WHITE_ 1 2164.6 2168.6
## <none> 2185.6 2187.6
##
## Step: AIC=2005.88
## CHNG_URB ~ DIST_REGRA
##
## Df Deviance AIC
## + RAILSTN100 1 1974.8 1980.8
## + PCT_WHITE_ 1 1976.0 1982.0
## + DIST_PARKS 1 1980.2 1986.2
## <none> 2001.9 2005.9
##
## Step: AIC=1980.84
## CHNG_URB ~ DIST_REGRA + RAILSTN100
##
## Df Deviance AIC
## + PCT_WHITE_ 1 1950.6 1958.6
## + DIST_PARKS 1 1953.7 1961.7
## <none> 1974.8 1980.8
##
## Step: AIC=1958.64
## CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_
##
## Df Deviance AIC
## + DIST_PARKS 1 1927.8 1937.8
## <none> 1950.6 1958.6
##
## Step: AIC=1937.78
## CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ + DIST_PARKS
##
## Call: glm(formula = CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ +
## DIST_PARKS, family = binomial, data = dat_chester1)
##
## Coefficients:
## (Intercept) DIST_REGRA RAILSTN100 PCT_WHITE_ DIST_PARKS
## 1.8882136 -0.0001051 1.3852463 -0.0389976 -0.0001205
##
## Degrees of Freedom: 6941 Total (i.e. Null); 6937 Residual
## Null Deviance: 2186
## Residual Deviance: 1928 AIC: 1938
#Start: CHNG_URB ~ 1
## AIC = 2187.6
#Step: ~ DIST_REGRA
##AIC = 2005.88
#Step: ~ DIST_REGRA + RAILSTN100
##AIC = 1980.84
#Step: ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_
##AIC = 1958.64
#Step: ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ + DIST_PARKS
##AIC = 1937.78
#Degrees of Freedom: 6941 Total (i.e. Null); 6937 Residual
#Null Deviance: 2186
#Residual Deviance: 1928
The AIC is at its lowest with all four possible independent variables present. This, model4, has been identified by ANOVA, backward and forward selection as the best fit.
PCT_White_ Coefficient
A unit increase in the percentage of white population results in a 4% decrease in the odds of land conversion.
coef(mod_chester4)
## (Intercept) PCT_WHITE_ RAILSTN100 DIST_PARKS DIST_REGRA
## 1.8882135917 -0.0389976059 1.3852463122 -0.0001204970 -0.0001051128
# Percent change in the odds of conversion per one-unit increase in PCT_WHITE_.
# The coefficient is read from the fitted model rather than pasted as a literal,
# so the figure stays in sync if the model is refitted.
(exp(coef(mod_chester4)[["PCT_WHITE_"]]) - 1) * 100
## [1] -3.824699
# a unit increase in PCT_WHITE_ results in a ~4% decrease in the odds of land conversion
DIST_PARKS Coefficient
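A unit increase in distance to the closest park corresponds to a 0.012% decrease in the odds of land conversion. Per unit of distance this is virtually no impact on land conversion likelihood within this model, although the effect compounds over the range that DIST_PARKS spans (0 to 16,808).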
coef(mod_chester4)
## (Intercept) PCT_WHITE_ RAILSTN100 DIST_PARKS DIST_REGRA
## 1.8882135917 -0.0389976059 1.3852463122 -0.0001204970 -0.0001051128
# Percent change in the odds of conversion per one-unit increase in DIST_PARKS.
# The coefficient is read from the fitted model rather than pasted as a literal,
# so the figure stays in sync if the model is refitted.
(exp(coef(mod_chester4)[["DIST_PARKS"]]) - 1) * 100
## [1] -0.01204897
# a unit increase in DIST_PARKS results in a ~0.012% decrease in the odds of land conversion
DIST_REGRA Coefficient
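A unit increase in distance to the closest regional rail line corresponds to a 0.011% decrease in the odds of land conversion. Per unit of distance this is virtually no impact on land conversion likelihood within this model, although the effect compounds over the range that DIST_REGRA spans (0 to 40,771).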
coef(mod_chester4)
## (Intercept) PCT_WHITE_ RAILSTN100 DIST_PARKS DIST_REGRA
## 1.8882135917 -0.0389976059 1.3852463122 -0.0001204970 -0.0001051128
# Percent change in the odds of conversion per one-unit increase in DIST_REGRA.
# The coefficient is read from the fitted model rather than pasted as a literal,
# so the figure stays in sync if the model is refitted.
(exp(coef(mod_chester4)[["DIST_REGRA"]]) - 1) * 100
## [1] -0.01051073
# a unit increase in DIST_REGRA results in a ~0.011% decrease in the odds of land conversion
RAILSTN100 Coefficient
If a cell is within 100 meters of a SEPTA rail station, the odds that the land will be converted to urban use increase by roughly 300% (an odds ratio of about 4).
coef(mod_chester4)
## (Intercept) PCT_WHITE_ RAILSTN100 DIST_PARKS DIST_REGRA
## 1.8882135917 -0.0389976059 1.3852463122 -0.0001204970 -0.0001051128
# Percent change in the odds of conversion when RAILSTN100 goes from 0 to 1.
# The coefficient is read from the fitted model rather than pasted as a literal,
# so the figure stays in sync if the model is refitted.
(exp(coef(mod_chester4)[["RAILSTN100"]]) - 1) * 100
## [1] 299.581
# RAILSTN100 = 1 corresponds to a ~300% increase in the odds of land conversion
Shows distribution between land converted to urban use (1) and land that remained unconverted (0).
High percentages of white populations correspond with predominately unconverted land.
# Jittered scatter of the binary outcome CHNG_URB against PCT_WHITE_.
jit <- ggplot(data = dat_chester1, mapping = aes(x = PCT_WHITE_, y = CHNG_URB))
jit +
  geom_point(
    position = "jitter",
    shape = 21, size = 4, alpha = 0.2,
    colour = "lawngreen", fill = "lawngreen"
  ) +
  theme(panel.background = element_blank()) +
  plotTheme
# CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
Unsurprisingly, a lack of proximity to rail infrastructure corresponds with unconverted land.
# Jittered scatter of CHNG_URB against RAILSTN100 for dat_chester1.
# Semi-transparent points (alpha 0.2) keep overlapping observations readable.
jit2 <- ggplot(dat_chester1, aes(RAILSTN100, CHNG_URB))
jit2 +
  geom_point(
    shape = 21,
    size = 4,
    fill = "lawngreen",
    color = "lawngreen",
    alpha = 0.2,
    position = "jitter"
  ) +
  theme(panel.background = element_blank()) +
  plotTheme
Interestingly, distance from parks seems to have more variation than other land use indicators. While the variable observations are concentrated within a close proximity to park space and unconverted land, there is more spread within this distribution.
# Jittered scatter of CHNG_URB against DIST_PARKS for dat_chester1.
# Semi-transparent points (alpha 0.2) keep overlapping observations readable.
jit3 <- ggplot(dat_chester1, aes(DIST_PARKS, CHNG_URB))
jit3 +
  geom_point(
    shape = 21,
    size = 4,
    fill = "lawngreen",
    color = "lawngreen",
    alpha = 0.2,
    position = "jitter"
  ) +
  theme(panel.background = element_blank()) +
  plotTheme
Using model 4, our best model, we evaluated how distance from regional rail, distance from parks, and percentage of white population impact changes in land use. Most notable is the downward curve of each scenario, indicating that as the percentage of white population increases, the likelihood of conversion to urban uses decreases. Conversely, lower percentages of white population correspond with a higher likelihood of conversion to urban land. The plot also indicates the importance of rail infrastructure, as seen in the red scenario “Pred_NoRAIL_PARKMED_REGA3Q”. For a cell that is not within 100 meters of a SEPTA regional rail station and is farther away from a rail station than 75% (3rd quartile) of the data, the likelihood of conversion to urban land is under 10%. As the percentage of white population increases, this likelihood decreases even further.
Interestingly, holding distance from SEPTA rail and regional rail constant, being closer to a park increases the probability of conversion to urban land uses, as seen between the purple “Pred_YesRAIL_PARKMED_REGAMED” and blue “Pred_YesRAIL_PARK3Q_REGAMED” lines.
## Scenario predictions from model 4:
## CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
##
## Each scenario keeps the observed PCT_WHITE_ values and fixes the other three
## predictors, so the predicted probability can be plotted against PCT_WHITE_.

# Fixed predictor values used by the scenarios, labelled median / third
# quartile in the analysis.
# NOTE(review): these are hard-coded; confirm they still match dat_chester1.
dist_parks_median <- 4472
dist_parks_q3 <- 7106
dist_regra_median <- 12176
dist_regra_q3 <- 18000

# Build one scenario data frame: observed PCT_WHITE_ plus constant values for
# the remaining model 4 predictors (constants are recycled across all rows).
build_chester_scenario <- function(rail_within_100m, dist_parks, dist_regra,
                                   pct_white = dat_chester1$PCT_WHITE_) {
  data.frame(
    PCT_WHITE_ = pct_white,
    RAILSTN100 = rail_within_100m,
    DIST_PARKS = dist_parks,
    DIST_REGRA = dist_regra
  )
}

# No rail station within 100 m, median park distance, Q3 regional rail distance.
newdat_chester <- build_chester_scenario(0, dist_parks_median, dist_regra_q3)
# Rail station within 100 m, median park distance, median regional rail distance.
newdat_chester1 <- build_chester_scenario(1, dist_parks_median, dist_regra_median)
# Rail station within 100 m, Q3 park distance, median regional rail distance.
newdat_chester2 <- build_chester_scenario(1, dist_parks_q3, dist_regra_median)
# Rail station within 100 m, Q3 park distance, Q3 regional rail distance.
newdat_chester3 <- build_chester_scenario(1, dist_parks_q3, dist_regra_q3)

# Predicted conversion probabilities (type = "response") for every scenario,
# assembled once instead of rebuilding pred_dat after each added column.
pred_dat <- data.frame(
  PCT_WHITE_ = dat_chester1$PCT_WHITE_,
  Pred_NoRAIL_PARKMED_REGA3Q =
    predict(mod_chester4, newdat_chester, type = "response"),
  Pred_YesRAIL_PARKMED_REGAMED =
    predict(mod_chester4, newdat_chester1, type = "response"),
  Pred_YesRAIL_PARK3Q_REGAMED =
    predict(mod_chester4, newdat_chester2, type = "response"),
  Pred_YesRAIL_PARK3Q_REGA3Q =
    predict(mod_chester4, newdat_chester3, type = "response"),
  row.names = NULL
)

# Long format: one row per (PCT_WHITE_, Scenario) pair for ggplot.
dat_gg <- pivot_longer(
  pred_dat,
  cols = -PCT_WHITE_,
  names_to = "Scenario",
  values_to = "value"
)

ggplot(dat_gg, aes(x = PCT_WHITE_, y = value, colour = Scenario)) +
  geom_line() +
  ylim(0, 1) +
  xlab("Percentage of White Population") +
  ylab("Predicted Probability of Conversion to Urban Land") +
  plotTheme