#knitr::opts_chunk$set(echo=TRUE, warning = FALSE, message = FALSE, results = 'hide', include=TRUE, fig.keep = 'all')

## SET UP ##

#Setup for our analysis, including necessary packages.

# NOTE(review): this removes every object from the environment the script is
# run in (the global environment when run top-level). It does not detach
# packages or reset options, so it is not a true "fresh session".
rm(list = ls())

# NOTE(review): hard-coded, machine-specific working directory. The data file
# is later read with a relative path ("Chester_Urban_Growth.csv"), which is
# resolved against this folder, so the script only runs unmodified on a
# machine that has this exact directory layout.
setwd("~/Documents/UPenn/Penn 2022 Spring/Planning by Numbers/Assignment 3")

library(tidyverse)
library(tidycensus)
library(ggplot2)
library(dplyr)
library(data.table)
require("ggrepel")
library(hrbrthemes)
library(viridis)
library(ggridges)
library(lubridate)
library(tigris)
library(gganimate)
library(riem)
library(gridExtra)
library(knitr)
library(kableExtra)
library(mapview)
library(ggcorrplot)
library(RColorBrewer)
library(stargazer)
library(correlationfunnel)
library(tidyquant)
library(plotly)
library(shiny)
library(bslib)
library(shinydashboard)
library(DT)
library(car)
library(AICcmodavg)
library(corrplot)
library(ggthemes)
library(mlogit)
require(mlogit)

Summary of Findings

Part A: Change in Land Use for Chester County, PA between 1992-2001

Utilizing 500-meter raster cell locations matched to corresponding land cover & use, transportation, and demographic data for 1992 and 2001, we evaluated factors that impacted changes in land usage in Chester County, PA. To study this data and identify possible determinants of land use change, in particular conversion of non-urban land to urban uses, we evaluated each potential determinant’s relationship or correlation with land use.

Taking the potential determinants (variables) that displayed a relationship to the land use changes data, we developed and tested a series of models, eventually determining that the percentage of white population, proximity within 100 meters of a SEPTA rail station, distance from regional rail stations, and distance from parks were the best predictors of the likelihood an area would be converted to urban uses or remain unconverted.

We determined that a one-unit increase in each of our identified determinants has the following impact on the odds that land would be converted to urban uses:

Being located within 100 meters of a SEPTA rail station corresponds to a 300% increase in the odds of land conversion.
A unit increase in the percentage of white population corresponds with a 4% decrease in the odds of land conversion.
Unit increases in both distance to the closest park and regional rail station correspond with a less than 1% decrease in the odds of land conversion.

From our model, we developed a series of scenarios to predict how changes within each variable impact land conversion probability. Largely confirming the values seen above, our scenario plot displayed trends indicating that as the white population share increases, the likelihood of conversion to urban land uses decreases. Our scenarios also indicated that being within 100 meters of a rail station yields a very high (over 50%) predicted probability of urban land use conversion when distance to parks and distance to regional rail stations are held at the middle (median) value of their respective data sets.

Because the odds ratios for distance to park space and distance to regional rail stations are very close to 1 (a change of less than 1% in the odds of land use change per unit of distance), these variables are likely not the most useful additions to the model, although their addition did increase the overall fit of the model to the data. Given the substantial impact of SEPTA rail infrastructure within the model, it is likely that other variables relating to transit infrastructure, such as REGRAIL300 and FOURLNE300, would improve model performance.

Part A: Modeling Land Use Change in Chester Co. PA

Part A.1 - Reading the Data & Running Descriptive Statistics

Remove percent college graduate as numbers appear to be incorrect.

# Load the Chester County raster-cell table; the first row of the CSV holds
# the column names.
dat_chester <- read.csv(file = "Chester_Urban_Growth.csv", header = TRUE)
# head(dat_chester)
# summary(dat_chester)

# Working copy without PCT_COLGRD, whose values appear to be incorrect.
dat_chester1 <- dplyr::select(dat_chester, -PCT_COLGRD)

# head(dat_chester1)

summary(dat_chester1)

##      CHESCO        X                 Y              SLOPE       
##  Min.   :1   Min.   :1586501   Min.   :152678   Min.   : 0.050  
##  1st Qu.:1   1st Qu.:1602001   1st Qu.:173178   1st Qu.: 1.344  
##  Median :1   Median :1610001   Median :186178   Median : 2.048  
##  Mean   :1   Mean   :1610704   Mean   :185065   Mean   : 2.407  
##  3rd Qu.:1   3rd Qu.:1618502   3rd Qu.:197178   3rd Qu.: 3.036  
##  Max.   :1   Max.   :1638502   Max.   :214179   Max.   :12.042  
##    FOURLNE300       INTERST800       REGRAIL300       RAILSTN100     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.4971   Mean   :0.3426   Mean   :0.1193   Mean   :0.01253  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##    PARKS500M          WATER100         CITBORO_10        DIST_WATER   
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :    0  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.: 1414  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median : 3162  
##  Mean   :0.02276   Mean   :0.05733   Mean   :0.07015   Mean   : 3753  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.: 5701  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :13730  
##    DIST_RAILS      DIST_REGRA      DIST_PASSR      DIST_4LNE_   
##  Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.: 6671   1st Qu.: 6265   1st Qu.: 1000   1st Qu.: 1414  
##  Median :12500   Median :12176   Median : 2236   Median : 3000  
##  Mean   :13233   Mean   :12918   Mean   : 2763   Mean   : 3416  
##  3rd Qu.:18201   3rd Qu.:18000   3rd Qu.: 4123   3rd Qu.: 5315  
##  Max.   :40771   Max.   :40771   Max.   :11181   Max.   :11511  
##    DIST_INTER      DIST_PARKS      PAL_WETLND         FARM92        
##  Min.   :    0   Min.   :    0   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.: 5590   1st Qu.: 2500   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :12530   Median : 4472   Median :0.0000   Median :0.000000  
##  Mean   :15640   Mean   : 5161   Mean   :0.1052   Mean   :0.009219  
##  3rd Qu.:24076   3rd Qu.: 7106   3rd Qu.:0.0000   3rd Qu.:0.000000  
##  Max.   :49359   Max.   :16808   Max.   :1.0000   Max.   :1.000000  
##    PASTURE92         FOREST92         URBAN01          URBAN92       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.4994   Mean   :0.3518   Mean   :0.0677   Mean   :0.04883  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##     POPDEN90        MEDINC90        MEDHSEVAL_       PCT_WHITE_    
##  Min.   :    0   Min.   :     0   Min.   :     0   Min.   : 34.00  
##  1st Qu.:   21   1st Qu.: 37986   1st Qu.:128000   1st Qu.: 94.00  
##  Median :   61   Median : 45074   Median :150200   Median : 97.00  
##  Mean   :  189   Mean   : 48876   Mean   :167526   Mean   : 94.74  
##  3rd Qu.:  154   3rd Qu.: 56394   3rd Qu.:214200   3rd Qu.: 98.00  
##  Max.   :13000   Max.   :103043   Max.   :384000   Max.   :100.00  
##    PCT_SFHOME       PCT_POV_90       PCT_HSB_19    
##  Min.   :  0.00   Min.   : 0.000   Min.   :  0.00  
##  1st Qu.: 77.00   1st Qu.: 2.000   1st Qu.: 14.00  
##  Median : 86.00   Median : 4.000   Median : 19.00  
##  Mean   : 82.67   Mean   : 4.424   Mean   : 23.11  
##  3rd Qu.: 93.00   3rd Qu.: 6.000   3rd Qu.: 31.00  
##  Max.   :100.00   Max.   :29.000   Max.   :100.00

#dim(dat_chester1)
#[1] 6942   31

Descriptive Statistics

Population Density in 1990

Values skewed, create binary variable.

summary(dat_chester1$POPDEN90)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0      21      61     189     154   13000

# Shared ggplot2 theme for the descriptive histograms: axis titles and a
# centred bold plot title in the same font family, with no plot background.
plotTheme <- theme(
  axis.title = element_text(
    size = 11, lineheight = .9, family = "Comic Sans MS"
  ),
  plot.title = element_text(
    size = 16, face = "bold", hjust = 0.5, family = "Comic Sans MS"
  ),
  plot.background = element_blank()
)

# Binary flag: 1 when a cell's 1990 population density is above the sample
# mean, 0 otherwise.
dat_chester1$hi_POPDEN90 <- ifelse(
  dat_chester1$POPDEN90 > mean(dat_chester1$POPDEN90), 1, 0
)

# Histogram of 1990 population density.
# Bin width is roughly (max - min) / sqrt(6942 cells) = 13000 / 83.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = POPDEN90)) +
  geom_histogram(
    binwidth = 157,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Population Density", x = "Average Pop Density") +
  plotTheme

Median Household Income in 1990

summary(dat_chester1$MEDINC90)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0   37986   45074   48876   56394  103043

# Histogram of 1990 median household income.
# Number of bins: sqrt(6942), about 83; bin width is roughly
# (max - min) / bins = 103043 / 83.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = MEDINC90)) +
  geom_histogram(
    binwidth = 1241,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Median Household Income", x = "$Moolah$") +
  plotTheme

Median Home Value in 1990

summary(dat_chester1$MEDHSEVAL_)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0  128000  150200  167526  214200  384000

# Histogram of 1990 median home value.
# number of bins: sqrt(6942) = 83
# bin width: (max - min) / number of bins = 384000 / 83
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = MEDHSEVAL_)) +
  geom_histogram(
    binwidth = 4627,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Median Home Value", x = "$Moolah$") +
  plotTheme

Percentage of White Pop in 1990

Values skewed, create binary variable.

summary(dat_chester1$PCT_WHITE_)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   34.00   94.00   97.00   94.74   98.00  100.00

# Binary flag: 1 when a cell's percent white population is above the sample
# mean, 0 otherwise.
dat_chester1$hi_PCT_WHITE_ <- ifelse(
  dat_chester1$PCT_WHITE_ > mean(dat_chester1$PCT_WHITE_), 1, 0
)

# Histogram of percent white population in 10-point bins.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = PCT_WHITE_)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "White Population", x = "%Percent%") +
  plotTheme

Percentage of Single-Family Homes in 1990

Values skewed, create binary variable.

summary(dat_chester1$PCT_SFHOME)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   77.00   86.00   82.67   93.00  100.00

# Binary flag: 1 when a cell's percent single-family homes is above the sample
# mean, 0 otherwise.
dat_chester1$hi_PCT_SFHOME <- ifelse(
  dat_chester1$PCT_SFHOME > mean(dat_chester1$PCT_SFHOME), 1, 0
)

# Histogram of percent single-family homes in 10-point bins.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = PCT_SFHOME)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Single-Family Homes", x = "%Percent%") +
  plotTheme

Percentage of Households Below Poverty Line in 1990

summary(dat_chester1$PCT_POV_90)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   4.000   4.424   6.000  29.000

# Histogram of percent of households below the poverty line.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
# NOTE(review): the values span only 0-29 (see summary above), so a bin width
# of 10 produces just a handful of bars; a narrower width may show the shape
# of the distribution better. Kept at 10 to leave the figure unchanged.
ggplot(dat_chester1, aes(x = PCT_POV_90)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Households Below Poverty Line", x = "%Percent%") +
  plotTheme

Percentage of Homes Built Before 1950 in 1990

summary(dat_chester1$PCT_HSB_19)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   14.00   19.00   23.11   31.00  100.00

# Histogram of percent of homes built before 1950, in 10-point bins.
# Built with ggplot() + geom_histogram() because qplot() is deprecated as of
# ggplot2 3.4.0; the constant fill/colour/alpha reproduce the former I() values.
ggplot(dat_chester1, aes(x = PCT_HSB_19)) +
  geom_histogram(
    binwidth = 10,
    fill = "lawngreen",
    colour = "lawngreen",
    alpha = 0.3
  ) +
  labs(title = "Homes Built Before 1950", x = "%Percent%") +
  plotTheme

Part A.2 - Creating Data Frame with Binary Variable

Added a conditional binary column, CHNG_URB, to the data frame; it flags raster cells that were either farmland, pasture or forest in 1992 and were converted to urban uses by 2001.

# Build the dependent variable CHNG_URB: 1 when a cell was farmland, pasture or
# forest in 1992 (FARM92 / PASTURE92 / FOREST92) and is urban in 2001
# (URBAN01); 0 otherwise.
# The land-use indicators are numeric 0/1 columns, so they are compared with
# the number 1 rather than the string "1" (which only worked through implicit
# coercion). The flag is computed in a single step instead of pre-filling a
# column of zeros and then overwriting part of it.
dat_chester1 <- dat_chester1 %>%
  mutate(
    CHNG_URB = as.numeric(
      (FARM92 == 1 | PASTURE92 == 1 | FOREST92 == 1) & URBAN01 == 1
    )
  )

summary(dat_chester1)

##      CHESCO        X                 Y              SLOPE       
##  Min.   :1   Min.   :1586501   Min.   :152678   Min.   : 0.050  
##  1st Qu.:1   1st Qu.:1602001   1st Qu.:173178   1st Qu.: 1.344  
##  Median :1   Median :1610001   Median :186178   Median : 2.048  
##  Mean   :1   Mean   :1610704   Mean   :185065   Mean   : 2.407  
##  3rd Qu.:1   3rd Qu.:1618502   3rd Qu.:197178   3rd Qu.: 3.036  
##  Max.   :1   Max.   :1638502   Max.   :214179   Max.   :12.042  
##    FOURLNE300       INTERST800       REGRAIL300       RAILSTN100     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.4971   Mean   :0.3426   Mean   :0.1193   Mean   :0.01253  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##    PARKS500M          WATER100         CITBORO_10        DIST_WATER   
##  Min.   :0.00000   Min.   :0.00000   Min.   :0.00000   Min.   :    0  
##  1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.:0.00000   1st Qu.: 1414  
##  Median :0.00000   Median :0.00000   Median :0.00000   Median : 3162  
##  Mean   :0.02276   Mean   :0.05733   Mean   :0.07015   Mean   : 3753  
##  3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.:0.00000   3rd Qu.: 5701  
##  Max.   :1.00000   Max.   :1.00000   Max.   :1.00000   Max.   :13730  
##    DIST_RAILS      DIST_REGRA      DIST_PASSR      DIST_4LNE_   
##  Min.   :    0   Min.   :    0   Min.   :    0   Min.   :    0  
##  1st Qu.: 6671   1st Qu.: 6265   1st Qu.: 1000   1st Qu.: 1414  
##  Median :12500   Median :12176   Median : 2236   Median : 3000  
##  Mean   :13233   Mean   :12918   Mean   : 2763   Mean   : 3416  
##  3rd Qu.:18201   3rd Qu.:18000   3rd Qu.: 4123   3rd Qu.: 5315  
##  Max.   :40771   Max.   :40771   Max.   :11181   Max.   :11511  
##    DIST_INTER      DIST_PARKS      PAL_WETLND         FARM92        
##  Min.   :    0   Min.   :    0   Min.   :0.0000   Min.   :0.000000  
##  1st Qu.: 5590   1st Qu.: 2500   1st Qu.:0.0000   1st Qu.:0.000000  
##  Median :12530   Median : 4472   Median :0.0000   Median :0.000000  
##  Mean   :15640   Mean   : 5161   Mean   :0.1052   Mean   :0.009219  
##  3rd Qu.:24076   3rd Qu.: 7106   3rd Qu.:0.0000   3rd Qu.:0.000000  
##  Max.   :49359   Max.   :16808   Max.   :1.0000   Max.   :1.000000  
##    PASTURE92         FOREST92         URBAN01          URBAN92       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.4994   Mean   :0.3518   Mean   :0.0677   Mean   :0.04883  
##  3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##     POPDEN90        MEDINC90        MEDHSEVAL_       PCT_WHITE_    
##  Min.   :    0   Min.   :     0   Min.   :     0   Min.   : 34.00  
##  1st Qu.:   21   1st Qu.: 37986   1st Qu.:128000   1st Qu.: 94.00  
##  Median :   61   Median : 45074   Median :150200   Median : 97.00  
##  Mean   :  189   Mean   : 48876   Mean   :167526   Mean   : 94.74  
##  3rd Qu.:  154   3rd Qu.: 56394   3rd Qu.:214200   3rd Qu.: 98.00  
##  Max.   :13000   Max.   :103043   Max.   :384000   Max.   :100.00  
##    PCT_SFHOME       PCT_POV_90       PCT_HSB_19      hi_POPDEN90    
##  Min.   :  0.00   Min.   : 0.000   Min.   :  0.00   Min.   :0.0000  
##  1st Qu.: 77.00   1st Qu.: 2.000   1st Qu.: 14.00   1st Qu.:0.0000  
##  Median : 86.00   Median : 4.000   Median : 19.00   Median :0.0000  
##  Mean   : 82.67   Mean   : 4.424   Mean   : 23.11   Mean   :0.2025  
##  3rd Qu.: 93.00   3rd Qu.: 6.000   3rd Qu.: 31.00   3rd Qu.:0.0000  
##  Max.   :100.00   Max.   :29.000   Max.   :100.00   Max.   :1.0000  
##  hi_PCT_WHITE_    hi_PCT_SFHOME      CHNG_URB      
##  Min.   :0.0000   Min.   :0.000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.000   1st Qu.:0.00000  
##  Median :1.0000   Median :1.000   Median :0.00000  
##  Mean   :0.7151   Mean   :0.574   Mean   :0.03673  
##  3rd Qu.:1.0000   3rd Qu.:1.000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.000   Max.   :1.00000

#NEXT build binomial logit model of determinants for land use

Part A.3 - Binomial Logit Model of Determinants for Land Use Change

Create Correlation Matrix of Potential Variables for Study

# Identify determinants of agriculture/pasture/forest to urban use change
# ('92-'01): correlate CHNG_URB with the candidate explanatory variables.
colnamCleaning <- c(
  "CHNG_URB", "FOURLNE300", "PARKS500M", "RAILSTN100", "REGRAIL300",
  "WATER100", "CITBORO_10", "DIST_WATER", "DIST_RAILS", "DIST_REGRA",
  "DIST_PASSR", "DIST_4LNE_", "DIST_INTER", "DIST_PARKS", "PAL_WETLND",
  "POPDEN90", "MEDINC90", "MEDHSEVAL_", "PCT_WHITE_", "hi_PCT_WHITE_",
  "PCT_POV_90", "PCT_HSB_19"
)

# The four-lane distance column is named DIST_4LNE_ (trailing underscore).
# It was previously listed as "DIST_4LNE", which `%in%` silently failed to
# match, so the variable never reached the correlation matrix. Stop on any
# unknown name so a typo cannot be dropped unnoticed again.
stopifnot(all(colnamCleaning %in% names(dat_chester1)))

# Logical mask keeps the selected columns in their data-frame order.
clean_datchester1 <- dat_chester1[, names(dat_chester1) %in% colnamCleaning]

cortable <- cor(clean_datchester1)
round(cortable, 2)

##               FOURLNE300 REGRAIL300 RAILSTN100 PARKS500M WATER100 CITBORO_10
## FOURLNE300          1.00       0.30       0.11      0.02    -0.08       0.10
## REGRAIL300          0.30       1.00       0.31     -0.04    -0.05       0.06
## RAILSTN100          0.11       0.31       1.00     -0.02    -0.02       0.06
## PARKS500M           0.02      -0.04      -0.02      1.00     0.03      -0.01
## WATER100           -0.08      -0.05      -0.02      0.03     1.00       0.04
## CITBORO_10          0.10       0.06       0.06     -0.01     0.04       1.00
## DIST_WATER          0.18       0.20       0.06     -0.04    -0.32      -0.06
## DIST_RAILS         -0.23      -0.50      -0.18      0.02     0.09      -0.08
## DIST_REGRA         -0.24      -0.51      -0.17      0.03     0.09      -0.08
## DIST_PASSR         -0.35      -0.27      -0.08      0.02     0.00      -0.25
## DIST_INTER         -0.17      -0.32      -0.10     -0.05     0.08      -0.03
## DIST_PARKS         -0.15      -0.11      -0.05     -0.22     0.00      -0.06
## PAL_WETLND          0.01       0.00      -0.03      0.00     0.02      -0.02
## POPDEN90            0.14       0.17       0.13     -0.04    -0.03       0.27
## MEDINC90            0.23       0.21       0.01      0.03    -0.07      -0.17
## MEDHSEVAL_          0.18       0.16       0.02      0.04    -0.07      -0.18
## PCT_WHITE_         -0.20      -0.03      -0.03      0.05    -0.02      -0.23
## PCT_POV_90         -0.14      -0.16      -0.04     -0.06     0.05       0.18
## PCT_HSB_19         -0.15      -0.33      -0.02     -0.03     0.00       0.25
## hi_PCT_WHITE_      -0.14      -0.13      -0.11      0.07     0.03      -0.22
## CHNG_URB            0.16       0.21       0.16     -0.01    -0.02       0.10
##               DIST_WATER DIST_RAILS DIST_REGRA DIST_PASSR DIST_INTER DIST_PARKS
## FOURLNE300          0.18      -0.23      -0.24      -0.35      -0.17      -0.15
## REGRAIL300          0.20      -0.50      -0.51      -0.27      -0.32      -0.11
## RAILSTN100          0.06      -0.18      -0.17      -0.08      -0.10      -0.05
## PARKS500M          -0.04       0.02       0.03       0.02      -0.05      -0.22
## WATER100           -0.32       0.09       0.09       0.00       0.08       0.00
## CITBORO_10         -0.06      -0.08      -0.08      -0.25      -0.03      -0.06
## DIST_WATER          1.00      -0.28      -0.29       0.03      -0.25       0.06
## DIST_RAILS         -0.28       1.00       1.00       0.31       0.80       0.17
## DIST_REGRA         -0.29       1.00       1.00       0.31       0.81       0.17
## DIST_PASSR          0.03       0.31       0.31       1.00       0.20       0.14
## DIST_INTER         -0.25       0.80       0.81       0.20       1.00       0.42
## DIST_PARKS          0.06       0.17       0.17       0.14       0.42       1.00
## PAL_WETLND          0.01      -0.07      -0.07       0.00      -0.08      -0.05
## POPDEN90            0.04      -0.18      -0.18      -0.15      -0.12      -0.10
## MEDINC90            0.24      -0.39      -0.39       0.05      -0.31      -0.23
## MEDHSEVAL_          0.36      -0.37      -0.37       0.10      -0.35      -0.24
## PCT_WHITE_          0.08      -0.06      -0.07       0.22      -0.24      -0.14
## PCT_POV_90         -0.03       0.21       0.22       0.05       0.21       0.18
## PCT_HSB_19          0.04       0.28       0.28       0.10       0.19       0.21
## hi_PCT_WHITE_      -0.08       0.03       0.03       0.27      -0.16      -0.22
## CHNG_URB            0.04      -0.15      -0.15      -0.13      -0.09      -0.08
##               PAL_WETLND POPDEN90 MEDINC90 MEDHSEVAL_ PCT_WHITE_ PCT_POV_90
## FOURLNE300          0.01     0.14     0.23       0.18      -0.20      -0.14
## REGRAIL300          0.00     0.17     0.21       0.16      -0.03      -0.16
## RAILSTN100         -0.03     0.13     0.01       0.02      -0.03      -0.04
## PARKS500M           0.00    -0.04     0.03       0.04       0.05      -0.06
## WATER100            0.02    -0.03    -0.07      -0.07      -0.02       0.05
## CITBORO_10         -0.02     0.27    -0.17      -0.18      -0.23       0.18
## DIST_WATER          0.01     0.04     0.24       0.36       0.08      -0.03
## DIST_RAILS         -0.07    -0.18    -0.39      -0.37      -0.06       0.21
## DIST_REGRA         -0.07    -0.18    -0.39      -0.37      -0.07       0.22
## DIST_PASSR          0.00    -0.15     0.05       0.10       0.22       0.05
## DIST_INTER         -0.08    -0.12    -0.31      -0.35      -0.24       0.21
## DIST_PARKS         -0.05    -0.10    -0.23      -0.24      -0.14       0.18
## PAL_WETLND          1.00     0.00     0.03       0.03       0.01      -0.02
## POPDEN90            0.00     1.00    -0.02       0.00      -0.09       0.04
## MEDINC90            0.03    -0.02     1.00       0.80       0.22      -0.48
## MEDHSEVAL_          0.03     0.00     0.80       1.00       0.28      -0.35
## PCT_WHITE_          0.01    -0.09     0.22       0.28       1.00      -0.33
## PCT_POV_90         -0.02     0.04    -0.48      -0.35      -0.33       1.00
## PCT_HSB_19         -0.03     0.06    -0.48      -0.31      -0.14       0.37
## hi_PCT_WHITE_       0.03    -0.11     0.22       0.18       0.62      -0.27
## CHNG_URB            0.00     0.12     0.04       0.03      -0.06      -0.03
##               PCT_HSB_19 hi_PCT_WHITE_ CHNG_URB
## FOURLNE300         -0.15         -0.14     0.16
## REGRAIL300         -0.33         -0.13     0.21
## RAILSTN100         -0.02         -0.11     0.16
## PARKS500M          -0.03          0.07    -0.01
## WATER100            0.00          0.03    -0.02
## CITBORO_10          0.25         -0.22     0.10
## DIST_WATER          0.04         -0.08     0.04
## DIST_RAILS          0.28          0.03    -0.15
## DIST_REGRA          0.28          0.03    -0.15
## DIST_PASSR          0.10          0.27    -0.13
## DIST_INTER          0.19         -0.16    -0.09
## DIST_PARKS          0.21         -0.22    -0.08
## PAL_WETLND         -0.03          0.03     0.00
## POPDEN90            0.06         -0.11     0.12
## MEDINC90           -0.48          0.22     0.04
## MEDHSEVAL_         -0.31          0.18     0.03
## PCT_WHITE_         -0.14          0.62    -0.06
## PCT_POV_90          0.37         -0.27    -0.03
## PCT_HSB_19          1.00         -0.13    -0.05
## hi_PCT_WHITE_      -0.13          1.00    -0.09
## CHNG_URB           -0.05         -0.09     1.00

# Correlogram of the candidate variables, drawn as squares and ordered with
# corrplot's "FPC" ordering.
N <- cor(clean_datchester1)
corrplot(N, order = "FPC", method = "square")

Variables with the highest degree of correlation with CHNG_URB (absolute values; signs are shown in the matrix above)

VARIABLE	DEGREE
REGRAIL300	0.21
FOURLNE300	0.16
RAILSTN100	0.16
DIST_REGRA	0.15
DIST_RAILS	0.15
DIST_PASSR	0.13
DIST_INTER	0.09
DIST_PARKS	0.08
PCT_WHITE_	0.06
PCT_HSB_19	0.05

Correlation Matrix of Potential Independent Variables

REGRAIL300, DIST_REGRA & DIST_PASSR are highly correlated (0.30 or more in absolute value) with many of the other independent variables and should not be used in the same model as the variables they are correlated with.

This leaves a set of independent variables whose pairwise correlations with one another are all below 0.30 in absolute value: PCT_WHITE_, RAILSTN100, DIST_PARKS & DIST_REGRA

# Shortlisted explanatory variables, used to check how strongly they are
# correlated with one another.
colnamCleaning2 <- c(
  "RAILSTN100", "REGRAIL300", "DIST_RAILS", "DIST_PASSR", "hi_POPDEN90",
  "FOURLNE300", "DIST_REGRA", "DIST_INTER", "DIST_PARKS", "PCT_WHITE_",
  "PCT_HSB_19"
)

# Logical mask keeps the selected columns in their data-frame order.
clean_datchester2 <- dat_chester1[, names(dat_chester1) %in% colnamCleaning2]

cortable <- cor(clean_datchester2)
round(cortable, digits = 2)

##             FOURLNE300 REGRAIL300 RAILSTN100 DIST_RAILS DIST_REGRA DIST_PASSR
## FOURLNE300        1.00       0.30       0.11      -0.23      -0.24      -0.35
## REGRAIL300        0.30       1.00       0.31      -0.50      -0.51      -0.27
## RAILSTN100        0.11       0.31       1.00      -0.18      -0.17      -0.08
## DIST_RAILS       -0.23      -0.50      -0.18       1.00       1.00       0.31
## DIST_REGRA       -0.24      -0.51      -0.17       1.00       1.00       0.31
## DIST_PASSR       -0.35      -0.27      -0.08       0.31       0.31       1.00
## DIST_INTER       -0.17      -0.32      -0.10       0.80       0.81       0.20
## DIST_PARKS       -0.15      -0.11      -0.05       0.17       0.17       0.14
## PCT_WHITE_       -0.20      -0.03      -0.03      -0.06      -0.07       0.22
## PCT_HSB_19       -0.15      -0.33      -0.02       0.28       0.28       0.10
## hi_POPDEN90       0.23       0.31       0.14      -0.29      -0.30      -0.18
##             DIST_INTER DIST_PARKS PCT_WHITE_ PCT_HSB_19 hi_POPDEN90
## FOURLNE300       -0.17      -0.15      -0.20      -0.15        0.23
## REGRAIL300       -0.32      -0.11      -0.03      -0.33        0.31
## RAILSTN100       -0.10      -0.05      -0.03      -0.02        0.14
## DIST_RAILS        0.80       0.17      -0.06       0.28       -0.29
## DIST_REGRA        0.81       0.17      -0.07       0.28       -0.30
## DIST_PASSR        0.20       0.14       0.22       0.10       -0.18
## DIST_INTER        1.00       0.42      -0.24       0.19       -0.19
## DIST_PARKS        0.42       1.00      -0.14       0.21       -0.16
## PCT_WHITE_       -0.24      -0.14       1.00      -0.14       -0.05
## PCT_HSB_19        0.19       0.21      -0.14       1.00       -0.16
## hi_POPDEN90      -0.19      -0.16      -0.05      -0.16        1.00

Using the variables identified as correlated with urban land use change, create binomial logit models for testing.

MODEL A: MOST HIGHLY CORRELATED VARIABLES WITH LAND USE CHANGE

Model A1: Change in Land Use vs Percent of White Pop

AIC: 2168.6

# Model A1: binomial logit of urban conversion on percent white population.
mod_chester1 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester1)

## 
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_, family = binomial, data = dat_chester1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.6680  -0.2727  -0.2603  -0.2524   2.6431  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.317304   0.572871  -0.554     0.58    
## PCT_WHITE_  -0.031448   0.006146  -5.117 3.11e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2185.6  on 6941  degrees of freedom
## Residual deviance: 2164.6  on 6940  degrees of freedom
## AIC: 2168.6
## 
## Number of Fisher Scoring iterations: 6

Model A2: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail

AIC: 2095.1

# Model A2: adds the RAILSTN100 indicator to Model A1.
mod_chester2 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester2)

## 
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100, family = binomial, 
##     data = dat_chester1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0338  -0.2593  -0.2477  -0.2402   2.6796  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.452314   0.596422  -0.758    0.448    
## PCT_WHITE_  -0.031099   0.006398  -4.861 1.17e-06 ***
## RAILSTN100   2.530466   0.241771  10.466  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2185.6  on 6941  degrees of freedom
## Residual deviance: 2089.1  on 6939  degrees of freedom
## AIC: 2095.1
## 
## Number of Fisher Scoring iterations: 6

Model A3: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail vs Distance from Nearest Park

AIC: 2047.4

# Model A3: adds distance to parks (DIST_PARKS) to Model A2.
mod_chester3 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester3)

## 
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS, 
##     family = binomial, data = dat_chester1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1205  -0.2929  -0.2502  -0.1991   2.8468  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.031e+00  6.247e-01   1.651   0.0987 .  
## PCT_WHITE_  -3.944e-02  6.411e-03  -6.152 7.66e-10 ***
## RAILSTN100   2.383e+00  2.437e-01   9.777  < 2e-16 ***
## DIST_PARKS  -1.570e-04  2.424e-05  -6.477 9.36e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2185.6  on 6941  degrees of freedom
## Residual deviance: 2039.4  on 6938  degrees of freedom
## AIC: 2047.4
## 
## Number of Fisher Scoring iterations: 6

Model A4: Change in Land Use vs Percent of White Pop vs 100 Meters of SEPTA Regional Rail vs Distance from Nearest Park vs Distance to a Regional Rail Line

AIC: 1937.8

# Model A4: adds the DIST_REGRA distance variable to Model A3.
mod_chester4 <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA,
  family = binomial,
  data = dat_chester1
)
summary(mod_chester4)

## 
## Call:
## glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + 
##     DIST_REGRA, family = binomial, data = dat_chester1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0861  -0.2998  -0.2132  -0.1476   3.4005  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.888e+00  6.573e-01   2.873  0.00407 ** 
## PCT_WHITE_  -3.900e-02  6.875e-03  -5.672 1.41e-08 ***
## RAILSTN100   1.385e+00  2.569e-01   5.393 6.94e-08 ***
## DIST_PARKS  -1.205e-04  2.628e-05  -4.585 4.55e-06 ***
## DIST_REGRA  -1.051e-04  1.137e-05  -9.249  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2185.6  on 6941  degrees of freedom
## Residual deviance: 1927.8  on 6937  degrees of freedom
## AIC: 1937.8
## 
## Number of Fisher Scoring iterations: 7

Selecting the Leanest & Meanest Model

Analysis by Variance Tables

Each successive model is a statistically significant improvement over the one before it, and therefore over Model A1.

# Analysis of deviance for the nested models A1-A4 with chi-square tests;
# each row of the table compares a model with the one listed before it.
anova(
  mod_chester1,
  mod_chester2,
  mod_chester3,
  mod_chester4,
  test = "Chisq"
)

## Analysis of Deviance Table
## 
## Model 1: CHNG_URB ~ PCT_WHITE_
## Model 2: CHNG_URB ~ PCT_WHITE_ + RAILSTN100
## Model 3: CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS
## Model 4: CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
##   Resid. Df Resid. Dev Df Deviance  Pr(>Chi)    
## 1      6940     2164.6                          
## 2      6939     2089.1  1   75.496 < 2.2e-16 ***
## 3      6938     2039.4  1   49.734  1.76e-12 ***
## 4      6937     1927.8  1  111.595 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Order of Significance by Step Through

Backward Selection

Run backward selection for Model A4. Nothing drops.

AIC = 1937.78 Null Deviance: 2186 Residual Deviance: 1928

# Backward elimination by AIC, starting from the four-predictor Model A4.
step(object = mod_chester4, direction = "backward")

## Start:  AIC=1937.78
## CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA
## 
##              Df Deviance    AIC
## <none>            1927.8 1937.8
## - DIST_PARKS  1   1950.6 1958.6
## - RAILSTN100  1   1952.9 1960.9
## - PCT_WHITE_  1   1953.7 1961.7
## - DIST_REGRA  1   2039.4 2047.4

## 
## Call:  glm(formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + 
##     DIST_REGRA, family = binomial, data = dat_chester1)
## 
## Coefficients:
## (Intercept)   PCT_WHITE_   RAILSTN100   DIST_PARKS   DIST_REGRA  
##   1.8882136   -0.0389976    1.3852463   -0.0001205   -0.0001051  
## 
## Degrees of Freedom: 6941 Total (i.e. Null);  6937 Residual
## Null Deviance:       2186 
## Residual Deviance: 1928  AIC: 1938

#nothing is dropped

Forward Selection

Setting up the forward selection

# Forward stepwise selection by AIC: start from the intercept-only model and
# allow terms to be added up to the full four-predictor specification.
intonly <- glm(
  formula = CHNG_URB ~ 1,
  family = binomial,
  data = dat_chester1
)
modUpper <- glm(
  formula = CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA,
  family = binomial,
  data = dat_chester1
)
step(
  intonly,
  scope = list(lower = intonly, upper = modUpper),
  direction = "forward"
)

## Start:  AIC=2187.6
## CHNG_URB ~ 1
## 
##              Df Deviance    AIC
## + DIST_REGRA  1   2001.9 2005.9
## + RAILSTN100  1   2108.1 2112.1
## + DIST_PARKS  1   2140.7 2144.7
## + PCT_WHITE_  1   2164.6 2168.6
## <none>            2185.6 2187.6
## 
## Step:  AIC=2005.88
## CHNG_URB ~ DIST_REGRA
## 
##              Df Deviance    AIC
## + RAILSTN100  1   1974.8 1980.8
## + PCT_WHITE_  1   1976.0 1982.0
## + DIST_PARKS  1   1980.2 1986.2
## <none>            2001.9 2005.9
## 
## Step:  AIC=1980.84
## CHNG_URB ~ DIST_REGRA + RAILSTN100
## 
##              Df Deviance    AIC
## + PCT_WHITE_  1   1950.6 1958.6
## + DIST_PARKS  1   1953.7 1961.7
## <none>            1974.8 1980.8
## 
## Step:  AIC=1958.64
## CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_
## 
##              Df Deviance    AIC
## + DIST_PARKS  1   1927.8 1937.8
## <none>            1950.6 1958.6
## 
## Step:  AIC=1937.78
## CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ + DIST_PARKS

## 
## Call:  glm(formula = CHNG_URB ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ + 
##     DIST_PARKS, family = binomial, data = dat_chester1)
## 
## Coefficients:
## (Intercept)   DIST_REGRA   RAILSTN100   PCT_WHITE_   DIST_PARKS  
##   1.8882136   -0.0001051    1.3852463   -0.0389976   -0.0001205  
## 
## Degrees of Freedom: 6941 Total (i.e. Null);  6937 Residual
## Null Deviance:       2186 
## Residual Deviance: 1928  AIC: 1938

#Start: CHNG_URB ~ 1  
## AIC = 2187.6

#Step: ~ DIST_REGRA
##AIC = 2005.88

#Step: ~ DIST_REGRA + RAILSTN100
##AIC = 1980.84

#Step: ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_
##AIC = 1958.64

#Step: ~ DIST_REGRA + RAILSTN100 + PCT_WHITE_ + DIST_PARKS
##AIC = 1937.78

#Degrees of Freedom: 6941 Total (i.e. Null);  6937 Residual
#Null Deviance:     2186 
#Residual Deviance: 1928

The AIC is at its lowest when all four candidate independent variables are present. ANOVA, backward selection, and forward selection all identify this model (model 4) as the best fit.

Odds Ratio Calculation

PCT_WHITE_ Coefficient

A one-unit increase in the percentage of white population results in a roughly 4% decrease in the odds of land conversion.

# Fitted coefficients of model 4 (log-odds scale).
coef(mod_chester4)

##   (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
##  1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128

# (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
# 1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128 

# Percent change in the odds of conversion per one-unit increase in PCT_WHITE_.
# The coefficient is read from the fitted model instead of being retyped, so
# the result stays in sync if the model is refit.
(exp(coef(mod_chester4)[["PCT_WHITE_"]]) - 1) * 100

## [1] -3.824699

# a one-unit increase in PCT_WHITE_ results in a ~3.8% (roughly 4%) decrease in the odds of land conversion

DIST_PARKS Coefficient

A one-unit increase in distance to the closest park results in a 0.012% decrease in the odds of land conversion. Per unit of distance, this has virtually no impact on land conversion likelihood within this model.

# Fitted coefficients of model 4 (log-odds scale).
coef(mod_chester4)

##   (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
##  1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128

# (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
# 1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128 

# Percent change in the odds of conversion per one-unit increase in DIST_PARKS.
# The coefficient is read from the fitted model instead of being retyped, so
# the result stays in sync if the model is refit.
(exp(coef(mod_chester4)[["DIST_PARKS"]]) - 1) * 100

## [1] -0.01204897

# a one-unit increase in DIST_PARKS results in a ~0.012% decrease in the odds of land conversion

DIST_REGRA Coefficient

A one-unit increase in distance to the closest regional rail station results in a 0.011% decrease in the odds of land conversion. Per unit of distance, this has virtually no impact on land conversion likelihood within this model.

# Fitted coefficients of model 4 (log-odds scale).
coef(mod_chester4)

##   (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
##  1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128

# (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
# 1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128 

# Percent change in the odds of conversion per one-unit increase in DIST_REGRA.
# The coefficient is read from the fitted model instead of being retyped, so
# the result stays in sync if the model is refit.
(exp(coef(mod_chester4)[["DIST_REGRA"]]) - 1) * 100

## [1] -0.01051073

# a one-unit increase in DIST_REGRA results in a ~0.011% decrease in the odds of land conversion

RAILSTN100 Coefficient

If a cell is within 100 meters of a SEPTA rail station, the odds that the land will be converted to urban use increase by roughly 300% (about four times the odds for cells farther away), holding the other variables constant.

# Fitted coefficients of model 4 (log-odds scale).
coef(mod_chester4)

##   (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
##  1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128

# (Intercept)    PCT_WHITE_    RAILSTN100    DIST_PARKS    DIST_REGRA 
# 1.8882135917 -0.0389976059  1.3852463122 -0.0001204970 -0.0001051128 

# Percent change in the odds of conversion when RAILSTN100 goes from 0 to 1.
# The coefficient is read from the fitted model instead of being retyped, so
# the result stays in sync if the model is refit.
(exp(coef(mod_chester4)[["RAILSTN100"]]) - 1) * 100

## [1] 299.581

# RAILSTN100 = 1 (vs 0) results in a ~300% increase in the odds of land conversion

Visualization of Model(s) & Scenarios

Jitter Plots

Shows distribution between land converted to urban use (1) and land that remained unconverted (0).

High percentages of white populations correspond with predominately unconverted land.

# Jittered scatter of the binary conversion outcome (CHNG_URB) against the
# percentage of white population; translucent points expose where they pile up.
jit <- ggplot(data = dat_chester1, mapping = aes(x = PCT_WHITE_, y = CHNG_URB))
jit +
  geom_point(
    position = "jitter",
    shape = 21,
    size = 4,
    fill = I("lawngreen"),
    color = ("lawngreen"),
    alpha = I(.2)
  ) +
  theme(panel.background = element_blank()) +
  plotTheme

#CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA

Unsurprisingly, a lack of proximity to rail infrastructure corresponds with unconverted land.

# Jittered scatter of the binary conversion outcome (CHNG_URB) against the
# RAILSTN100 indicator; translucent points expose where they pile up.
jit2 <- ggplot(data = dat_chester1, mapping = aes(x = RAILSTN100, y = CHNG_URB))
jit2 +
  geom_point(
    position = "jitter",
    shape = 21,
    size = 4,
    fill = I("lawngreen"),
    color = ("lawngreen"),
    alpha = I(.2)
  ) +
  theme(panel.background = element_blank()) +
  plotTheme

Interestingly, distance from parks seems to have more variation than other land use indicators. While the variable observations are concentrated within a close proximity to park space and unconverted land, there is more spread within this distribution.

# Jittered scatter of the binary conversion outcome (CHNG_URB) against distance
# to parks (DIST_PARKS); translucent points expose where they pile up.
jit3 <- ggplot(data = dat_chester1, mapping = aes(x = DIST_PARKS, y = CHNG_URB))
jit3 +
  geom_point(
    position = "jitter",
    shape = 21,
    size = 4,
    fill = I("lawngreen"),
    color = ("lawngreen"),
    alpha = I(.2)
  ) +
  theme(panel.background = element_blank()) +
  plotTheme

Scenario Plots

Using model 4, our best model, we evaluated how distance from regional rail, distance from parks, and percentage of white population impact changes in land use. Most notable is the downward curve of each scenario, indicating that as the percentage of white population increases, the likelihood of conversion to urban uses decreases. Conversely, lower percentages of white population correspond with a higher likelihood of conversion to urban land. The plot also indicates the importance of rail infrastructure, as seen in the red scenario “Pred_NoRAIL_PARKMED_REGA3Q”. For a cell that is not within 100 meters of a SEPTA regional rail station and is farther from a rail station than 75% (3rd quartile) of the data, the likelihood of conversion to urban land is under 10%. As the white population increases, this likelihood decreases even further.

Interestingly, holding distance from SEPTA rail and regional rail constant, being closer to a park increases the probability of conversion to urban land uses, as seen in the gap between the purple “Pred_YesRAIL_PARKMED_REGAMED” and blue “Pred_YesRAIL_PARK3Q_REGAMED” lines.

##CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA

# Scenario inputs for model 4
# (CHNG_URB ~ PCT_WHITE_ + RAILSTN100 + DIST_PARKS + DIST_REGRA).
# Every scenario keeps the observed PCT_WHITE_ values and pins the other three
# predictors to a fixed level, so each curve shows how the predicted
# probability varies with PCT_WHITE_ alone.

# Fixed levels, labelled as in the original annotations (median / third
# quartile). They are kept as literals so the predictions are unchanged.
park_median <- 4472
park_q3 <- 7106
regra_median <- 12176
regra_q3 <- 18000

# RAILSTN100 = 0, median park distance, third-quartile regional-rail distance.
newdat_chester <- data.frame(
  PCT_WHITE_ = dat_chester1$PCT_WHITE_,
  RAILSTN100 = 0,
  DIST_PARKS = park_median,
  DIST_REGRA = regra_q3
)

# RAILSTN100 = 1, median park distance, median regional-rail distance.
newdat_chester1 <- data.frame(
  PCT_WHITE_ = dat_chester1$PCT_WHITE_,
  RAILSTN100 = 1,
  DIST_PARKS = park_median,
  DIST_REGRA = regra_median
)

# RAILSTN100 = 1, third-quartile park distance, median regional-rail distance.
newdat_chester2 <- data.frame(
  PCT_WHITE_ = dat_chester1$PCT_WHITE_,
  RAILSTN100 = 1,
  DIST_PARKS = park_q3,
  DIST_REGRA = regra_median
)

# RAILSTN100 = 1, third-quartile park distance, third-quartile regional-rail
# distance.
newdat_chester3 <- data.frame(
  PCT_WHITE_ = dat_chester1$PCT_WHITE_,
  RAILSTN100 = 1,
  DIST_PARKS = park_q3,
  DIST_REGRA = regra_q3
)

# Predicted probabilities, one column per scenario. The table is built once;
# the earlier version rebuilt it three times and discarded the first two.
scenarios <- list(
  Pred_NoRAIL_PARKMED_REGA3Q = newdat_chester,
  Pred_YesRAIL_PARKMED_REGAMED = newdat_chester1,
  Pred_YesRAIL_PARK3Q_REGAMED = newdat_chester2,
  Pred_YesRAIL_PARK3Q_REGA3Q = newdat_chester3
)

pred_dat <- data.frame(PCT_WHITE_ = dat_chester1$PCT_WHITE_)
for (scenario_name in names(scenarios)) {
  pred_dat[[scenario_name]] <- predict(
    mod_chester4,
    newdata = scenarios[[scenario_name]],
    type = "response"
  )
}

# Long format: one row per (PCT_WHITE_, Scenario) pair for plotting.
dat_gg <- pivot_longer(
  pred_dat,
  cols = -PCT_WHITE_,
  names_to = "Scenario",
  values_to = "value"
)

ggplot(dat_gg, aes(x = PCT_WHITE_, y = value, colour = Scenario)) +
  geom_line() +
  ylim(0, 1) +
  xlab("Percentage of White Population") +
  ylab("Predicted Probability of Conversion to Urban Land") +
  plotTheme

CPLN 505: Assignment 3 Part A

Emily Goldstein

22 April 2022

Summary of Findings

Part A: Modeling Land Use Change in Chester Co. PA

Part A.1 - Reading the Data & Running Descriptive Statistics

Descriptive Statistics

Part A.2 - Creating Data Frame with Binary Variable

Part A.3 - Binomial Logit Model of Determinants for Land Use Change

Selecting the Leanest & Meanest Model

Odds Ratio Calculation

Visualization of Model(s) & Scenarios

Jitter Plots

Scenario Plots