Final Project Code for IS310
Group 2
Group Members: Linda Schnabel, Eric Hibbs, Daphne Giffin, Amy Zhang
# Spatial Data Mapping
## install.packages(c("ggplot2", "sf"))  # one-time setup, left disabled
library(ggplot2)
library(sf)
# Folder that holds the neighborhood shapefile (no file extension).
shapefile_folder <- "PittHoods"
# Load the shapefile from that folder with sf.
shapefile_sf <- st_read(shapefile_folder)
Reading layer `Neighborhoods_' from data source `/Users/amyzhang/Downloads/amyFile/ISHW/IS310_final/PittHoods' using driver `ESRI Shapefile'
Simple feature collection with 90 features and 39 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -80.09533 ymin: 40.36152 xmax: -79.86585 ymax: 40.50104
Geodetic CRS: WGS 84
# print(shapefile_sf)
# Draw every neighborhood polygon from the shapefile.
ggplot(data = shapefile_sf) +
  geom_sf() +
  theme_minimal()

ZOOM IN ON HILL DISTRICT
# ZOOM IN ON HILL DISTRICT
library(ggplot2)
library(sf)
# Folder that holds the neighborhood shapefile (no file extension).
Pitt_sf_folder <- "PittHoods"
# Read the shapefile from the folder defined just above. The original read
# `shapefile_folder`, a variable from an earlier section, which left
# `Pitt_sf_folder` unused and made this section fail when run on its own.
shapefile_sf <- st_read(dsn = Pitt_sf_folder)
Reading layer `Neighborhoods_' from data source `/Users/amyzhang/Downloads/amyFile/ISHW/IS310_final/PittHoods' using driver `ESRI Shapefile'
Simple feature collection with 90 features and 39 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -80.09533 ymin: 40.36152 xmax: -79.86585 ymax: 40.50104
Geodetic CRS: WGS 84
# print(shapefile_sf)
# Neighborhood names used here to represent the Hill District.
hoods_of_interest <- c("Crawford-Roberts", "Upper Hill", "Middle Hill", "Bedford Dwellings","Terrace Village")
# Keep only the features whose `hood` value is one of those names.
subset_sf <- shapefile_sf[is.element(shapefile_sf$hood, hoods_of_interest), ]
# Map the selected neighborhoods.
ggplot(data = subset_sf) +
  geom_sf() +
  theme_minimal()

First, filtered data to Hill District census tracts
# Load the food-access atlas extract and narrow it to Allegheny County, PA.
df <- read.csv("food_access_research_atlas.csv")
## library("lpsolve")
library("dplyr")
library("plotly")
head(df)
## Keep only the Pennsylvania rows.
penndata <- filter(df, State == "Pennsylvania")
## Narrow the Pennsylvania rows to Allegheny County. Filtering `penndata`
## (not `df`) actually applies the state filter, so a same-named county in
## another state cannot leak into the result.
alleghenydata <- filter(penndata, County == "Allegheny")
## Preview the Allegheny County rows.
head(alleghenydata)
##scatter plot of reality, x= lapop1(low access pop at 1 mile or more), y = share
##actualx <- data$lapop1
##actualy <-data$lapop1share
## ideal scatterplot x = lapop1 when 500 or 33% of pop, y = share 33%
## Census tract IDs used for the Hill District:
## 42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600,
                            42003050900, 42003051000, 42003051100)
## Select all Hill District rows with one vectorized membership test instead
## of growing the data frame with rbind() inside a row loop. Rows come back in
## the order they appear in `alleghenydata`.
hilldata <- filter(alleghenydata, CensusTract %in% HillDist_census_tracts)
head(hilldata)
#as of 2010 the area comprises Census Tracts 305 (Crawford Roberts, "Lower Hill"), 501 (Crawford-Roberts, "Middle Hill"), 506 (Upper Hill), 509 (Bedford Dwellings), 510 and 511 (Terrace Village)
##function for finding ideal
## first you take an actual share, than you subtract 33 from it to see by how much its over, lapop1share - 33%
## calculate how much that would be in people,
##then subtract that amount from lapop1 to know how many people there are at <33%
idealxvalues <- c()
#' Threshold low-access population per tract.
#'
#' For every row of `data` the threshold is `share` of the tract's 2010
#' population (column `POP2010`), capped at `cap` people.
#'
#' @param xvalues Vector the thresholds are appended to (may be empty/NULL).
#' @param data Data frame with a numeric `POP2010` column. Defaults to the
#'   global `hilldata`, which is what the original implementation read.
#' @param share Fraction of the population used as the threshold.
#' @param cap Upper bound on the threshold, in people.
#' @return `xvalues` followed by one threshold per row of `data`.
thresholdcalculator <- function(xvalues, data = hilldata, share = .33, cap = 500) {
  # pmin() applies the cap element-wise and also handles zero-row input,
  # where the original 1:nrow() loop would have iterated over c(1, 0).
  c(xvalues, pmin(data[["POP2010"]] * share, cap))
}
# Compute the threshold population for each tract and echo it, one value
# per line.
thresholdx <- thresholdcalculator(idealxvalues)
for (element in thresholdx) print(element)
[1] 500
[1] 500
[1] 500
[1] 396.66
[1] 500
[1] 370.26
## Rows 396 and 400 have 0 population, but they are not Hill District tracts, so this is not a problem
#vector to store shares
thresholdpopshare <- c()
#' Threshold population expressed as a share of each tract's population.
#'
#' Divides each tract's threshold by that tract's 2010 population
#' (column `POP2010`). The original reset its index to 1 on every loop
#' iteration, so every tract was divided into the FIRST threshold only.
#'
#' @param xthreshshare Vector the shares are appended to (may be empty/NULL).
#' @param xvals Threshold populations, one per row of `data` (a single value
#'   is recycled across all rows).
#' @param data Data frame with a numeric `POP2010` column. Defaults to the
#'   global `hilldata`, which is what the original implementation read.
#' @return `xthreshshare` followed by one share per row of `data`. A tract
#'   with zero population yields `NaN` or `Inf`.
thresholdsharecalc <- function(xthreshshare, xvals, data = hilldata) {
  pop <- data[["POP2010"]]
  # Guard against silent recycling of a partially matching vector.
  stopifnot(length(xvals) %in% c(1L, length(pop)))
  c(xthreshshare, xvals / pop)
}
# Threshold populations and their share of each tract's population.
thresholdpop <- thresholdcalculator(idealxvalues)
thresholdpopshare <- thresholdsharecalc(thresholdpopshare, thresholdpop)
# Low-access columns (half-mile measure) pulled out for plotting.
lapophalf <- hilldata[["lapophalf"]]
lapophalfshare <- hilldata[["lapophalfshare"]]
# Echo every share, one value per line.
for (element in thresholdpopshare) print(element)
[1] 0.2216312
[1] 0.2929115
[1] 0.2430724
[1] 0.4159734
[1] 0.2374169
[1] 0.4456328
head(hilldata)
# PLOTLY EXAMPLE
# Census tract IDs as strings; they are the x-axis labels of the charts below.
# as.character() converts the whole column at once instead of growing a
# vector element by element inside a loop.
ctnames <- as.character(hilldata$CensusTract)
# One row per tract: label, low-access population, threshold population.
lapopdata <- data.frame(ctnames, lapophalf, thresholdpop)
# Grouped bars: actual low-access population next to the threshold.
popbargraph <- plot_ly(lapopdata, x = ~ctnames, y = ~lapophalf, type = 'bar', name = 'low access at half mile') %>%
  add_trace(y = ~thresholdpop, name = 'threshold pop') %>%
  layout(yaxis = list(title = 'population'), barmode = 'group')
popbargraph
# Poverty rate per tract compared with the 20 threshold used for a
# low-income tract.
poppovertyrate <- hilldata$PovertyRate
# One threshold value per tract; sized from the data rather than hard-coded
# to six entries so it stays valid if the tract list changes.
twenties <- rep(20, length(poppovertyrate))
povertyrates <- data.frame(ctnames, poppovertyrate, twenties)
# Grouped bars: each tract's poverty rate next to the threshold.
# The y axis shows poverty rates, so it is no longer titled 'population'.
povertybargraph <- plot_ly(povertyrates, x = ~ctnames, y = ~poppovertyrate, type = 'bar', name = 'census poverty rate') %>%
  add_trace(y = ~twenties, name = 'threshold for low income tract') %>%
  layout(yaxis = list(title = 'poverty rate'), barmode = 'group')
povertybargraph
# TEAM REFERENCE
library("dplyr")
## Pennsylvania rows. Assigned so the whole table is not echoed to the
## console, which is what the bare filter() call did.
penndata <- filter(df, State == "Pennsylvania")
## Allegheny County rows within Pennsylvania.
alleghenydata <- filter(penndata, County == "Allegheny")
## Census tract IDs used for the Hill District.
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600,
                            42003050900, 42003051000, 42003051100)
## One vectorized membership test replaces the row-by-row rbind() loop.
## Rows come back in the order they appear in `alleghenydata`.
hilldata <- filter(alleghenydata, CensusTract %in% HillDist_census_tracts)
head(hilldata)
FIND STANDARDIZED FOOD INSECURITY SCORES FOR 2019
# FIND STANDARDIZED FOOD INSECURITY SCORES FOR 2019
# Atlas extract this script treats as the 2019 data.
FARA_2019 <- read.csv("food_access_research_atlas.csv")
# Census tract IDs used for the Hill District.
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows.
HD_FARA2019 <- FARA_2019[is.element(FARA_2019$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns <- scale(HD_FARA2019[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2019[["overall_FI_score"]] <- rowSums(standardized_columns)
# Compact results table: tract, both raw measures, and the overall score.
results_2019_df <- with(HD_FARA2019, data.frame(
  CensusTract = CensusTract,
  lapophalf = lapophalf,
  lalowihalf = lalowihalf,
  overall_FI_score = overall_FI_score
))
# Show the table.
print(results_2019_df)
# Export the table to a LaTeX file.
library(xtable)
table_latex <- xtable(results_2019_df, caption = "Food Insecurity Scores for 2019 in Hill District", label = "tab:food_insecurity_2019_hd")
print(table_latex, type = "latex", file = "METHODStable_2019_hd.tex")
FIND STANDARDIZED FOOD INSECURITY SCORES FOR 2010
# FIND STANDARDIZED FOOD INSECURITY SCORES FOR 2010
# Atlas extract for 2010.
FARA_2010 <- read.csv("FoodAccessResearchAtlasData2010.csv")
# Census tract IDs used for the Hill District (2010 data).
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows.
HD_FARA2010 <- FARA_2010[is.element(FARA_2010$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns <- scale(HD_FARA2010[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2010[["overall_score"]] <- rowSums(standardized_columns)
# Compact results table: tract, both raw measures, and the overall score.
result_df <- with(HD_FARA2010, data.frame(
  CensusTract = CensusTract,
  lapophalf = lapophalf,
  lalowihalf = lalowihalf,
  overall_score = overall_score
))
# Show the table.
print(result_df)
# Export the table to a LaTeX file.
library(xtable)
table_latex <- xtable(result_df, caption = "Food Insecurity Scores for 2010 in Hill District", label = "tab:food_insecurity_2010_hd")
print(table_latex, type = "latex", file = "methodstable2010.tex")
quick visualization for 2010
# Quick scatter plot of the 2010 measures, colored by overall score.
FARA_2010 <- read.csv("FoodAccessResearchAtlasData2010.csv")
# Census tract IDs used for the Hill District (2010 data).
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows.
HD_FARA2010 <- FARA_2010[is.element(FARA_2010$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns <- scale(HD_FARA2010[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2010[["overall_score"]] <- rowSums(standardized_columns)
# One point per tract; color runs from blue (low score) to red (high score).
library(ggplot2)
ggplot(HD_FARA2010, aes(x = lapophalf, y = lalowihalf, color = overall_score)) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  labs(
    x = "Population Beyond 1/2 Mile from Supermarket",
    y = "Low-Income Population Beyond 1/2 Mile from Supermarket",
    color = "Overall Score",
    title = "Food Accessibility in 2010 - Hill District Census Tracts"
  ) +
  theme_minimal()

# 2010 bar chart of the overall score rescaled to the 0-1 range.
library(ggplot2)
# Census tract IDs used for the Hill District (2010 data).
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows of the already-loaded 2010 table.
HD_FARA2010 <- FARA_2010[is.element(FARA_2010$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns_2010 <- scale(HD_FARA2010[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2010[["overall_score"]] <- rowSums(standardized_columns_2010)
# Min-max rescaling: (score - min) / (max - min).
normalized_score_2010 <- (HD_FARA2010$overall_score - min(HD_FARA2010$overall_score)) /
  diff(range(HD_FARA2010$overall_score))
# Store the rescaled score alongside the tract data.
HD_FARA2010[["NormalizedScore"]] <- normalized_score_2010
# One bar per tract, tract labels tilted 45 degrees.
ggplot(HD_FARA2010, aes(x = factor(CensusTract), y = NormalizedScore)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Normalized Overall Change in Hill District Census Tracts for 2010",
    x = "Hill District Census Tract",
    y = "Normalized Overall Change"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Now for 2019- plots for normalized change and scores
# Scatter plot of the 2019 measures, colored by overall score.
FARA_2019 <- read.csv("food_access_research_atlas.csv")
# Census tract IDs used for the Hill District.
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows.
HD_FARA2019 <- FARA_2019[is.element(FARA_2019$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns_2019 <- scale(HD_FARA2019[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2019[["overall_FI_score"]] <- rowSums(standardized_columns_2019)
# One point per tract; color runs from blue (low score) to red (high score).
library(ggplot2)
ggplot(HD_FARA2019, aes(x = lapophalf, y = lalowihalf, color = overall_FI_score)) +
  geom_point() +
  scale_color_gradient(low = "blue", high = "red") +
  labs(
    x = "Population Beyond 1/2 Mile from Supermarket",
    y = "Low-Income Population Beyond 1/2 Mile from Supermarket",
    color = "Overall Score",
    title = "Food Accessibility in 2019 - Hill District Census Tracts"
  ) +
  theme_minimal()

# 2019 bar chart of the overall score rescaled to the 0-1 range.
library(ggplot2)
# Census tract IDs used for the Hill District.
HillDist_census_tracts <- c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100)
# Keep only the Hill District rows of the already-loaded 2019 table.
HD_FARA2019 <- FARA_2019[is.element(FARA_2019$CensusTract, HillDist_census_tracts), ]
# The two measures that make up the overall score.
columns <- c("lapophalf", "lalowihalf")
# Center and scale each measure across the selected tracts.
standardized_columns_2019 <- scale(HD_FARA2019[columns])
# Overall score = row-wise sum of the standardized measures.
HD_FARA2019[["overall_FI_score"]] <- rowSums(standardized_columns_2019)
# Min-max rescaling: (score - min) / (max - min).
normalized_score <- (HD_FARA2019$overall_FI_score - min(HD_FARA2019$overall_FI_score)) /
  diff(range(HD_FARA2019$overall_FI_score))
# Store the rescaled score alongside the tract data.
HD_FARA2019[["NormalizedScore"]] <- normalized_score
# One bar per tract, tract labels tilted 45 degrees.
ggplot(HD_FARA2019, aes(x = factor(CensusTract), y = NormalizedScore)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Normalized Overall Change in Hill District Census Tracts for 2019",
    x = "Hill District Census Tract",
    y = "Normalized Overall Change"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Install igraph/dplyr only when they are missing. The original unconditional
# install.packages() call failed with "Error in install.packages : Updating
# loaded packages" because the packages were already loaded in the session.
needed_pkgs <- c("igraph", "dplyr")
missing_pkgs <- needed_pkgs[!vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(igraph)
library(dplyr)
# Census tracts with their overall scores for both years.
# NOTE(review): the scores are hard-coded, presumably copied from the printed
# result tables - verify them against the computed values.
data <- data.frame(
  CensusTract = c(42003030500, 42003050100, 42003050600, 42003050900, 42003051000, 42003051100),
  overall_score_2019 = c(-2.0894987, -0.1954796, 0.4843697, 0.6179349, 3.2418969, -2.0592232),
  overall_score_2010 = c(-1.22912461, -0.67851833, 0.09874682, -2.32037971, 3.35460811, 0.77466771)
)
# Only tracts whose 2019 score exceeds this threshold become edges.
threshold <- 0 # You can adjust the threshold as needed
# Edge list: one edge per tract above the threshold.
# NOTE(review): `to` holds the 2010 score value, so every edge joins a tract
# ID to a score-valued vertex rather than to another tract - confirm that
# this is the intended graph.
graph_data <- data %>%
  filter(overall_score_2019 > threshold) %>%
  select(from = CensusTract, to = overall_score_2010)
# Build an undirected graph from the edge list.
graph <- graph_from_data_frame(graph_data, directed = FALSE)
# Plot the graph.
plot(graph, main = "Graph of Census Tracts based on Overall Scores")

## Edges in the graph represent relationships between Census Tracts, where a relationship is established if the overall score in 2019 is above the specified threshold, provided in exploratory data analysis
