library(tidyverse)
library(sf)
library(tigris)
options(tigris_class = "sf")
library(tmap)

#Question 1

# 1. Calculate the Education, Housing, Health/Environment, Civic Life, and overall ROI indices.
# Fetch the assignment archive into the working directory, extract it there,
# and read the Capital region tract shapefile as an sf object.
download.file(
  "https://raw.githubusercontent.com/crd150/data/master/assign8files.zip",
  "assign8files.zip"
)
unzip("assign8files.zip")

capitaltracts <- st_read(
  dsn = "capitaltracts.shp",
  stringsAsFactors = FALSE
)
## Reading layer `capitaltracts' from data source `/Users/sherigudez/Documents/CRD 150/Lab 8/capitaltracts.shp' using driver `ESRI Shapefile'
## Simple feature collection with 486 features and 39 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8772 ymax: 39.3165
## epsg (SRID):    NA
## proj4string:    +proj=longlat +ellps=GRS80 +no_defs
# Each domain index is the simple average of its component columns; the
# overall index is the average of the four domain indices computed just above
# it in the same mutate() call.
capitaltracts <- capitaltracts %>%
  mutate(
    Education = (edplc1 + edplc2 + edplc3 + edplc4) / 4,
    Housing = (hsplc1 + hsplc2) / 2,
    HealthEnv = (enplc1 + enplc2 + enplc3 + enplc4) / 4,
    Civic = (soplc1 + soplc2) / 2,
    Overall = (Education + Housing + HealthEnv + Civic) / 4
  )
# 1. Using the quintile-based breaks to categorize tracts as Lowest Opportunity, Low, Moderate, High, and Highest Opportunity that we used in the lab guide, map the Education, Housing, Health/Environment, Civic Life, and overall ROI indices for the Capital region. You should have five separate maps for the four domains and the overall index.
# Labels for the five quintile-based categories, ordered from the bottom
# quintile to the top quintile.
opportunity_labels <- c(
  "Lowest Opportunity", "Low", "Moderate", "High", "Highest Opportunity"
)

# Classify a numeric index into quintile-based opportunity categories.
#
# x: numeric vector of index values; NA values stay NA.
# Returns a factor whose levels are `opportunity_labels`.
to_opportunity_quintile <- function(x) {
  cut(
    x,
    breaks = quantile(x, probs = c(0, 0.2, 0.4, 0.6, 0.8, 1), na.rm = TRUE),
    labels = opportunity_labels,
    # cut() builds intervals that are open on the left by default, so without
    # include.lowest = TRUE the observation equal to the minimum break falls
    # outside every interval and is returned as NA.
    include.lowest = TRUE
  )
}

# Add one categorical quintile column per index.
capitaltracts <- mutate(
  capitaltracts,
  EducationQ = to_opportunity_quintile(Education),
  HealthEnvQ = to_opportunity_quintile(HealthEnv),
  HousingQ = to_opportunity_quintile(Housing),
  CivicQ = to_opportunity_quintile(Civic),
  OverallQ = to_opportunity_quintile(Overall)
)
# Build a borderless choropleth of `capitaltracts` coloured by one column.
#
# column:  name of the column to map, as a string.
# palette: name of the colour palette passed to tm_polygons().
# title:   legend title.
# Returns the tmap object; calling this at top level prints the map.
map_opportunity <- function(column, palette, title) {
  tm_shape(capitaltracts) +
    tm_polygons(
      col = column,
      palette = palette,
      border.alpha = 0,
      title = title,
      midpoint = NA
    )
}

# One map per index: overall first, then the four domains.
map_opportunity("OverallQ", "Reds", "Overall Opportunity")

map_opportunity("EducationQ", "Greens", "Education Opportunity")

map_opportunity("HousingQ", "Purples", "Housing Opportunity")

map_opportunity("CivicQ", "Blues", "Civic Opportunity")

map_opportunity("HealthEnvQ", "Greys", "Health Opportunity")

# 1. Map the percent of residents under 18 years old for the Capital region using style = quantile. Based on a visual comparison of the maps, do you think that youth are located in high overall opportunity places?
# Choropleth of the p18und column for the Capital region tracts.
# style = "quantile" is set explicitly so the class breaks are quantile-based
# rather than left to tmap's default classification.
tm_shape(capitaltracts) +
  tm_polygons(
    col = "p18und",
    style = "quantile",
    palette = "Purples",
    border.alpha = 0,
    title = "Children Population",
    midpoint = NA
  )

# Based on the maps I created, the highest population of children is found in areas with low to moderate Educational Opportunity, with the highest concentration of children in an area with low Educational Opportunity. Likewise, the majority of the child population can be found in areas with low to moderate Housing Opportunity. For Civic Opportunity, areas with the largest population of children can be found in areas with various levels of Civic Opportunity, but the higher concentrations can be found in areas with the lowest Civic Opportunity. For Health Opportunity, half of the majority of the Capital's children reside in areas with high Health Opportunity, but the other half reside in areas with low to moderate Health Opportunity. The highest concentration of the Capital's children resides in an area with low Health Opportunity. Overall, children tend to reside in areas with low to moderate Overall Opportunity, with a few concentrations of them living in areas with the highest Overall Opportunity.
# 1. Calculate the correlation between percent under 18 years old and all of the indices (overall and the four domains). Summarize your results.
# Correlation of each index with p18und. use = "complete.obs" drops rows with
# a missing value in either variable before computing each coefficient.
capitaltracts %>%
  summarize(
    educccorr = cor(Education, p18und, use = "complete.obs"),
    housecorr = cor(Housing, p18und, use = "complete.obs"),
    healthenvcorr = cor(HealthEnv, p18und, use = "complete.obs"),
    civiccorr = cor(Civic, p18und, use = "complete.obs"),
    roicorr = cor(Overall, p18und, use = "complete.obs")
  )
## Simple feature collection with 1 feature and 5 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8772 ymax: 39.3165
## epsg (SRID):    NA
## proj4string:    +proj=longlat +ellps=GRS80 +no_defs
##    educccorr housecorr healthenvcorr   civiccorr    roicorr
## 1 -0.1111788 0.2521986    -0.3557642 -0.02018818 -0.1002093
##                         geometry
## 1 POLYGON ((-121.8625 38.0679...
# The results from my correlational tests show that there is a low to nonexistent negative correlation between Educational Opportunity (-0.11) and percent under 18 years old, and a low to nonexistent positive correlation between Housing Opportunity (0.25) and percent under 18 years old. These same low to nonexistent correlation values can be seen for Health Opportunity, Civic Opportunity, and Overall Opportunity.
# 1. Calculate the correlation between percent black, percent Asian, and percent Hispanic and the overall ROI index. Summarize your results.
# Correlation of the overall index with the pblk, pasn and phisp columns,
# each computed on rows where both variables are non-missing.
capitaltracts %>%
  summarize(
    roiblk = cor(Overall, pblk, use = "complete.obs"),
    roiasn = cor(Overall, pasn, use = "complete.obs"),
    roishsp = cor(Overall, phisp, use = "complete.obs")
  )
## Simple feature collection with 1 feature and 3 fields
## geometry type:  POLYGON
## dimension:      XY
## bbox:           xmin: -122.422 ymin: 38.01842 xmax: -119.8772 ymax: 39.3165
## epsg (SRID):    NA
## proj4string:    +proj=longlat +ellps=GRS80 +no_defs
##       roiblk      roiasn    roishsp                       geometry
## 1 -0.4492105 -0.07883642 -0.4762791 POLYGON ((-121.8625 38.0679...
# Based on my correlational analysis of racial demographics and Overall Opportunity in California's Capital Region, you can see that the association between Percent Black and Overall Opportunity is a low to low-moderate negative correlation (-.45). For Percent Asian and Overall Opportunity, there is an extremely low or nonexistent negative correlation (-.08). For Percent Hispanic and Overall Opportunity, there is a low to low-moderate negative correlation (-.48).

#Question 2

# 1. Scan the Task Force’s Opportunity Mapping Methodology and answer the following questions regarding how they constructed the index.
# 1. The data contain the index values for all the domains (Economic: economicc, Environment: environc, Education: educc) and the composite index (composite, with the five categories in category and their labels in label) for urban tracts in the Capital Region. Merge this file with the file used in Question 1 (capitaltracts) using GEOID as the ID. Map the State’s overall opportunity index (category) using the appropriate labels.
# Read the State's index file and attach its columns to the tracts by GEOID.
# left_join() keeps every row of capitaltracts; tracts with no match in
# `state` get NA in the joined columns.
state <- read_csv(
  file = "https://raw.githubusercontent.com/crd150/data/master/lihtccapital.csv"
)
capitaltracts <- capitaltracts %>%
  left_join(state, by = "GEOID")

# Legend labels for the five classes defined by the breaks below.
resource_labels <- c(
  "High Segregation & Poverty",
  "Low Resource",
  "Moderate Resource",
  "High Resource",
  "Highest Resource"
)

# Map the State's composite category using unit-wide classes from 0 to 5.
tm_shape(capitaltracts) +
  tm_polygons(
    "category",
    palette = "Greens",
    breaks = c(0, 1, 2, 3, 4, 5),
    labels = resource_labels,
    border.alpha = 0,
    title = "State Opportunity",
    midpoint = NA
  )