## Project 1 ##
# 1. Use Census API to get the census tract-level median household income, Hispanic population, Non-Hispanic African American population, male, female, total population, median age of the county to put them in a dataframe (4'). ##
library(tidycensus)

# The Census API key is a credential and must not live in the script.
# tidycensus picks it up from the CENSUS_API_KEY environment variable; register
# it once per machine with `census_api_key("<your key>", install = TRUE)`.
# NOTE(review): a key used to be hard-coded on this line. Treat it as exposed
# and request a replacement.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  stop(
    "CENSUS_API_KEY is not set. Run ",
    "census_api_key(\"<your key>\", install = TRUE) once, then restart R.",
    call. = FALSE
  )
}

# ACS variables requested for every census tract. Names are the ACS variable
# codes; values are the column names the rest of the script uses.
# Median age and median household income come from the all-population tables
# (B01002, B19013). The "I"-suffixed tables (B01002I, B19013I) are the
# Hispanic-or-Latino iterations and would describe only that subgroup.
acs_columns <- c(
  # Estimate!!Total:!!Hispanic or Latino (HISPANIC OR LATINO ORIGIN BY RACE)
  B03002_012 = "Hispanic",
  # Estimate!!Total:!!Not Hispanic or Latino:!!Black or African American alone
  B03002_004 = "Black or African American",
  # Estimate!!Total:!!Male:
  B05003_002 = "Male",
  # Estimate!!Total:!!Female:
  B05003_013 = "Female",
  # Total population
  B01001_001 = "TotalPop",
  # Estimate!!Median age --!!Total (all residents)
  B01002_001 = "MedianAge",
  # Estimate!!Median household income (all households)
  B19013_001 = "MedianHouseholdIncome"
)
var <- paste0(names(acs_columns), "E")

# `year` is pinned to the 2017-2021 5-year ACS this analysis was run against,
# so results do not shift when the package default changes.
# `output = "wide"` returns one row per tract with an estimate column (code +
# "E") and a margin-of-error column (code + "M") per variable.
# `geometry = TRUE` attaches tract polygons, making the result an sf object.
Miami_segregation <- get_acs(
  geography = "tract",
  variables = var,
  state = "FL",
  county = "Miami-Dade",
  year = 2021,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)

# 2. Remove the MOE columns and rename all the variables (2').
# Columns are matched by name rather than by position, so the result does not
# depend on the order in which the API returns them.
moe_cols <- paste0(names(acs_columns), "M")
Miami_segregation <- Miami_segregation[
  , setdiff(names(Miami_segregation), moe_cols)
]

estimate_cols <- paste0(names(acs_columns), "E")
estimate_pos <- match(estimate_cols, names(Miami_segregation))
if (anyNA(estimate_pos)) {
  stop(
    "get_acs() did not return the expected column(s): ",
    paste(estimate_cols[is.na(estimate_pos)], collapse = ", "),
    call. = FALSE
  )
}
names(Miami_segregation)[estimate_pos] <- unname(acs_columns)
# 3. Save your data to CSV file (1').
# The file goes to the current user's home directory instead of one specific
# user's absolute path, so the script runs on any machine.
# The tract polygons are dropped first: a CSV cannot represent the sf geometry
# list-column, and serialising it only bloats the file with unreadable text.
write.csv(
  sf::st_drop_geometry(Miami_segregation),
  file = file.path(path.expand("~"), "Miami_segregation.csv")
)
# 4. Make a scatter plot to visualize the association between median household income and percentage of Non-Hispanic African American (2').
library(ggplot2)

# Non-Hispanic Black or African American residents as a percentage (0-100) of
# each tract's total population. A tract whose TotalPop is 0 yields NaN.
Miami_segregation[["pct_BAA"]] <-
  100 * Miami_segregation[["Black or African American"]] /
  Miami_segregation[["TotalPop"]]

# One point per tract: income on the x axis, percentage on the y axis.
ggplot(Miami_segregation) +
  geom_point(mapping = aes(x = MedianHouseholdIncome, y = pct_BAA))
## Warning: Removed 55 rows containing missing values (`geom_point()`).
# 5. Make a histogram to visualize the age distribution of the county (2')
# One observation per census tract, so the histogram shows how the tract-level
# median ages are distributed.
# `qplot()` was deprecated in ggplot2 3.4.0, so the plot is built with
# `ggplot()` + `geom_histogram()`. `bins = 30` states explicitly the bin count
# that `stat_bin()` used to pick by default (with a message).
ggplot(data = Miami_segregation, mapping = aes(x = MedianAge)) +
  geom_histogram(bins = 30)
## Warning: Removed 9 rows containing non-finite values (`stat_bin()`).
# 6. Make a PDF (probability density function) chart to show the distribution of median household income (2').
# Kernel density estimate of the tract-level median household income; the
# aesthetic is declared once on the plot and inherited by the density layer.
ggplot(Miami_segregation, aes(x = MedianHouseholdIncome)) +
  geom_density()
## Warning: Removed 55 rows containing non-finite values (`stat_density()`).
# 7. Make a CDF (cumulative density function) chart to show the distribution of median household income (2').
# Empirical cumulative distribution of tract-level median household income,
# drawn as a step function.
ggplot(Miami_segregation) +
  stat_ecdf(mapping = aes(x = MedianHouseholdIncome), geom = "step")
## Warning: Removed 55 rows containing non-finite values (`stat_ecdf()`).
# 8. Make a boxplot to visualize the median household income (2').
# `qplot()` was deprecated in ggplot2 3.4.0; this is the equivalent
# `ggplot()` call. Mapping income to x draws a single horizontal box
# summarising the tract-level values.
ggplot(data = Miami_segregation, mapping = aes(x = MedianHouseholdIncome)) +
  geom_boxplot()
## Warning: Removed 55 rows containing non-finite values (`stat_boxplot()`).
# 9. Make a map to show the spatial distribution of percentage of Hispanic population (2').
library(tmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# Hispanic residents as a percentage (0-100) of each tract's total population.
Miami_segregation[["pct_Hisp"]] <-
  100 * Miami_segregation[["Hispanic"]] / Miami_segregation[["TotalPop"]]

# Drop the 707th row before mapping.
# NOTE(review): the reason this particular row is removed is not visible here
# (possibly a tract that cannot be drawn) -- confirm. The index is positional,
# so re-running this line removes a further row each time.
Miami_segregation <- Miami_segregation[-707, ]

# Choropleth of the percentage, one filled polygon per remaining tract.
tm_shape(Miami_segregation) +
  tm_fill(col = "pct_Hisp") +
  tm_layout(title = "Hispanic Percent")
# 10. Calculate and map the difference between female and male population to show what census tract has more female population (2').
# Male minus female count per tract: negative values mark tracts with more
# female than male residents, positive values the opposite.
Miami_segregation$diff_MF <- with(Miami_segregation, Male - Female)

tm_shape(Miami_segregation) +
  tm_fill(col = "diff_MF") +
  tm_layout(title = "Difference of Male and Female")
## Variable(s) "diff_MF" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
# 11. Find the population of the county (or the major city within the county) from 2010 to 2023, and predict the population for the next five years (2024-2028) (2').
# Observed series: one population figure (in millions) per calendar year.
x <- seq(2010, 2023, by = 1) # year
y <- c(
  5.52, 5.58, 5.64, 5.71, 5.77, 5.84, 5.90,
  6.00, 6.04, 6.08, 6.12, 6.17, 6.22, 6.27
) # millions

# Years to forecast; the column must be called `x` so predict() can match it
# to the model's predictor.
new.x <- seq(2024, 2028, by = 1)
new.df <- data.frame(x = new.x)

# A degree-1 polynomial is a straight-line trend fitted by least squares,
# which is then extrapolated over the forecast years.
poly.lm1 <- lm(y ~ poly(x, 1))
new.y1 <- predict(poly.lm1, newdata = new.df)