## Project 1 ##
# 1. Use Census API to get the census tract-level median household income, Hispanic population, Non-Hispanic African American population, male, female, total population, median age of the county to put them in a dataframe (4'). ##
library(tidycensus)

# The Census API key is a credential, so it is read from the environment
# rather than stored in the script. Set it once per machine, e.g. with
# `tidycensus::census_api_key("<your key>", install = TRUE)` or by adding a
# `CENSUS_API_KEY=<your key>` line to ~/.Renviron.
# NOTE(review): the key that used to be hard-coded here should be treated as
# leaked and replaced with a freshly requested one.
api_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "CENSUS_API_KEY is not set. Request a key at ",
    "https://api.census.gov/data/key_signup.html and store it in ~/.Renviron.",
    call. = FALSE
  )
}
census_api_key(api_key)
## To install your API key for use in future sessions, run this function with `install = TRUE`.

# Lookup table: ACS estimate column -> readable column name.
#   B03002_012E  Hispanic or Latino (HISPANIC OR LATINO ORIGIN BY RACE)
#   B03002_004E  Not Hispanic or Latino, Black or African American alone
#   B05003_002E  Male
#   B05003_013E  Female
#   B01001_001E  Total population
#   B01002_001E  Median age, total
#   B19013_001E  Median household income
# The last two previously used the `I` table iterations (B01002I / B19013I),
# which are restricted to the Hispanic or Latino population / householders;
# the unsuffixed tables cover every resident and household in the tract.
# Use `load_variables(2021, "acs5")` to inspect the variable definitions.
acs_vars <- c(
  B03002_012E = "Hispanic",
  B03002_004E = "Black or African American",
  B05003_002E = "Male",
  B05003_013E = "Female",
  B01001_001E = "TotalPop",
  B01002_001E = "MedianAge",
  B19013_001E = "MedianHouseholdIncome"
)
var <- names(acs_vars)

# `year` and `survey` are pinned to the vintage of the recorded run below so
# the result does not drift with the package default.
Miami_segregation <- get_acs(
  geography = "tract",
  variables = var,
  county = "Miami-Dade",
  state = "FL",
  year = 2021,
  survey = "acs5",
  output = "wide",
  geometry = TRUE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## (download progress bar output, 0% to 100%, omitted)

# 2. Remove the MOE columns and rename all the variables (2').
# Each estimate column `<code>E` has a margin-of-error twin `<code>M`; drop
# the twins by name so the geometry column and everything else is kept.
moe_cols <- sub("E$", "M", names(acs_vars))
keep_cols <- setdiff(names(Miami_segregation), moe_cols)
Miami_segregation <- Miami_segregation[, keep_cols]

# Rename by matching column names instead of relying on column positions,
# and fail loudly if any requested estimate is missing from the result.
estimate_idx <- match(names(acs_vars), names(Miami_segregation))
stopifnot(!anyNA(estimate_idx))
names(Miami_segregation)[estimate_idx] <- unname(acs_vars)

# 3. Save your data to CSV file (1').
# Only the attribute table is written: the geometry list-column of an sf
# object does not serialise to a usable CSV field, so it is dropped first.
# The file goes to the current user's home directory instead of a path that
# exists on one machine only, and the row-number column is omitted.
write.csv(
  sf::st_drop_geometry(Miami_segregation),
  file = file.path(path.expand("~"), "Miami_segregation.csv"),
  row.names = FALSE
)

# 4. Make a scatter plot to visualize the association between median household income and percentage of Non-Hispanic African American (2').
# Percentage of each tract's total population counted in the
# 'Black or African American' column.
Miami_segregation$pct_BAA <- with(
  Miami_segregation,
  100 * `Black or African American` / TotalPop
)

library(ggplot2)
# One point per census tract: income on x, percentage on y.
ggplot(Miami_segregation) +
  geom_point(mapping = aes(x = MedianHouseholdIncome, y = pct_BAA))
## Warning: Removed 55 rows containing missing values (`geom_point()`).

# 5. Make a histogram to visualize the age distribution of the county (2')
# Histogram of the tract-level median ages. Built with ggplot() +
# geom_histogram() because qplot() is deprecated since ggplot2 3.4.0; the bin
# count is stated explicitly (30, the value stat_bin() would otherwise pick
# with a message).
ggplot(Miami_segregation, aes(x = MedianAge)) +
  geom_histogram(bins = 30)
## Warning: Removed 9 rows containing non-finite values (`stat_bin()`).

# 6. Make a PDF (probability density function) chart to show the distribution of median household income (2').
# Kernel density estimate of the tract-level median household income.
ggplot(Miami_segregation, aes(x = MedianHouseholdIncome)) +
  geom_density()
## Warning: Removed 55 rows containing non-finite values (`stat_density()`).

# 7. Make a  CDF (cumulative density function) chart to show the distribution of median household income (2').
# Empirical cumulative distribution of median household income, drawn as a
# step function.
ggplot(Miami_segregation) +
  stat_ecdf(mapping = aes(x = MedianHouseholdIncome), geom = "step")
## Warning: Removed 55 rows containing non-finite values (`stat_ecdf()`).

# 8. Make a boxplot to visualize the median household income (2').
# Horizontal boxplot of tract-level median household income. Uses ggplot() +
# geom_boxplot() because qplot() is deprecated since ggplot2 3.4.0.
ggplot(Miami_segregation, aes(x = MedianHouseholdIncome)) +
  geom_boxplot()
## Warning: Removed 55 rows containing non-finite values (`stat_boxplot()`).

# 9. Make a map to show the spatial distribution of percentage of Hispanic population (2').
library(tmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
##      (status 2 uses the sf package in place of rgdal)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE

# Hispanic residents as a percentage of each tract's total population.
Miami_segregation$pct_Hisp <- with(Miami_segregation, 100 * Hispanic / TotalPop)

# NOTE(review): row 707 is dropped by position and the reason is not recorded
# here; confirm which tract this is and why it must be excluded, since the
# index silently points at a different tract if the row order ever changes.
Miami_segregation <- Miami_segregation[-707, ]

# Choropleth of the percentage computed above.
tm_shape(Miami_segregation) +
  tm_fill(col = "pct_Hisp") +
  tm_layout(title = "Hispanic Percent")

# 10. Calculate and map the difference between female and male population to show what census tract has more female population (2').
# Male minus Female per tract: negative values mark tracts with more female
# than male residents, positive values the opposite.
Miami_segregation$diff_MF <- with(Miami_segregation, Male - Female)

tm_shape(Miami_segregation) +
  tm_fill(col = "diff_MF") +
  tm_layout(title = "Difference of Male and Female")
## Variable(s) "diff_MF" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.

# 11. Find the population of the county (or the major city within the county) from 2010 to 2023, and predict the population for the next five years (2024-2028) (2').
# Observed series: one population figure (in millions) per year, 2010-2023.
# NOTE(review): values of 5.5-6.3 million look like metro-area figures rather
# than Miami-Dade County alone -- confirm the data source.
x <- as.numeric(2010:2023) # year
y <- c(
  5.52, 5.58, 5.64, 5.71, 5.77, 5.84, 5.90,
  6.00, 6.04, 6.08, 6.12, 6.17, 6.22, 6.27
) # millions

# Years to forecast, wrapped in a data frame whose column name matches the
# predictor used in the model formula.
new.x <- as.numeric(2024:2028)
new.df <- data.frame(x = new.x)

# Degree-1 polynomial, i.e. a straight-line trend of population on year.
poly.lm1 <- lm(y ~ poly(x, 1))

# Projected population (millions) for 2024-2028.
new.y1 <- predict(poly.lm1, newdata = new.df)