library(tidycensus)
library(ggplot2)

#1. Using your census API key, capture the median age of males (P013002) for all census tracts from the 2010 decennial census database in Texas.
# Read the Census API key from the environment instead of embedding the secret
# in the script. Store it once with
# `census_api_key("<your key>", install = TRUE)` or add a CENSUS_API_KEY entry
# to ~/.Renviron. The key that was previously committed in this file should be
# treated as exposed and regenerated.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set; install your key with ",
    "census_api_key(key, install = TRUE) and restart R.",
    call. = FALSE
  )
}
# Register the key for this session only (no install, so nothing is written
# to .Renviron and `overwrite` is not needed).
census_api_key(census_key)
## To install your API key for use in future sessions, run this function with `install = TRUE`.
# Variable lookup table, kept for browsing variable codes interactively.
# NOTE(review): this loads the 2021 5-year ACS variable list, while the ACS
# pulls below use year = 2018 -- confirm the intended vintage.
census_var <- load_variables(2021, "acs5", cache = TRUE)

# Tract-level pull of variable P013002 from the 2010 decennial census for
# Texas; the result is exported to Excel further down.
age10 <- get_decennial(
  geography = "tract",
  variables = "P013002",
  year = 2010,
  state = "TX"
)
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
#2. Export the data collected in step 1 to a .xlsx file (using the function ‘write_xlsx’).
library(writexl)
# Write the tract-level table to an Excel workbook, keeping the column names
# as a header row.
write_xlsx(
  age10,
  path = "D:/Documents/Academics/UTSA/2_Urban Planning Methods I/Assignments/Male_TexasTract_MedianAge.xlsx",
  col_names = TRUE
)

#3. Using your census API key, capture the median household income (B19013_001E) for all census tracts in Bexar County, TX, from the 2018 ACS database in Texas.
# Tract-level pull of B19013_001E for Bexar County, TX, with year = 2018.
bc_medincome <- get_acs(
  geography = "tract",
  variables = "B19013_001E",
  year = 2018,
  state = "TX",
  county = "Bexar County"
)
## Getting data from the 2014-2018 5-year ACS
#4. Using your census API key, capture the Hispanic population (B03002_012E) for all counties in Texas from the 2018 ACS database.
# County-level pull of B03002_012E for every county in Texas, year = 2018.
tx_hispanicpop <- get_acs(
  geography = "county",
  variables = "B03002_012E",
  year = 2018,
  state = "TX"
)
## Getting data from the 2014-2018 5-year ACS
#5. Following step 4, rename the ‘estimate’ column to ‘HispanicPop’, and show all the column names of the data frame.
# Rename the `estimate` column by matching its name rather than assuming it is
# the fourth column, so the rename still hits the right column if the column
# order ever differs.
names(tx_hispanicpop)[names(tx_hispanicpop) == "estimate"] <- "HispanicPop"
# Print the column names to confirm the rename.
names(tx_hispanicpop)
## [1] "GEOID"       "NAME"        "variable"    "HispanicPop" "moe"
#6. Make a boxplot to show the distribution of Hispanic population across TX counties.
# Horizontal boxplot of the county-level HispanicPop values.
ggplot(data = tx_hispanicpop, mapping = aes(x = HispanicPop)) +
  geom_boxplot()

#7. Using your census API key, capture the population in poverty, for all census tracts in Bexar County, TX, from the 2018 ACS database in Texas.
# Tract-level pull of B17017_002E for Bexar County, TX, with year = 2018.
# NOTE(review): table B17017 appears to count households below the poverty
# level rather than persons -- confirm this is the intended poverty measure.
bc_tracts_poppoverty <- get_acs(
  geography = "tract",
  variables = "B17017_002E",
  year = 2018,
  state = "TX",
  county = "Bexar County"
)
## Getting data from the 2014-2018 5-year ACS
#8. Using your census API key, capture the Hispanic population, White population, and Black Population, for all census tracts in Bexar County, TX, from the 2018 ACS database in Texas.

# Named ACS variable codes. The names (poptotal, hispanic, ...) are the column
# names the later steps read from the wide result.
var <- c(
  poptotal = "B03002_001E",
  hispanic = "B03002_012E",
  white = "B03002_003E",
  black = "B03002_004E",
  poptotal2 = "B17017_001E",
  poverty = "B17017_002E"
)

# One row per Bexar County tract, one column per requested variable.
# `county` is spelled out in full; the previous `count =` only worked through
# R's partial argument matching.
bc_pop <- get_acs(
  geography = "tract",
  variables = var,
  county = "Bexar County",
  state = "TX",
  output = "wide",
  year = 2018,
  geometry = FALSE
)
## Getting data from the 2014-2018 5-year ACS
#9. Create a new column that combines poverty and race/ethnicity categories.
# Shares of each tract's total (poptotal) by group, and the poverty share
# relative to its own universe (poptotal2). A zero denominator yields NaN.
bc_pop$black_pct <- bc_pop$black / bc_pop$poptotal
bc_pop$white_pct <- bc_pop$white / bc_pop$poptotal
bc_pop$hispanic_pct <- bc_pop$hispanic / bc_pop$poptotal
bc_pop$poverty_pct <- bc_pop$poverty / bc_pop$poptotal2

# Classify on the poverty *share*, not the raw count: comparing the count to
# 0.3 labelled every tract with at least one poverty record as "Poor".
bc_pop$Poor <- ifelse(bc_pop$poverty_pct > 0.3, "Poor", "Nonpoor")

# Majority group (> 50% share) per tract; tracts with no majority stay "Other".
# The three conditions are mutually exclusive, so their order does not matter.
bc_pop$Race <- "Other"
bc_pop$Race[bc_pop$white_pct > 0.5] <- "White"
bc_pop$Race[bc_pop$black_pct > 0.5] <- "Black"
bc_pop$Race[bc_pop$hispanic_pct > 0.5] <- "Hispanic"

# Combined category label, e.g. "PoorHispanic" or "NonpoorWhite".
bc_pop$"race poverty" <- paste0(bc_pop$Poor, bc_pop$Race)


#10. Following step 9, make a bar plot to show the number of different categories
# Tract counts per Poor/Nonpoor bar, stacked by the combined category.
ggplot(data = bc_pop, mapping = aes(x = Poor, fill = `race poverty`)) +
  geom_bar()