# Package setup ----
# Use the CRAN cloud mirror so installation works in non-interactive sessions.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install only the packages that are not already available, so re-running the
# script does not re-download and reinstall everything each time.
required_packages <- c("tidycensus", "dplyr", "readxl", "writexl", "ggplot2")
missing_packages <- required_packages[
  !vapply(required_packages, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
library(tidycensus)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(writexl)
library(ggplot2)

# Census API key ----
# The key is a credential, so it is read from the CENSUS_API_KEY environment
# variable instead of being written into this script (and its rendered output).
# NOTE(review): a literal key used to be committed on this line; it should be
# treated as exposed and replaced -- confirm with the key's owner.
census_key <- Sys.getenv("CENSUS_API_KEY")
if (!nzchar(census_key)) {
  stop(
    "CENSUS_API_KEY is not set. Run ",
    "tidycensus::census_api_key(\"<your key>\", install = TRUE) once in an ",
    "interactive session, restart R, and then re-run this script.",
    call. = FALSE
  )
}
# Register the key for the current session only (no `install = TRUE`), so the
# script no longer rewrites ~/.Renviron on every run.
census_api_key(census_key)
# Median age of males (variable P013002) for every census tract in Texas,
# from the 2010 decennial census, Summary File 1.
median_age_males <- get_decennial(
  geography = "tract",
  variables = "P013002",
  state = "TX",
  year = 2010,
  # get_decennial() selects the summary file with `sumfile`; `survey` is a
  # get_acs() argument and would only be passed along through `...` here.
  sumfile = "sf1"
)
## Getting data from the 2010 decennial Census
## Using Census Summary File 1
head(median_age_males)
## # A tibble: 6 × 4
##   GEOID       NAME                                     variable value
##   <chr>       <chr>                                    <chr>    <dbl>
## 1 48141000101 Census Tract 1.01, El Paso County, Texas P013002   34.8
## 2 48141000111 Census Tract 1.11, El Paso County, Texas P013002   31.6
## 3 48141000205 Census Tract 2.05, El Paso County, Texas P013002   28.2
## 4 48141000208 Census Tract 2.08, El Paso County, Texas P013002   26.3
## 5 48141000301 Census Tract 3.01, El Paso County, Texas P013002   27.6
## 6 48141000302 Census Tract 3.02, El Paso County, Texas P013002   29
# Save the tract-level median male age table as an Excel workbook
write_xlsx(
  x = median_age_males,
  path = "median_age_males_tx_2010.xlsx"
)

# Tract-level median household income (B19013_001E) for Bexar County, Texas,
# requested with year = 2018 from the ACS
median_income_bexar <- get_acs(
  state = "TX",
  county = "Bexar",
  geography = "tract",
  year = 2018,
  variables = "B19013_001E"
)
## Getting data from the 2014-2018 5-year ACS
head(x = median_income_bexar)
## # A tibble: 6 × 5
##   GEOID       NAME                                   variable   estimate   moe
##   <chr>       <chr>                                  <chr>         <dbl> <dbl>
## 1 48029110100 Census Tract 1101, Bexar County, Texas B19013_001    44050 13806
## 2 48029110300 Census Tract 1103, Bexar County, Texas B19013_001    34375 14338
## 3 48029110500 Census Tract 1105, Bexar County, Texas B19013_001    11360  2396
## 4 48029110600 Census Tract 1106, Bexar County, Texas B19013_001    14547  4007
## 5 48029110700 Census Tract 1107, Bexar County, Texas B19013_001    14891  5218
## 6 48029110800 Census Tract 1108, Bexar County, Texas B19013_001    29345  4495
# County-level Hispanic population (B03002_012E) for all of Texas,
# requested with year = 2018 from the ACS
hispanic_population_tx <- get_acs(
  state = "TX",
  geography = "county",
  year = 2018,
  variables = "B03002_012E"
)
## Getting data from the 2014-2018 5-year ACS
head(x = hispanic_population_tx)
## # A tibble: 6 × 5
##   GEOID NAME                    variable   estimate   moe
##   <chr> <chr>                   <chr>         <dbl> <dbl>
## 1 48001 Anderson County, Texas  B03002_012    10142    NA
## 2 48003 Andrews County, Texas   B03002_012     9979    NA
## 3 48005 Angelina County, Texas  B03002_012    19174    NA
## 4 48007 Aransas County, Texas   B03002_012     6756    NA
## 5 48009 Archer County, Texas    B03002_012      727    NA
## 6 48011 Armstrong County, Texas B03002_012      131    69
# Give the 'estimate' column the more descriptive name 'HispanicPop'
hispanic_population_tx <- rename(
  hispanic_population_tx,
  HispanicPop = estimate
)

# Confirm the new column names
names(hispanic_population_tx)
## [1] "GEOID"       "NAME"        "variable"    "HispanicPop" "moe"
# Boxplot of the county-level Hispanic population counts for Texas
ggplot(data = hispanic_population_tx, mapping = aes(y = HispanicPop)) +
  geom_boxplot(color = "blue", fill = "orange") +
  labs(
    y = "Hispanic Population",
    title = "Distribution of Hispanic Population Across Texas Counties"
  )

# Tract-level population in poverty (B17001_002E) for Bexar County, Texas,
# requested with year = 2018 from the ACS
poverty_bexar <- get_acs(
  state = "TX",
  county = "Bexar",
  geography = "tract",
  year = 2018,
  variables = "B17001_002E"
)
## Getting data from the 2014-2018 5-year ACS
head(x = poverty_bexar)
## # A tibble: 6 × 5
##   GEOID       NAME                                   variable   estimate   moe
##   <chr>       <chr>                                  <chr>         <dbl> <dbl>
## 1 48029110100 Census Tract 1101, Bexar County, Texas B17001_002      513   178
## 2 48029110300 Census Tract 1103, Bexar County, Texas B17001_002     1057   375
## 3 48029110500 Census Tract 1105, Bexar County, Texas B17001_002     1623   251
## 4 48029110600 Census Tract 1106, Bexar County, Texas B17001_002     1441   387
## 5 48029110700 Census Tract 1107, Bexar County, Texas B17001_002      376   112
## 6 48029110800 Census Tract 1108, Bexar County, Texas B17001_002      432   134
# Tract-level Hispanic, White, and Black population counts for Bexar County,
# Texas, requested with year = 2018 from the ACS (one row per tract and
# variable)
race_bexar <- get_acs(
  state = "TX",
  county = "Bexar",
  geography = "tract",
  year = 2018,
  variables = c(
    Hispanic = "B03002_012E",
    White = "B03002_003E",
    Black = "B03002_004E"
  )
)
## Getting data from the 2014-2018 5-year ACS
head(x = race_bexar)
## # A tibble: 6 × 5
##   GEOID       NAME                                   variable   estimate   moe
##   <chr>       <chr>                                  <chr>         <dbl> <dbl>
## 1 48029110100 Census Tract 1101, Bexar County, Texas B03002_003      999   186
## 2 48029110100 Census Tract 1101, Bexar County, Texas B03002_004      157    76
## 3 48029110100 Census Tract 1101, Bexar County, Texas B03002_012     1832   340
## 4 48029110300 Census Tract 1103, Bexar County, Texas B03002_003      506   150
## 5 48029110300 Census Tract 1103, Bexar County, Texas B03002_004      352   333
## 6 48029110300 Census Tract 1103, Bexar County, Texas B03002_012     2096   400
# Join the race/ethnicity counts to the poverty counts on the tract keys, then
# add a column that pastes the two counts together with "_".
# NOTE(review): poverty_race_category is built from the raw counts
# (estimate.x and estimate.y), not from category labels, so its values are
# presumably close to unique per row -- confirm this is the intended grouping.
combined_data <- merge(
  x = race_bexar,
  y = poverty_bexar,
  by = c("GEOID", "NAME")
) %>%
  mutate(poverty_race_category = paste(estimate.x, estimate.y, sep = "_"))

head(x = combined_data)
##         GEOID                                   NAME variable.x estimate.x
## 1 48029110100 Census Tract 1101, Bexar County, Texas B03002_003        999
## 2 48029110100 Census Tract 1101, Bexar County, Texas B03002_004        157
## 3 48029110100 Census Tract 1101, Bexar County, Texas B03002_012       1832
## 4 48029110300 Census Tract 1103, Bexar County, Texas B03002_003        506
## 5 48029110300 Census Tract 1103, Bexar County, Texas B03002_004        352
## 6 48029110300 Census Tract 1103, Bexar County, Texas B03002_012       2096
##   moe.x variable.y estimate.y moe.y poverty_race_category
## 1   186 B17001_002        513   178               999_513
## 2    76 B17001_002        513   178               157_513
## 3   340 B17001_002        513   178              1832_513
## 4   150 B17001_002       1057   375              506_1057
## 5   333 B17001_002       1057   375              352_1057
## 6   400 B17001_002       1057   375             2096_1057
# Bar chart of how many rows fall under each poverty_race_category value
# NOTE(review): if the category values are near-unique, this will draw one
# short bar per row -- verify the chart is readable with the real data.
ggplot(data = combined_data, mapping = aes(x = poverty_race_category)) +
  geom_bar(color = "purple", fill = "red") +
  labs(
    x = "Poverty and Race/Ethnicity Categories",
    y = "Count",
    title = "Bar Plot of Poverty and Race/Ethnicity Categories"
  )