#Q1#How does median income vary with population size and median age across Census tracts in Colorado and how does the geographic distribution of median income vary across Census tracts?

library(tidycensus)   
library(dplyr)        
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)        
library(ggplot2)     
library(tmap)         
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
variables <- c(
  total_population = "B01003_001",
  median_income = "B19013_001",
  median_age = "B01002_001",
  education_bachelors = "B15003_022")


Colorado_Data1 <- get_acs(
  geography = "tract",
  variables = variables,
  state = "08",  # FIPS code for Colorado
  year = 2021,
  output = "wide",
  geometry = FALSE)
## Getting data from the 2017-2021 5-year ACS
head(Colorado_Data1)
## # A tibble: 6 × 10
##   GEOID  NAME  total_populationE total_populationM median_incomeE median_incomeM
##   <chr>  <chr>             <dbl>             <dbl>          <dbl>          <dbl>
## 1 08001… Cens…              4027               646          37702          10292
## 2 08001… Cens…              4598               688          46096           4163
## 3 08001… Cens…              5749               540          52952          14609
## 4 08001… Cens…              5515               657          60447           8938
## 5 08001… Cens…              1538               243          51034           7266
## 6 08001… Cens…              6892               917          84240          13459
## # ℹ 4 more variables: median_ageE <dbl>, median_ageM <dbl>,
## #   education_bachelorsE <dbl>, education_bachelorsM <dbl>
mean_income <- mean(Colorado_Data1$median_incomeE)
median_income <- median(Colorado_Data1$median_incomeE)
min_income = min(Colorado_Data1$median_incomeE, na.rm = TRUE)
max_income = max(Colorado_Data1$median_incomeE, na.rm = TRUE) 


ggplot(Colorado_Data1, aes(x =total_populationE , y = median_incomeE)) +
  geom_point(color = "blue", alpha = 0.6) +
  labs(
    title = "Population vs. Median Income by Census Tract in Colorado",
    x = "Total Population",
    y = "Median Income (USD)"
  ) + theme_minimal()
## Warning: Removed 22 rows containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(Colorado_Data1, aes(y = median_incomeE)) +
  geom_boxplot(fill = "orange", alpha = 0.7) +
  labs(
    title = "Distribution of Median Income Across Census Tracts",
    y = "Median Income (USD)"
  ) +
  theme_minimal()
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

ggplot(Colorado_Data1, aes(x = median_ageE)) +
  geom_histogram(binwidth = 5, fill = "blue", color = "black") +
  labs(
    title = "Histogram of Median Age Across Census Tracts",
    x = "Median Age",
    y = "Count of Tracts"
  ) +
  theme_minimal()
## Warning: Removed 10 rows containing non-finite outside the scale range
## (`stat_bin()`).

ggplot(Colorado_Data1, aes(x = median_incomeE)) +
  geom_density(fill = "blue", alpha = 0.5) +
  labs(
    title = "Probability Density Function of Median Income",
    x = "Median Income (USD)",
    y = "Density"
  ) +
  theme_minimal()
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_density()`).

ggplot(Colorado_Data1, aes(x = median_incomeE)) +
  stat_ecdf(geom = "step", color = "darkgreen") +
  labs(
    title = "Cumulative Density Function of Median Income",
    x = "Median Income (USD)",
    y = "Cumulative Probability"
  ) +
  theme_minimal()
## Warning: Removed 22 rows containing non-finite outside the scale range
## (`stat_ecdf()`).

population_model <- lm(total_populationE ~ median_ageE, data = Colorado_Data1)


future_data <- data.frame(median_ageE = seq(min(Colorado_Data1$median_ageE, na.rm = TRUE),
                                           max(Colorado_Data1$median_ageE, na.rm = TRUE),
                                           length.out = 5))


future_data$predicted_population <- predict(population_model, newdata = future_data)


print(future_data)
##   median_ageE predicted_population
## 1       16.80             5221.575
## 2       34.85             4217.991
## 3       52.90             3214.406
## 4       70.95             2210.821
## 5       89.00             1207.236
ols_model <- lm(median_incomeE ~ median_ageE + total_populationE, data = Colorado_Data1)


summary(ols_model)
## 
## Call:
## lm(formula = median_incomeE ~ median_ageE + total_populationE, 
##     data = Colorado_Data1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -97594 -23441  -6111  18697 161915 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       1.114e+04  5.519e+03   2.019   0.0437 *  
## median_ageE       1.415e+03  1.156e+02  12.246   <2e-16 ***
## total_populationE 4.731e+00  5.231e-01   9.045   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 33430 on 1422 degrees of freedom
##   (22 observations deleted due to missingness)
## Multiple R-squared:  0.1163, Adjusted R-squared:  0.1151 
## F-statistic: 93.59 on 2 and 1422 DF,  p-value: < 2.2e-16
colorado_data_geom <- get_acs(
  geography = "tract",
  variables = "B19013_001",
  state = "08",
  year = 2021,
  geometry = TRUE)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website.  To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##   |                                                                              |                                                                      |   0%  |                                                                              |=                                                                     |   1%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   6%  |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%  |                                                                              |==========                                                            |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  21%  |                                                                              |===============                                                       |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  25%  |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  28%  |                                                                              |====================                                                  |  29%  |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  32%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  48%  |                                                                              |===================================                                   |  50%  |                                                                              |====================================                                  |  51%  |                                                                              |=====================================                                 |  53%  |                                                                              |======================================                                |  54%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  63%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |================================================                      |  69%  |                                                                              |=================================================                     |  70%  |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  84%  |                                                                              |============================================================          |  85%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |===============================================================       |  89%  |                                                                              |================================================================      |  91%  |                                                                              |=================================================================     |  92%  |                                                                              |==================================================================    |  94%  |                                                                              |===================================================================   |  95%  |                                                                              |====================================================================  |  97%  |                                                                              |===================================================================== |  98%  |                                                                              |======================================================================| 100%
tmap_mode("view")
## tmap mode set to interactive viewing
tm_shape(colorado_data_geom) +
  tm_polygons("estimate", title = "Median Income (USD)") +
  tm_layout(title = "Interactive Map of Median Income by Census Tract")
#Summary: I wanted to explore the relationships between demographic factors (population size, median age) and economic conditions (median income) as a way to identify areas of economic disparity or concentrations.

#Summary Findings: I wanted to explore the relationships between demographic factors (population size, median age) and economic conditions (median income) as a way to identify areas of economic disparity or concentrations.The Census tracts reveal that population size does not consistently correlate with higher median incomes. In contrast, median age shows a strong positive relationship with income; older populations generally report higher incomes, likely due to wealth accumulation.
#Income distribution is uneven. Most tracts cluster at lower-to-moderate income levels, and a few high-income tracts show a distinct disparity. High-income tracts are concentrated in urban centers and affluent suburbs like Denver. In contrast, rural areas and some urban neighborhoods report lower incomes. Median income in urban areas is higher than in rural areas. Stark disparities between neighbor tract 
#highlight localized economic inequities.