##1 List your research questions and ensure the following analysis will help you answer the questions (2')
#Research Question: Where are the new residents of Travis County coming from and what is their housing tenure?
library(tidycensus)
library(ggplot2)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##2 Use Census API to get the census tract-level data with at least 4 variables (3')
var <- c("Total" = "B07204_011E",
"Northeast" = "B07204_012E",
"Midwest" = "B07204_013E",
"South" = "B07204_014E",
"West" = "B07204_015E",
"HousingTenure" = "B07013_013E",
"HomeOwner" = "B07013_014E",
"Renter" = "B07013_015E")
Mobility <- get_acs (
geography = "tract", state = "TX", county = "Travis County",
variables = var,
output = "wide",
year = 2021,
geometry = TRUE)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
## | | | 0% | |= | 1% | |= | 2% | |== | 2% | |== | 3% | |=== | 4% | |=== | 5% | |==== | 6% | |===== | 7% | |===== | 8% | |====== | 9% | |======= | 10% | |========= | 13% | |========== | 15% | |=========== | 16% | |============ | 17% | |============= | 19% | |================ | 22% | |================== | 25% | |=================== | 28% | |===================== | 31% | |======================== | 34% | |========================== | 37% | |============================= | 41% | |============================== | 43% | |============================== | 44% | |=============================== | 44% | |=============================== | 45% | |================================= | 47% | |======================================== | 57% | |============================================= | 65% | |============================================== | 65% | |========================================================= | 82% | |============================================================= | 88% | |=============================================================== | 90% | |================================================================== | 95% | |======================================================================| 100%
##3 Calculate mean, median, min, and max values for at least one variable (3')
#Home owners in Travis County by residents from out of state
mean_HomeOwner <- mean(Mobility$`HomeOwner`, na.rm = TRUE)
median_HomeOwner <- median(Mobility$`HomeOwner`, na.rm = TRUE)
max_HomeOwner <- max(Mobility$`HomeOwner`, na.rm = TRUE)
min_HomeOwner <- min(Mobility$`HomeOwner`, na.rm = TRUE)
#Renters in Travis County by residents from out of state
mean_Renter <- mean(Mobility$`Renter`, na.rm = TRUE)
median_Renter <- median(Mobility$`Renter`, na.rm = TRUE)
max_Renter <- max(Mobility$`Renter`, na.rm = TRUE)
min_Renter <- min(Mobility$`Renter`, na.rm = TRUE)
mean_HomeOwner
## [1] 39.42069
median_HomeOwner
## [1] 15
max_HomeOwner
## [1] 435
min_HomeOwner
## [1] 0
mean_Renter
## [1] 98.67586
median_Renter
## [1] 53.5
max_Renter
## [1] 912
min_Renter
## [1] 0
##4 Make at least three types of figures (scatter plot, histogram plot, boxplot, bar plot, etc.) and summarize your findings (3')
#Bar Graph
Sum_Northeast <- sum(Mobility$Northeast, na.rm = TRUE)
Sum_Midwest <- sum(Mobility$Midwest, na.rm = TRUE)
Sum_South <- sum(Mobility$South, na.rm = TRUE)
Sum_West <- sum(Mobility$West, na.rm = TRUE)
Population <- c(Sum_Northeast, Sum_Midwest, Sum_South, Sum_West)
Region <- c("Northeast", "Midwest", "South", "West")
ggplot(mapping=aes(x=Region, y=Population)) +
geom_bar(stat="identity", fill="#5FB617") +
labs(title="Intranational Immigrants in Travis County by Region", x="Region", y="Population")

#Histogram
ggplot(Mobility, aes(x = HomeOwner)) +
geom_histogram(binwidth = 10, fill = "#5FB617") +
labs(title = "Distribution of Tracts based on Home Ownership",
x = "Home Ownership",
y = "Number of Tracts")

#Boxplot
ggplot(Mobility, aes(y = HomeOwner)) +
geom_boxplot(fill = "#5FB617") +
labs(title = "Boxplot of Tracts based on Home Ownership",
y = "Home Ownership")

##5 Make at least a PDF (probability density function) chart and CDF (cumulative density function) chart for any variable (2')
# Probability Density Functon (PDF)
ggplot(Mobility, aes(x = HomeOwner)) +
geom_density(fill = "#5FB617", alpha = 0.5) +
labs(title = "PDF of Home Ownership",
x = "Home Ownership",
y = "Density")

#Cumulative Density Function (CDF)
ggplot(Mobility, aes(x = HomeOwner)) +
stat_ecdf(geom = "step", color = "#5FB617", size = 1) +
labs(title = "CDF of Home Ownership",
x = "Home Ownership",
y = "Cumulative Probability")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

##6 Make a prediction of population OR GDP OR other variable of your study area for the next five years (2025-2030) (2')
#Prediction of Population growth of Migrants from Out of State
#Data taken from Census API: B07204 ACS 1 year estimates from multiple years.
Pred_year <- c(2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2021, 2022, 2023)
Pred_population <- c(27730, 36732, 28132, 32670, 36674, 32667, 33260, 39536, 39170, 37991, 51427, 46392, 41908)
plot(Pred_year, Pred_population, pch=19,
main='Population Projection of Intrastate Immigrants',
cex.main = 1.1,
xlab='Year',
ylab='Population')
poly.lm1 <- lm(Pred_population ~ poly(Pred_year, 1))
poly.lm2 <- lm(Pred_population ~ poly(Pred_year, 2))
poly.lm3 <- lm(Pred_population ~ poly(Pred_year, 3))
future_years <- c(2025, 2026, 2027, 2028, 2029, 2030)
future_data <- data.frame(Pred_year = future_years)
new.y <- predict(poly.lm1, newdata=future_data)
new.y <- predict(poly.lm2, newdata=future_data)
new.y <- predict(poly.lm3, newdata=future_data)
x_axis <- seq(2010, 2023, length=13)
lines(x_axis, predict(poly.lm1, data.frame(x=x_axis)), col='green')
lines(x_axis, predict(poly.lm2, data.frame(x=x_axis)), col='red')
lines(x_axis, predict(poly.lm3, data.frame(x=x_axis)), col='purple')

predicted_population <- predict(poly.lm3, newdata = future_data)
print(predicted_population)
## 1 2 3 4 5 6
## 43041.95 40512.58 36886.50 32043.15 25861.94 18222.31
##7 Make OLS regression analysis or correlation analysis to examine your research questions (2')
Mobility <- Mobility %>%
mutate(
pct_HomeOwnership = 100 * HomeOwner / HousingTenure
)
ols_model <- lm(pct_HomeOwnership ~ Total , data = Mobility)
summary(ols_model)
##
## Call:
## lm(formula = pct_HomeOwnership ~ Total, data = Mobility)
##
## Residuals:
## Min 1Q Median 3Q Max
## -46.10 -32.79 -12.51 38.02 69.29
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 46.45481 3.39445 13.686 < 2e-16 ***
## Total -0.05833 0.01512 -3.859 0.000145 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 37.55 on 252 degrees of freedom
## (36 observations deleted due to missingness)
## Multiple R-squared: 0.05579, Adjusted R-squared: 0.05205
## F-statistic: 14.89 on 1 and 252 DF, p-value: 0.0001448
ggplot(Mobility, aes(x = Total, y = pct_HomeOwnership)) +
geom_point() +
geom_smooth(method = "lm", col = "blue") +
labs(title = "Home Ownership % vs. Intranational Immigrants Regression Line",
x = "Count of Intranational Immigrants per Tract",
y = "Home Ownership Percentage")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

#8 Make at least one interactive plot or one interactive map (1')
p <- ggplot(Mobility, aes(x = Total, y = pct_HomeOwnership)) +
geom_point() +
geom_smooth(method = "lm", col = "blue") +
labs(title = "Home Ownership % vs. Intranational Immigrants Regression Line",
x = "Count of Intranational Immigrants per Tract",
y = "Home Ownership Percentage")
p
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Removed 36 rows containing missing values or values outside the scale range
## (`geom_point()`).

interactive_plot <- ggplotly(p)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 36 rows containing non-finite outside the scale range
## (`stat_smooth()`).
interactive_plot
##9 Have brief write-up and summarize your findings in the R Markdown file (2')
#For the demographic analysis of intranational immigration in Travis County, the housing tenure was studied as well as the breakdown of the immigration data by where they originated from the year prior. All the data used was from Census API, 2021 ACS 5-year estimates and ACS 1-year estimates from 2010 to 2023 for the population prediction. It was found that the highest number of immigrants in the county was from Western United States with 17,780 residents while the lowest was from the Midwest with 6,425. The average homeownership per tract is 39.420 and the highest number (max) is 435 on a single tract. Also shown is the population projection from intranational immigration for the years 2025 to 2030. Historical data shows that the population has been increasing and decreasing but is seen to be growing overall. For the regression scatter plot, instead of count of homeowners, the percentage of home ownership was used instead. The line trends downward as the tracts with more immigrants have a lower home ownership rate.