knitr::opts_chunk$set(echo = TRUE)
#1
library(tidycensus)
# Median Household Income B19013_001E
# Hispanic Population B03002_012E
# Non Hispanic African American Population B03002_004E
# Male B01001_002E
# Female B01001_026E
# Total Population B01001_001E
# Median Age of County B01002_001E
# ACS estimate codes to request. With output = "wide", each "E" (estimate)
# column is returned together with a matching "M" (margin of error) column.
var <- c(
  "B19013_001E", # median household income
  "B03002_012E", # Hispanic population
  "B03002_004E", # non-Hispanic African American population
  "B01001_002E", # male population
  "B01001_026E", # female population
  "B01001_001E", # total population
  "B01002_001E"  # median age
)
# Tract-level estimates plus tract geometry for Victoria County, TX.
# year/survey are pinned to the 2017-2021 5-year ACS this analysis was run
# against, so the numbers do not shift when the package default year changes.
# progress_bar = FALSE is forwarded to the geometry download so the progress
# bar does not end up in knitted output.
Victoria_segregation <- get_acs(
  geography = "tract",
  variables = var,
  year = 2021,
  survey = "acs5",
  state = "TX",
  county = "Victoria",
  output = "wide",
  geometry = TRUE,
  progress_bar = FALSE
)
## Getting data from the 2017-2021 5-year ACS
## Downloading feature geometry from the Census website. To cache shapefiles for use in future sessions, set `options(tigris_use_cache = TRUE)`.
##
## (download progress bar output, 0% to 100%, omitted)
#2
# Map each ACS estimate code to a readable column name. Renaming by code
# rather than by column position keeps the labels correct even if the column
# order of the downloaded table differs from the order assumed here.
estimate_labels <- c(
  B19013_001E = "MedianHouseholdIncome",
  B03002_012E = "HispanicPop",
  B03002_004E = "NHIPAfAM",
  B01001_002E = "Male",
  B01001_026E = "Female",
  B01001_001E = "TotalPop",
  B01002_001E = "MedianAge"
)
# Fail loudly instead of silently mislabelling when an expected column is
# absent (for example, if this step is run twice on the same object).
missing_codes <- setdiff(names(estimate_labels), names(Victoria_segregation))
if (length(missing_codes) > 0) {
  stop(
    "Expected ACS columns not found: ",
    paste(missing_codes, collapse = ", "),
    call. = FALSE
  )
}
label_idx <- match(names(estimate_labels), names(Victoria_segregation))
names(Victoria_segregation)[label_idx] <- unname(estimate_labels)
# Drop the margin-of-error columns: same code as the estimate, with the
# trailing "E" replaced by "M".
for (moe_col in sub("E$", "M", names(estimate_labels))) {
  Victoria_segregation[[moe_col]] <- NULL
}
#3
# Export the tract table to CSV.
# NOTE(review): the destination is an absolute path on one user's machine;
# confirm the folder exists before running this anywhere else.
write.csv(
  x = Victoria_segregation,
  file = "C:/Users/Mario/Documents/Wei CLASS Folder/Victoria_seg3.csv"
)
#4 Scatter plot: association between median household income and the
# percentage of Non-Hispanic African American residents
library(ggplot2)
# Non-Hispanic African American count as a percentage of each tract's total
# population.
Victoria_segregation$Pct_Black <- with(
  Victoria_segregation,
  100 * NHIPAfAM / TotalPop
)
ggplot(Victoria_segregation, aes(Pct_Black, MedianHouseholdIncome)) +
  geom_point()
## Warning: Removed 1 rows containing missing values (`geom_point()`).
#5 Histogram: distribution of tract-level median age across the county
# Base plot with median age on the x axis; the histogram layer is added when
# the plot is drawn.
AgeDistrbution <- ggplot(
  data = Victoria_segregation,
  mapping = aes(x = MedianAge)
)
AgeDistrbution + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
#6 PDF (probability density function) chart showing the distribution of
# median household income
library(ggplot2)
# Base plot with income on the x axis. Note that the plot object shares its
# name with the data column it maps; the later income charts add layers to it.
MedianHouseholdIncome <- ggplot(
  data = Victoria_segregation,
  mapping = aes(x = MedianHouseholdIncome)
)
MedianHouseholdIncome + geom_density()
## Warning: Removed 1 rows containing non-finite values (`stat_density()`).
#7 CDF (cumulative distribution function) chart to show the distribution of median household income (2').
# Adds an empirical CDF layer to the income base plot built in step #6.
MedianHouseholdIncome + stat_ecdf()
## Warning: Removed 1 rows containing non-finite values (`stat_ecdf()`).
#8 Make a boxplot to visualize the median household income
# Adds a boxplot layer to the income base plot built in step #6; income is
# mapped to x there, so the box is drawn along the x axis.
MedianHouseholdIncome + geom_boxplot()
## Warning: Removed 1 rows containing non-finite values (`stat_boxplot()`).
#9 Make a map to show the spatial distribution of percentage of Hispanic population
library(sf)
## Linking to GEOS 3.11.2, GDAL 3.6.2, PROJ 9.2.0; sf_use_s2() is TRUE
library(tmap)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## Breaking News: tmap 3.x is retiring. Please test v4, e.g. with
## remotes::install_github('r-tmap/tmap')
# Hispanic count as a percentage of each tract's total population, then a
# filled tract map of that percentage.
Victoria_segregation$pct_Hispanic <- with(
  Victoria_segregation,
  100 * HispanicPop / TotalPop
)
tm_shape(Victoria_segregation) +
  tm_fill(col = "pct_Hispanic") +
  tm_layout(title = "Hispanic Percent")
#10 Calculate and map the difference between female and male population to
# show which census tracts have more female population
# Positive values mean more females than males in the tract.
Victoria_segregation$Difference <- with(Victoria_segregation, Female - Male)
tm_shape(Victoria_segregation) +
  tm_fill(col = "Difference") +
  tm_layout(title = "Difference female and male")
## Variable(s) "Difference" contains positive and negative values, so midpoint is set to 0. Set midpoint = NA to show the full spectrum of the color palette.
# Logical flag per tract: TRUE where the female count exceeds the male count,
# mapped as a two-category fill.
Victoria_segregation$moreFemale <-
  Victoria_segregation[["Female"]] > Victoria_segregation[["Male"]]
tm_shape(Victoria_segregation) +
  tm_fill(col = "moreFemale") +
  tm_layout(title = "More Female Population")
#11 Find the population of the county (or the major city within the county) from 2010 to 2023, and predict the population for the next five years (2024-2028) (2').
# The observed series below covers 2010-2022; the fitted trend is then
# evaluated for 2023 through 2028.
x <- seq(2010, 2022, by = 1) # year
y <- c(
  86.88, 87.56, 89.12, 90.08, 91.04, 92.13, 92.42,
  92.01, 91.83, 92.02, 91.30, 90.90, 91.07
) # population (thousands of persons)
# Degree-1 polynomial regression of population on year.
poly.lm1 <- lm(y ~ poly(x, 1))
new.x <- seq(2023, 2028, by = 1)
new.df <- data.frame(x = new.x)
new.y <- predict(poly.lm1, newdata = new.df)
print(new.y)
## 1 2 3 4 5 6
## 92.86769 93.18549 93.50330 93.82110 94.13890 94.45670