#Load all the data at the same time
library("tidycensus")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.4 ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Variable dictionaries used to look up the codes requested below:
# the ACS 5-year Data Profile for 2021 and 2010, plus the 2021 detailed tables.
profile <- load_variables(year = 2021, dataset = "acs5/profile")
profile2010 <- load_variables(year = 2010, dataset = "acs5/profile")
vars <- load_variables(year = 2021, dataset = "acs5")
# 2021 county-level indicators (ACS 5-year Data Profile).
# Variable codes and their labels:
#DP04_0089: Estimate!!VALUE!!Owner-occupied units!!Median (dollars)
#DP03_0062: Estimate!!INCOME AND BENEFITS (IN 2021 INFLATION-ADJUSTED DOLLARS)!!Total households!!Median household income (dollars)
#DP05_0077P: Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
#DP04_0091: Estimate!!MORTGAGE STATUS!!Owner-occupied units!!Housing units with a mortgage
#DP04_0134: Estimate!!GROSS RENT!!Occupied units paying rent!!Median (dollars)
#DP05_0086: Estimate!!Total housing units
#DP03_0009P: Percent!!EMPLOYMENT STATUS!!Civilian labor force!!Unemployment Rate
#DP03_0025: Estimate!!COMMUTING TO WORK!!Workers 16 years and over!!Mean travel time to work (minutes)
#DP02_0068: Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree or higher
acs_vars_2021 <- c(
  median_value = "DP04_0089",
  median_income = "DP03_0062",
  percent_white = "DP05_0077P",
  mortgage = "DP04_0091",
  gross_rent = "DP04_0134",
  total_house_unit = "DP05_0086",
  unemployment = "DP03_0009P",
  travel_time = "DP03_0025",
  bachelor = "DP02_0068"
)
# output = "wide" returns one row per county with an estimate ("E") and a
# margin-of-error ("M") column for each named variable.
county2021 <- get_acs(
  geography = "county",
  variables = acs_vars_2021,
  year = 2021,
  output = "wide"
)
## Getting data from the 2017-2021 5-year ACS
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
## Using the ACS Data Profile
#2010
# 2010 county-level indicators (ACS 5-year Data Profile). Several codes differ
# from 2021 because the profile tables were renumbered between releases.
#Variable
#DP04_0088: Estimate!!VALUE!!Median (dollars) ##This variable is different from 2021
#DP03_0062: Estimate!!INCOME AND BENEFITS (IN 2010 INFLATION-ADJUSTED DOLLARS)!!Median household income (dollars)
#DP05_0072P: Percent!!HISPANIC OR LATINO AND RACE!!Not Hispanic or Latino!!White alone!!White alone
#DP04_0092: Estimate!!SELECTED MONTHLY OWNER COSTS (SMOC)!!Housing units with a mortgage
#DP04_0132: Estimate!!GROSS RENT!!Median (dollars) ##This variable is different from 2021
#  (DP04_0134 in 2010 is "GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!
#  Occupied units paying rent", i.e. a unit count, not the median rent)
#DP05_0081: Estimate!!Total housing units
#DP03_0009P: Percent!!EMPLOYMENT STATUS!!Percent Unemployed
#DP03_0025: Estimate!!COMMUTING TO WORK!!Mean travel time to work (minutes)
#DP02_0067: Estimate!!EDUCATIONAL ATTAINMENT!!Percent bachelor's degree or higher
# NOTE(review): DP02_0067 is labelled as a percent, whereas the 2021 variable
# (DP02_0068) is a count -- confirm the two are comparable before differencing.
county2010 <- get_acs(
  geography = "county",
  variables = c(
    median_value = "DP04_0088", #diff var
    median_income = "DP03_0062",
    percent_white = "DP05_0072P", #diff var
    mortgage = "DP04_0092", #diff var
    gross_rent = "DP04_0132", #diff var: median gross rent in the 2010 profile
    total_house_unit = "DP05_0081", #diff var
    unemployment = "DP03_0009P",
    travel_time = "DP03_0025",
    bachelor = "DP02_0067" #different variable
  ),
  year = 2010,
  output = "wide"
)
## Getting data from the 2006-2010 5-year ACS
## Using the ACS Data Profile
# Clean tables: drop the margin-of-error ("...M") columns and split NAME on
# ", " into separate County and State columns.
moe_columns <- paste0(
  c(
    "median_value", "median_income", "percent_white", "mortgage", "gross_rent",
    "total_house_unit", "unemployment", "travel_time", "bachelor"
  ),
  "M"
)
county2021_clean <- county2021 %>%
  select(-all_of(moe_columns)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")
county2010_clean <- county2010 %>%
  select(-all_of(moe_columns)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")
# Compare 2021 with 2010: join the cleaned county tables on GEOID, give each
# indicator a year-suffixed name (.x = 2021, .y = 2010), then compute the
# absolute and percentage change in median home value and household income.
join_table <- county2021_clean %>%
  left_join(county2010_clean, by = "GEOID") %>%
  # keep only the County/State labels coming from the 2021 table
  select(-c(County.y, State.y)) %>%
  rename(
    County = County.x,
    State = State.x,
    value2021 = median_valueE.x,
    value2010 = median_valueE.y,
    income2021 = median_incomeE.x,
    income2010 = median_incomeE.y,
    white2021 = percent_whiteE.x,
    white2010 = percent_whiteE.y,
    mortgage2021 = mortgageE.x,
    mortgage2010 = mortgageE.y,
    rent2021 = gross_rentE.x,
    rent2010 = gross_rentE.y,
    totalhouse2021 = total_house_unitE.x,
    totalhouse2010 = total_house_unitE.y,
    unemployment2021 = unemploymentE.x,
    unemployment2010 = unemploymentE.y,
    travel2021 = travel_timeE.x,
    travel2010 = travel_timeE.y,
    bachelor2021 = bachelorE.x,
    bachelor2010 = bachelorE.y
  ) %>%
  mutate(
    value_change = value2021 - value2010,
    value_change_per = (value2021 - value2010) / value2010 * 100,
    income_change = income2021 - income2010,
    income_change_per = (income2021 - income2010) / income2010 * 100
  ) %>%
  # largest percentage increase in home value first
  arrange(desc(value_change_per))
# Divide counties into those whose median home value rose (or stayed flat) and
# those where it fell. Rows with a missing value_change are dropped by filter().
# Sort on the column itself: desc("value_change_per") would sort by a constant
# string and leave the row order untouched.
increases <- join_table %>%
  filter(value_change >= 0) %>%
  arrange(desc(value_change_per))
decreases <- join_table %>%
  filter(value_change < 0) %>%
  arrange(desc(value_change_per))
# Read the job data from the locally downloaded BDSTIMESERIES.BDSGEO CSV export.
# NOTE(review): the path is absolute and machine-specific.
library(readr)
job_csv_path <- "/Users/hanvuhavu/Downloads/housing_price/BDSTIMESERIES.BDSGEO_2023-02-23T131702/BDSTIMESERIES.BDSGEO-Data.csv"
job <- read_csv(job_csv_path)
## New names:
## Rows: 2702121 Columns: 56
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (55): GEO_ID, NAME, NAICS, NAICS_LABEL, METRO, METRO_LABEL, YEAR, FIRM, ... lgl
## (1): ...56
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...56`
# Restrict the job data to 2010-2020, drop every flag column, and keep only the
# relevant variables: GEO_ID, NAME, NAICS, NAICS_LABEL, YEAR, FIRM, EMP,
# JOB_CREATION, JOB_CREATION_RATE, NET_JOB_CREATION, NET_JOB_CREATION_RATE.
# YEAR was read as character (per the read_csv column specification), so the
# comparisons below are string comparisons; that works for four-digit years.
job1 <- job %>%
  filter(YEAR >= 2010, YEAR <= 2020) %>%
  select(-contains("_F")) %>%
  select(
    GEO_ID, NAME, NAICS, NAICS_LABEL, YEAR, FIRM, EMP,
    JOB_CREATION, JOB_CREATION_RATE, NET_JOB_CREATION, NET_JOB_CREATION_RATE
  )
# Snapshots for the first and last year of the period
job2020 <- filter(job1, YEAR == 2020)
job2010 <- filter(job1, YEAR == 2010)
# Keep only the "Total for all sectors" rows, drop the sector/year columns, and
# split NAME on ", " into County and State.
job2020_allsector <- job2020 %>%
  filter(NAICS_LABEL == "Total for all sectors") %>%
  select(-c(NAICS, NAICS_LABEL, YEAR)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")
job2010_allsector <- job2010 %>%
  filter(NAICS_LABEL == "Total for all sectors") %>%
  select(-c(NAICS, NAICS_LABEL, YEAR)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")
# Extract the last 5 characters of GEO_ID into GEOID, the key used to join
# against the housing tables, and drop the original GEO_ID column.
library(dplyr)
job2020_allsector <- job2020_allsector %>%
  mutate(
    GEOID = substr(GEO_ID, nchar(GEO_ID) - 4, nchar(GEO_ID)),
    GEO_ID = NULL # drop the GEO_ID column
  )
job2010_allsector <- job2010_allsector %>%
  mutate(
    GEOID = substr(GEO_ID, nchar(GEO_ID) - 4, nchar(GEO_ID)),
    GEO_ID = NULL # drop the GEO_ID column
  )
# Join job 2010 and 2020 (.x = 2020, .y = 2010)
joined_job1 <- job2020_allsector %>%
  left_join(job2010_allsector, by = "GEOID") %>%
  select(
    GEOID, County.x, State.x,
    FIRM.x, EMP.x, JOB_CREATION.x,
    FIRM.y, EMP.y, JOB_CREATION.y
  )
# Join job with housing price table; the job table's County/State labels and
# the job-creation columns are not carried forward.
job_joined <- join_table %>%
  left_join(joined_job1, by = "GEOID") %>%
  select(-c(County.x, State.x, JOB_CREATION.x, JOB_CREATION.y))
# Calculate all the changes: rename the firm/employment columns by year, add
# the 2021-minus-2010 differences, and drop the columns not analysed further.
# Handle NA: the firm/employment columns arrive as character; map the literal
# "NA" to a real NA and convert to numeric.
## Warning: NAs introduced by coercion (raised for each of the four columns)
job_joined2 <- job_joined %>%
  rename(
    firm2020 = FIRM.x,
    firm2010 = FIRM.y,
    emp2020 = EMP.x,
    emp2010 = EMP.y
  ) %>%
  mutate(
    white_change = white2021 - white2010,
    unemployment_change = unemployment2021 - unemployment2010,
    totalhouse_change = totalhouse2021 - totalhouse2010
  ) %>%
  select(-c(
    mortgage2021, mortgage2010, bachelor2010, rent2021, rent2010,
    totalhouse2010, unemployment2010, white2010, travel2021, travel2010
  )) %>%
  mutate(across(
    c(firm2020, emp2020, firm2010, emp2010),
    ~ as.numeric(ifelse(.x == "NA", NA, .x))
  ))
#summary dataset
summary(job_joined2)
## GEOID County State value2021
## Length:3221 Length:3221 Length:3221 Min. : 22600
## Class :character Class :character Class :character 1st Qu.: 105300
## Mode :character Mode :character Mode :character Median : 139550
## Mean : 168143
## 3rd Qu.: 193650
## Max. :1225900
## NA's :3
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 12856 Min. : 0.00 Min. : 50 Min. : 0.000
## 1st Qu.: 47300 1st Qu.: 61.70 1st Qu.: 5340 1st Qu.: 3.700
## Median : 55652 Median : 81.70 Median : 12300 Median : 4.900
## Mean : 57327 Mean : 73.39 Mean : 43838 Mean : 5.468
## 3rd Qu.: 64610 3rd Qu.: 91.40 3rd Qu.: 30832 3rd Qu.: 6.500
## Max. :156821 Max. :100.00 Max. :3578801 Max. :34.100
## NA's :1
## bachelor2021 value2010 income2010 value_change
## Min. : 0 Min. : 29700 Min. : 10932 Min. :-302400
## 1st Qu.: 1250 1st Qu.: 80675 1st Qu.: 36464 1st Qu.: 17600
## Median : 3382 Median : 105400 Median : 42240 Median : 29400
## Mean : 24120 Mean : 131898 Mean : 43619 Mean : 36232
## 3rd Qu.: 11126 3rd Qu.: 151525 3rd Qu.: 48899 3rd Qu.: 46900
## Max. :2356572 Max. :1000001 Max. :115574 Max. : 441100
## NA's :78 NA's :5 NA's :4 NA's :7
## value_change_per income_change income_change_per firm2020
## Min. :-93.05 Min. :-28945 Min. :-38.77 Min. : 5
## 1st Qu.: 15.81 1st Qu.: 10000 1st Qu.: 24.34 1st Qu.: 186
## Median : 27.68 Median : 13517 Median : 31.12 Median : 447
## Mean : 31.15 Mean : 13722 Mean : 31.82 Mean : 1927
## 3rd Qu.: 41.63 3rd Qu.: 17140 3rd Qu.: 38.68 3rd Qu.: 1246
## Max. :257.27 Max. : 54883 Max. :149.89 Max. :207759
## NA's :7 NA's :5 NA's :5 NA's :82
## emp2020 firm2010 emp2010 white_change
## Min. : 15 Min. : 0 Min. : 0 Min. :-36.700
## 1st Qu.: 2101 1st Qu.: 196 1st Qu.: 2146 1st Qu.: -4.800
## Median : 6561 Median : 464 Median : 6201 Median : -3.000
## Mean : 41921 Mean : 1816 Mean : 35371 Mean : -3.476
## 3rd Qu.: 21122 3rd Qu.: 1227 3rd Qu.: 19029 3rd Qu.: -1.900
## Max. :4125775 Max. :175974 Max. :3553071 Max. : 20.800
## NA's :82 NA's :82 NA's :82 NA's :4
## unemployment_change totalhouse_change
## Min. :-25.900 Min. :-33121
## 1st Qu.: -3.800 1st Qu.: -193
## Median : -2.200 Median : 86
## Mean : -2.311 Mean : 3013
## 3rd Qu.: -0.700 3rd Qu.: 1344
## Max. : 20.300 Max. :265266
## NA's :4 NA's :4
# Divide counties into increases and decreases in median home value.
# Rows with a missing value_change are dropped by filter().
# Sort on the column itself: desc("value_change_per") would sort by a constant
# string and leave the row order untouched.
increases <- job_joined2 %>%
  filter(value_change >= 0) %>%
  arrange(desc(value_change_per))
decreases <- job_joined2 %>%
  filter(value_change < 0) %>%
  arrange(desc(value_change_per))
# Number of counties per state, most counties first
decreases %>% count(State, name = "num_count", sort = TRUE)
#Increase 0 - 50%
increases_50 <- increases %>% filter(value_change_per <= 50)
increases_50 %>% count(State, name = "num_count", sort = TRUE)
#Increase 50 - 100% (above 50, up to and including 100)
increases_100 <- increases %>% filter(value_change_per > 50, value_change_per <= 100)
increases_100 %>% count(State, name = "num_count", sort = TRUE)
#Increase over 100%
increase_most <- increases %>% filter(value_change_per > 100)
increase_most %>% count(State, name = "num_count", sort = TRUE)
#summary
summary(increase_most)
## GEOID County State value2021
## Length:53 Length:53 Length:53 Min. : 83000
## Class :character Class :character Class :character 1st Qu.:113100
## Mode :character Mode :character Mode :character Median :155100
## Mean :160574
## 3rd Qu.:192500
## Max. :309400
## income2021 white2021 totalhouse2021 unemployment2021
## Min. :44083 Min. : 12.20 Min. : 245 Min. :0.000
## 1st Qu.:56806 1st Qu.: 46.60 1st Qu.: 1244 1st Qu.:1.500
## Median :61042 Median : 84.80 Median : 2387 Median :2.500
## Mean :64352 Mean : 70.57 Mean : 6540 Mean :2.885
## 3rd Qu.:71250 3rd Qu.: 92.40 3rd Qu.: 5784 3rd Qu.:4.200
## Max. :97802 Max. :100.00 Max. :68662 Max. :7.200
## bachelor2021 value2010 income2010 value_change
## Min. : 90 Min. : 31400 Min. :24388 Min. : 43400
## 1st Qu.: 290 1st Qu.: 50900 1st Qu.:38404 1st Qu.: 64000
## Median : 549 Median : 69000 Median :42422 Median : 83800
## Mean : 1999 Mean : 70991 Mean :43926 Mean : 89583
## 3rd Qu.: 1308 3rd Qu.: 86600 3rd Qu.:49234 3rd Qu.:108200
## Max. :29891 Max. :124300 Max. :62308 Max. :222800
## value_change_per income_change income_change_per firm2020
## Min. :100.1 Min. :-7292 Min. :-11.73 Min. : 5
## 1st Qu.:107.0 1st Qu.:16513 1st Qu.: 38.15 1st Qu.: 60
## Median :116.4 Median :20797 Median : 45.68 Median : 122
## Mean :127.6 Mean :20426 Mean : 48.87 Mean : 349
## 3rd Qu.:129.4 3rd Qu.:25099 3rd Qu.: 61.85 3rd Qu.: 310
## Max. :257.3 Max. :47002 Max. :141.61 Max. :4630
## emp2020 firm2010 emp2010 white_change
## Min. : 35 Min. : 6.0 Min. : 20 Min. :-15.600
## 1st Qu.: 487 1st Qu.: 61.0 1st Qu.: 453 1st Qu.: -7.600
## Median : 1392 Median : 129.0 Median : 1040 Median : -4.400
## Mean : 6080 Mean : 294.8 Mean : 4118 Mean : -4.794
## 3rd Qu.: 4687 3rd Qu.: 241.0 3rd Qu.: 3184 3rd Qu.: -2.600
## Max. :101102 Max. :3716.0 Max. :58599 Max. : 12.000
## unemployment_change totalhouse_change
## Min. :-13.800 Min. : -699
## 1st Qu.: -2.100 1st Qu.: -62
## Median : -1.100 Median : 39
## Mean : -1.285 Mean : 1047
## 3rd Qu.: 0.400 3rd Qu.: 279
## Max. : 4.900 Max. :15565
# Save the summary table of the over-100% group to disk
increase_most %>%
  summary() %>%
  write.csv(file = "summary_output1.csv")
# Summary of the 50 - 100% increase group
summary(increases_100)
## GEOID County State value2021
## Length:461 Length:461 Length:461 Min. : 52700
## Class :character Class :character Class :character 1st Qu.: 117100
## Mode :character Mode :character Mode :character Median : 150900
## Mean : 180501
## 3rd Qu.: 208200
## Max. :1225900
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 25049 Min. : 3.40 Min. : 357 Min. : 0.000
## 1st Qu.: 52400 1st Qu.:64.60 1st Qu.: 2610 1st Qu.: 2.700
## Median : 58932 Median :82.50 Median : 5758 Median : 4.000
## Mean : 61111 Mean :75.68 Mean : 38957 Mean : 4.336
## 3rd Qu.: 67614 3rd Qu.:91.40 3rd Qu.: 16845 3rd Qu.: 5.400
## Max. :140258 Max. :98.00 Max. :1821640 Max. :16.200
## bachelor2021 value2010 income2010 value_change
## Min. : 86 Min. : 29700 Min. :21707 Min. : 20500
## 1st Qu.: 776 1st Qu.: 71500 1st Qu.:37522 1st Qu.: 45400
## Median : 1721 Median : 88700 Median :43072 Median : 59600
## Mean : 24473 Mean :109728 Mean :44397 Mean : 70773
## 3rd Qu.: 5927 3rd Qu.:124100 3rd Qu.:48699 3rd Qu.: 79200
## Max. :979189 Max. :785200 Max. :99198 Max. :441100
## value_change_per income_change income_change_per firm2020
## Min. :50.19 Min. :-11758 Min. :-30.25 Min. : 9
## 1st Qu.:55.40 1st Qu.: 12351 1st Qu.: 28.94 1st Qu.: 117
## Median :62.65 Median : 15970 Median : 37.46 Median : 244
## Mean :65.55 Mean : 16714 Mean : 38.23 Mean : 1793
## 3rd Qu.:73.22 3rd Qu.: 20753 3rd Qu.: 46.12 3rd Qu.: 760
## Max. :99.85 Max. : 54883 Max. :149.89 Max. :73889
## emp2020 firm2010 emp2010 white_change
## Min. : 41 Min. : 6 Min. : 12 Min. :-30.100
## 1st Qu.: 1090 1st Qu.: 119 1st Qu.: 1073 1st Qu.: -5.600
## Median : 2882 Median : 254 Median : 2604 Median : -3.800
## Mean : 41964 Mean : 1556 Mean : 32349 Mean : -4.242
## 3rd Qu.: 11001 3rd Qu.: 705 3rd Qu.: 9735 3rd Qu.: -2.500
## Max. :2221796 Max. :64841 Max. :1811318 Max. : 10.800
## unemployment_change totalhouse_change
## Min. :-9.700 Min. : -1342
## 1st Qu.:-3.000 1st Qu.: -111
## Median :-1.400 Median : 42
## Mean :-1.426 Mean : 5507
## 3rd Qu.: 0.100 3rd Qu.: 1323
## Max. : 7.700 Max. :265266
# Save the summary table of the 50 - 100% group to disk
increases_100 %>%
  summary() %>%
  write.csv(file = "summary_output2.csv")
# Summary of the 0 - 50% increase group
summary(increases_50)
## GEOID County State value2021
## Length:2542 Length:2542 Length:2542 Min. : 43700
## Class :character Class :character Class :character 1st Qu.: 104300
## Mode :character Mode :character Mode :character Median : 137350
## Mean : 166243
## 3rd Qu.: 190775
## Max. :1118300
##
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 12856 Min. : 0.00 Min. : 256 Min. : 0.000
## 1st Qu.: 47234 1st Qu.: 63.20 1st Qu.: 6544 1st Qu.: 3.800
## Median : 55000 Median : 82.70 Median : 13928 Median : 5.050
## Mean : 56855 Mean : 74.69 Mean : 45925 Mean : 5.517
## 3rd Qu.: 63790 3rd Qu.: 91.60 3rd Qu.: 33580 3rd Qu.: 6.500
## Max. :156821 Max. :100.00 Max. :3578801 Max. :34.100
##
## bachelor2021 value2010 income2010 value_change
## Min. : 40 Min. : 32800 Min. : 10932 Min. : 0
## 1st Qu.: 1566 1st Qu.: 82725 1st Qu.: 36512 1st Qu.: 17425
## Median : 4040 Median : 109150 Median : 42155 Median : 26900
## Mean : 24592 Mean : 134304 Mean : 43548 Mean : 31939
## 3rd Qu.: 12678 3rd Qu.: 153875 3rd Qu.: 48646 3rd Qu.: 39700
## Max. :2356572 Max. :1000001 Max. :115574 Max. :279200
## NA's :42
## value_change_per income_change income_change_per firm2020
## Min. : 0.00 Min. :-28945 Min. :-38.77 Min. : 5.0
## 1st Qu.:15.62 1st Qu.: 9942 1st Qu.: 24.57 1st Qu.: 217.8
## Median :25.01 Median : 13122 Median : 30.59 Median : 513.5
## Mean :25.28 Mean : 13307 Mean : 30.86 Mean : 1992.6
## 3rd Qu.:34.61 3rd Qu.: 16478 3rd Qu.: 37.27 3rd Qu.: 1356.5
## Max. :50.00 Max. : 42633 Max. :109.41 Max. :207759.0
## NA's :42
## emp2020 firm2010 emp2010 white_change
## Min. : 15 Min. : 8.0 Min. : 15 Min. :-18.200
## 1st Qu.: 2747 1st Qu.: 234.8 1st Qu.: 2690 1st Qu.: -4.600
## Median : 7722 Median : 539.5 Median : 7380 Median : -2.900
## Mean : 43009 Mean : 1895.9 Mean : 36758 Mean : -3.305
## 3rd Qu.: 23415 3rd Qu.: 1376.0 3rd Qu.: 21599 3rd Qu.: -1.800
## Max. :4125775 Max. :175974.0 Max. :3553071 Max. : 17.900
## NA's :42 NA's :42 NA's :42
## unemployment_change totalhouse_change
## Min. :-19.200 Min. :-33121
## 1st Qu.: -4.000 1st Qu.: -223
## Median : -2.400 Median : 96
## Mean : -2.505 Mean : 2709
## 3rd Qu.: -1.000 3rd Qu.: 1404
## Max. : 20.300 Max. :198083
##
# Save the summary table of the 0 - 50% group to disk
increases_50 %>%
  summary() %>%
  write.csv(file = "summary_output3.csv")
# Summary of the counties whose median home value decreased
summary(decreases)
## GEOID County State value2021
## Length:158 Length:158 Length:158 Min. : 22600
## Class :character Class :character Class :character 1st Qu.: 86025
## Mode :character Mode :character Mode :character Median :121750
## Mean :165703
## 3rd Qu.:231000
## Max. :479500
##
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 15375 Min. : 0.00 Min. : 136 Min. : 1.300
## 1st Qu.: 32333 1st Qu.:10.80 1st Qu.: 6203 1st Qu.: 5.300
## Median : 44034 Median :57.75 Median : 13962 Median : 7.400
## Mean : 51497 Mean :48.34 Mean : 38874 Mean : 8.791
## 3rd Qu.: 67130 3rd Qu.:76.83 3rd Qu.: 41227 3rd Qu.:10.100
## Max. :123727 Max. :96.70 Max. :377249 Max. :25.300
##
## bachelor2021 value2010 income2010 value_change
## Min. : 42.0 Min. : 49500 Min. : 11943 Min. :-302400
## 1st Qu.: 822.8 1st Qu.: 92075 1st Qu.: 28003 1st Qu.: -13900
## Median : 2880.0 Median :133200 Median : 37353 Median : -7000
## Mean : 24091.2 Mean :179092 Mean : 42104 Mean : -13389
## 3rd Qu.: 22003.5 3rd Qu.:244800 3rd Qu.: 54900 3rd Qu.: -2200
## Max. :322329.0 Max. :493000 Max. :100980 Max. : -200
## NA's :36
## value_change_per income_change income_change_per firm2020
## Min. :-93.04615 Min. :-19438 Min. :-31.57 Min. : 9
## 1st Qu.: -8.50373 1st Qu.: 3938 1st Qu.: 14.05 1st Qu.: 130
## Median : -4.67446 Median : 8569 Median : 21.60 Median : 391
## Mean : -7.20364 Mean : 9392 Mean : 22.73 Mean : 1847
## 3rd Qu.: -2.09131 3rd Qu.: 14570 3rd Qu.: 28.55 3rd Qu.: 1763
## Max. : -0.04213 Max. : 32522 Max. : 78.79 Max. :19784
## NA's :37
## emp2020 firm2010 emp2010 white_change
## Min. : 38 Min. : 0 Min. : 0 Min. :-16.600
## 1st Qu.: 1545 1st Qu.: 140 1st Qu.: 1668 1st Qu.: -5.375
## Median : 5643 Median : 412 Median : 5318 Median : -3.000
## Mean : 36335 Mean : 1868 Mean : 32808 Mean : -3.446
## 3rd Qu.: 28151 3rd Qu.: 1827 3rd Qu.: 27970 3rd Qu.: -0.500
## Max. :433388 Max. :20464 Max. :392604 Max. : 9.300
## NA's :37 NA's :36 NA's :36
## unemployment_change totalhouse_change
## Min. :-25.900 Min. :-3276.0
## 1st Qu.: -4.200 1st Qu.: -117.0
## Median : -1.600 Median : 268.5
## Mean : -2.166 Mean : 1329.9
## 3rd Qu.: -0.300 3rd Qu.: 1258.2
## Max. : 15.500 Max. :19117.0
##
# Save the summary table of the decreasing group to disk
decreases %>%
  summary() %>%
  write.csv(file = "summary_output4.csv")
# Counties whose 2021 median home value is at least 1 million
mil <- filter(job_joined2, value2021 >= 1000000)
summary(mil)
## GEOID County State value2021
## Length:6 Length:6 Length:6 Min. :1008500
## Class :character Class :character Class :character 1st Qu.:1091750
## Mode :character Mode :character Mode :character Median :1126350
## Mean :1127417
## 3rd Qu.:1179475
## Max. :1225900
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 93956 Min. :29.90 Min. : 12138 Min. :4.500
## 1st Qu.:118975 1st Qu.:38.15 1st Qu.:154320 1st Qu.:4.750
## Median :128598 Median :42.95 Median :343645 Median :5.250
## Mean :124136 Mean :49.17 Mean :400254 Mean :5.700
## 3rd Qu.:135380 3rd Qu.:63.95 3rd Qu.:613812 3rd Qu.:6.275
## Max. :140258 Max. :71.70 Max. :907018 Max. :8.000
## bachelor2021 value2010 income2010 value_change
## Min. : 5179 Min. : 701000 Min. :64971 Min. : 82899
## 1st Qu.:159280 1st Qu.: 784900 1st Qu.:74315 1st Qu.:200050
## Median :349726 Median : 805200 Median :84498 Median :329800
## Mean :392595 Mean : 827367 Mean :80231 Mean :300050
## 3rd Qu.:652976 3rd Qu.: 857300 3rd Qu.:86550 3rd Qu.:427375
## Max. :801081 Max. :1000001 Max. :89268 Max. :441100
## value_change_per income_change income_change_per firm2020
## Min. : 8.29 Min. :28985 Min. :39.86 Min. : 811
## 1st Qu.:23.87 1st Qu.:35353 1st Qu.:45.15 1st Qu.:10226
## Median :40.48 Median :46464 Median :53.26 Median :21872
## Mean :38.25 Mean :43905 Mean :54.91 Mean :27581
## 3rd Qu.:55.19 3rd Qu.:52853 3rd Qu.:61.06 3rd Qu.:34357
## Max. :61.83 Max. :54883 Max. :76.97 Max. :76069
## emp2020 firm2010 emp2010 white_change
## Min. : 4845 Min. : 625 Min. : 3534 Min. :-11.200
## 1st Qu.: 184283 1st Qu.: 9591 1st Qu.: 149588 1st Qu.: -6.550
## Median : 601265 Median :19291 Median : 400478 Median : -5.300
## Mean : 809022 Mean :26423 Mean : 619552 Mean : -5.417
## 3rd Qu.:1008899 3rd Qu.:30430 3rd Qu.: 751529 3rd Qu.: -3.300
## Max. :2456689 Max. :78829 Max. :1978316 Max. : -1.100
## unemployment_change totalhouse_change
## Min. :-3.3000 Min. : 812
## 1st Qu.:-1.8750 1st Qu.: 3913
## Median :-1.5500 Median :22346
## Mean :-0.4833 Mean :28622
## 3rd Qu.:-0.6250 3rd Qu.:50929
## Max. : 5.8000 Max. :68005
# Counties with a 2021 median home value from 500,000 up to (not including) 1 million
fivetomil <- filter(job_joined2, value2021 >= 500000, value2021 < 1000000)
# Number of such counties per state, most counties first
fivetomil %>% count(State, name = "num_count", sort = TRUE)
summary(fivetomil)
## GEOID County State value2021
## Length:49 Length:49 Length:49 Min. :507400
## Class :character Class :character Class :character 1st Qu.:572600
## Mode :character Mode :character Mode :character Median :625200
## Mean :641202
## 3rd Qu.:685500
## Max. :945300
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 67228 Min. :17.70 Min. : 3158 Min. :1.900
## 1st Qu.: 84356 1st Qu.:42.30 1st Qu.: 30147 1st Qu.:3.600
## Median : 93547 Median :57.10 Median : 139302 Median :4.800
## Mean : 98294 Mean :57.08 Mean : 328285 Mean :4.731
## 3rd Qu.:111790 3rd Qu.:72.40 3rd Qu.: 386583 3rd Qu.:5.700
## Max. :156821 Max. :92.10 Max. :3578801 Max. :7.200
## bachelor2021 value2010 income2010 value_change
## Min. : 1843 Min. :338700 Min. : 43567 Min. : 3000
## 1st Qu.: 15704 1st Qu.:461700 1st Qu.: 61854 1st Qu.: 77000
## Median : 117251 Median :503900 Median : 68750 Median :122200
## Mean : 258683 Mean :511741 Mean : 71878 Mean :129461
## 3rd Qu.: 351129 3rd Qu.:568700 3rd Qu.: 78958 3rd Qu.:177800
## Max. :2356572 Max. :723700 Max. :115574 Max. :279200
## value_change_per income_change income_change_per firm2020
## Min. : 0.5387 Min. : 8835 Min. :15.13 Min. : 249
## 1st Qu.:14.9697 1st Qu.:22507 1st Qu.:32.11 1st Qu.: 1722
## Median :25.8502 Median :25171 Median :37.25 Median : 7174
## Mean :26.5528 Mean :26416 Mean :37.48 Mean : 17998
## 3rd Qu.:32.2022 3rd Qu.:30840 3rd Qu.:43.84 3rd Qu.: 18601
## Max. :62.9493 Max. :42633 Max. :61.45 Max. :207759
## emp2020 firm2010 emp2010 white_change
## Min. : 1372 Min. : 227 Min. : 1045 Min. :-10.200
## 1st Qu.: 27523 1st Qu.: 1560 1st Qu.: 23403 1st Qu.: -7.100
## Median : 123815 Median : 6340 Median : 99143 Median : -5.400
## Mean : 372244 Mean : 15904 Mean : 307710 Mean : -5.243
## 3rd Qu.: 469802 3rd Qu.: 16598 3rd Qu.: 403049 3rd Qu.: -3.600
## Max. :4125775 Max. :175974 Max. :3553071 Max. : 3.300
## unemployment_change totalhouse_change
## Min. :-5.000 Min. : 596
## 1st Qu.:-2.300 1st Qu.: 1107
## Median :-1.500 Median : 7104
## Mean :-1.647 Mean : 21996
## 3rd Qu.:-1.000 3rd Qu.: 30181
## Max. : 1.300 Max. :153065
# Counties with a 2021 median home value from 100,000 up to (not including) 500,000
onetofive <- filter(job_joined2, value2021 >= 100000, value2021 < 500000)
# Number of such counties per state, most counties first
onetofive %>% count(State, name = "num_count", sort = TRUE)
summary(onetofive)
## GEOID County State value2021
## Length:2498 Length:2498 Length:2498 Min. :100000
## Class :character Class :character Class :character 1st Qu.:123900
## Mode :character Mode :character Mode :character Median :156650
## Mean :179036
## 3rd Qu.:211375
## Max. :497500
##
## income2021 white2021 totalhouse2021 unemployment2021
## Min. : 14666 Min. : 0.00 Min. : 245 Min. : 0.00
## 1st Qu.: 51387 1st Qu.: 66.72 1st Qu.: 7146 1st Qu.: 3.60
## Median : 58239 Median : 83.10 Median : 16026 Median : 4.80
## Mean : 60015 Mean : 76.13 Mean : 46980 Mean : 5.11
## 3rd Qu.: 66650 3rd Qu.: 91.30 3rd Qu.: 38836 3rd Qu.: 6.10
## Max. :129549 Max. :100.00 Max. :2253936 Max. :25.60
##
## bachelor2021 value2010 income2010 value_change
## Min. : 40 Min. : 45200 Min. : 12927 Min. :-91900
## 1st Qu.: 1858 1st Qu.: 93000 1st Qu.: 38832 1st Qu.: 22700
## Median : 4879 Median :118900 Median : 43955 Median : 34700
## Mean : 24256 Mean :139358 Mean : 45617 Mean : 39626
## 3rd Qu.: 15646 3rd Qu.:163225 3rd Qu.: 50557 3rd Qu.: 51000
## Max. :1489931 Max. :493000 Max. :103643 Max. :222800
## NA's :39 NA's :2 NA's :2 NA's :2
## value_change_per income_change income_change_per firm2020
## Min. :-36.87 Min. :-28945 Min. :-38.77 Min. : 5
## 1st Qu.: 17.44 1st Qu.: 10894 1st Qu.: 25.30 1st Qu.: 260
## Median : 29.33 Median : 14184 Median : 31.25 Median : 599
## Mean : 33.22 Mean : 14383 Mean : 31.99 Mean : 1973
## 3rd Qu.: 44.17 3rd Qu.: 17492 3rd Qu.: 38.24 3rd Qu.: 1559
## Max. :257.27 Max. : 49962 Max. :149.89 Max. :96009
## NA's :2 NA's :2 NA's :2 NA's :41
## emp2020 firm2010 emp2010 white_change
## Min. : 15 Min. : 6 Min. : 12 Min. :-30.100
## 1st Qu.: 3252 1st Qu.: 267 1st Qu.: 2987 1st Qu.: -4.900
## Median : 9101 Median : 619 Median : 8728 Median : -3.200
## Mean : 43134 Mean : 1869 Mean : 36517 Mean : -3.576
## 3rd Qu.: 28179 3rd Qu.: 1544 3rd Qu.: 25305 3rd Qu.: -2.000
## Max. :2526838 Max. :90750 Max. :2173540 Max. : 17.500
## NA's :41 NA's :41 NA's :41 NA's :2
## unemployment_change totalhouse_change
## Min. :-19.200 Min. :-33121
## 1st Qu.: -3.700 1st Qu.: -98
## Median : -2.200 Median : 287
## Mean : -2.296 Mean : 3433
## 3rd Qu.: -0.900 3rd Qu.: 2036
## Max. : 15.500 Max. :265266
## NA's :2 NA's :2
# Counties with a 2021 median home value below 100,000
lessthanone <- filter(job_joined2, value2021 < 100000)
# Number of such counties per state, most counties first
lessthanone %>% count(State, name = "num_count", sort = TRUE)
summary(lessthanone)
## GEOID County State value2021
## Length:665 Length:665 Length:665 Min. :22600
## Class :character Class :character Class :character 1st Qu.:77400
## Mode :character Mode :character Mode :character Median :85500
## Mean :83715
## 3rd Qu.:93200
## Max. :99900
##
## income2021 white2021 totalhouse2021 unemployment2021
## Min. :12856 Min. : 0.00 Min. : 136 Min. : 0.000
## 1st Qu.:37720 1st Qu.: 44.00 1st Qu.: 2815 1st Qu.: 3.900
## Median :43942 Median : 70.00 Median : 5231 Median : 5.900
## Mean :43599 Mean : 64.65 Mean : 8055 Mean : 6.892
## 3rd Qu.:51351 3rd Qu.: 92.00 3rd Qu.: 9544 3rd Qu.: 8.500
## Max. :81354 Max. :100.00 Max. :290393 Max. :34.100
##
## bachelor2021 value2010 income2010 value_change
## Min. : 42 Min. : 29700 Min. :10932 Min. :-302400
## 1st Qu.: 567 1st Qu.: 60500 1st Qu.:29870 1st Qu.: 8400
## Median : 1050 Median : 69500 Median :34055 Median : 14800
## Mean : 1808 Mean : 69636 Mean :33609 Mean : 14173
## 3rd Qu.: 1874 3rd Qu.: 77500 3rd Qu.:39183 3rd Qu.: 21450
## Max. :95429 Max. :325000 Max. :61563 Max. : 66500
## NA's :39 NA's :2 NA's :2 NA's :2
## value_change_per income_change income_change_per firm2020
## Min. :-93.05 Min. :-19438 Min. :-31.57 Min. : 9.0
## 1st Qu.: 11.81 1st Qu.: 6085 1st Qu.: 19.66 1st Qu.: 87.0
## Median : 21.54 Median : 10051 Median : 29.50 Median : 158.0
## Mean : 23.63 Mean : 10015 Mean : 30.53 Mean : 248.6
## 3rd Qu.: 32.75 3rd Qu.: 13346 3rd Qu.: 39.40 3rd Qu.: 295.0
## Max. :211.78 Max. : 37163 Max. :141.61 Max. :9058.0
## NA's :2 NA's :2 NA's :2 NA's :40
## emp2020 firm2010 emp2010 white_change
## Min. : 24 Min. : 0.0 Min. : 0.0 Min. :-18.200
## 1st Qu.: 893 1st Qu.: 95.0 1st Qu.: 881.8 1st Qu.: -4.000
## Median : 1868 Median : 174.0 Median : 2018.0 Median : -2.400
## Mean : 4026 Mean : 272.2 Mean : 4015.5 Mean : -2.925
## 3rd Qu.: 4476 3rd Qu.: 321.8 3rd Qu.: 4546.0 3rd Qu.: -1.200
## Max. :216880 Max. :8235.0 Max. :171284.0 Max. : 17.900
## NA's :40 NA's :39 NA's :39 NA's :2
## unemployment_change totalhouse_change
## Min. :-25.900 Min. :-3411.0
## 1st Qu.: -4.700 1st Qu.: -449.5
## Median : -2.000 Median : -241.0
## Mean : -2.447 Mean : -192.1
## 3rd Qu.: 0.100 3rd Qu.: -82.5
## Max. : 20.300 Max. :47183.0
## NA's :2 NA's :2
# Region-level values of B25077_001 from the 2021 ACS 5-year survey
region_value <- get_acs(
  survey = "acs5",
  year = 2021,
  geography = "region",
  variables = "B25077_001"
)
## Getting data from the 2017-2021 5-year ACS
# Same region-level variable (B25077_001) from the 2010 ACS 5-year survey
region_value1 <- get_acs(
  survey = "acs5",
  year = 2010,
  geography = "region",
  variables = "B25077_001"
)
## Getting data from the 2006-2010 5-year ACS
# Region change: percentage change of the estimate from 2010 (.y) to 2021 (.x)
region_change <- region_value %>%
  left_join(region_value1, by = "GEOID") %>%
  select(GEOID, NAME.x, estimate.x, estimate.y) %>%
  mutate(percent = (estimate.x - estimate.y) / estimate.y * 100)