#Load all the data at the same time

library("tidycensus")
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1     ✔ purrr   1.0.1
## ✔ tibble  3.1.8     ✔ dplyr   1.1.0
## ✔ tidyr   1.3.0     ✔ stringr 1.5.0
## ✔ readr   2.1.4     ✔ forcats 1.0.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Variable dictionaries, used to look up ACS variable codes by their labels.
# ACS 5-year Data Profile, for the 2021 and 2010 requests below:
profile <- load_variables(year = 2021, dataset = "acs5/profile")

profile2010 <- load_variables(year = 2010, dataset = "acs5/profile")

# ACS 5-year ("acs5") dictionary for 2021:
vars <- load_variables(year = 2021, dataset = "acs5")
#Variables:
#DP04_0089: Estimate!!VALUE!!Owner-occupied units!!Median (dollars)
#DP03_0062: Estimate!!INCOME AND BENEFITS (IN 2021 INFLATION-ADJUSTED DOLLARS)!!Total households!!Median household income (dollars)
#DP05_0077P: Percent!!HISPANIC OR LATINO AND RACE!!Total population!!Not Hispanic or Latino!!White alone
#DP04_0091: Estimate!!MORTGAGE STATUS!!Owner-occupied units!!Housing units with a mortgage
#DP04_0134: Estimate!!GROSS RENT!!Occupied units paying rent!!Median (dollars)
#DP05_0086: Estimate!!Total housing units
#DP03_0009P: Percent!!EMPLOYMENT STATUS!!Civilian labor force!!Unemployment Rate
#DP03_0025: Estimate!!COMMUTING TO WORK!!Workers 16 years and over!!Mean travel time to work (minutes)
#DP02_0068: Estimate!!EDUCATIONAL ATTAINMENT!!Population 25 years and over!!Bachelor's degree or higher
# County-level pull for 2021. With output = "wide" each named variable becomes
# a pair of columns: <name>E (estimate) and <name>M (margin of error).
county2021 <- get_acs(
  geography = "county",
  year = 2021,
  output = "wide",
  variables = c(
    median_value = "DP04_0089",
    median_income = "DP03_0062",
    percent_white = "DP05_0077P",
    mortgage = "DP04_0091",
    gross_rent = "DP04_0134",
    total_house_unit = "DP05_0086",
    unemployment = "DP03_0009P",
    travel_time = "DP03_0025",
    bachelor = "DP02_0068"
  )
)
## Getting data from the 2017-2021 5-year ACS
## Warning: • You have not set a Census API key. Users without a key are limited to 500
## queries per day and may experience performance limitations.
## ℹ For best results, get a Census API key at
## http://api.census.gov/data/key_signup.html and then supply the key to the
## `census_api_key()` function to use it throughout your tidycensus session.
## This warning is displayed once per session.
## Using the ACS Data Profile
#2010
#Variable
#DP04_0088: Estimate!!VALUE!!Median (dollars) ##This variable is different from 2021
#DP03_0062: Estimate!!INCOME AND BENEFITS (IN 2010 INFLATION-ADJUSTED DOLLARS)!!Median household income (dollars)
#DP05_0072P: Percent!!HISPANIC OR LATINO AND RACE!!Not Hispanic or Latino!!White alone!!White alone
#DP04_0092: Estimate!!SELECTED MONTHLY OWNER COSTS (SMOC)!!Housing units with a mortgage
#DP04_0134: Estimate!!GROSS RENT AS A PERCENTAGE OF HOUSEHOLD INCOME (GRAPI)!!Occupied units paying rent (excluding units where GRAPI cannot be computed)
#DP05_0081: Estimate!!Total housing units
#DP03_0009P: Percent!!EMPLOYMENT STATUS!!Percent Unemployed
#DP03_0025: Estimate!!COMMUTING TO WORK!!Mean travel time to work (minutes)
#DP02_0067: Estimate!!EDUCATIONAL ATTAINMENT!!Percent bachelor's degree or higher

# County-level pull for 2010, using the same column names as the 2021 request
# so the two tables can be joined and compared. Several codes differ from 2021
# because the Data Profile numbering changed between vintages.
# NOTE(review): per the 2010 variable list above, DP04_0134 is labelled as a
# GRAPI unit count (not median gross rent) and DP02_0067 as a percent, whereas
# the 2021 `bachelor` variable is listed as an estimate -- confirm these pairs
# are comparable before using the rent/bachelor columns.
county2010 <- get_acs(
  geography = "county",
  year = 2010,
  output = "wide",
  variables = c(
    median_value = "DP04_0088", # differs from 2021
    median_income = "DP03_0062",
    percent_white = "DP05_0072P", # differs from 2021
    mortgage = "DP04_0092", # differs from 2021
    gross_rent = "DP04_0134",
    total_house_unit = "DP05_0081", # differs from 2021
    unemployment = "DP03_0009P",
    travel_time = "DP03_0025",
    bachelor = "DP02_0067" # differs from 2021
  )
)
## Getting data from the 2006-2010 5-year ACS
## Using the ACS Data Profile

#Clean tables (drop margin of error columns)

# Drop every margin-of-error column (the ones named <variable>M) and split
# NAME ("<county>, <state>") into separate County and State columns.
# The suffix match is case-sensitive so only the upper-case "M" columns go.
county2021_clean <- county2021 %>%
  select(-ends_with("M", ignore.case = FALSE)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")


county2010_clean <- county2010 %>%
  select(-ends_with("M", ignore.case = FALSE)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")

#Compare the changes, join data

# One row per 2021 county with its 2010 values alongside (.x = 2021,
# .y = 2010), plus absolute and percent change in home value and income.
# Sorted so the largest percent increase in home value comes first.
join_table <- county2021_clean %>%
  left_join(county2010_clean, by = "GEOID") %>%
  select(-County.y, -State.y) %>%
  rename(
    County = County.x,
    State = State.x,
    value2021 = median_valueE.x,
    value2010 = median_valueE.y,
    income2021 = median_incomeE.x,
    income2010 = median_incomeE.y,
    white2021 = percent_whiteE.x,
    white2010 = percent_whiteE.y,
    mortgage2021 = mortgageE.x,
    mortgage2010 = mortgageE.y,
    rent2021 = gross_rentE.x,
    rent2010 = gross_rentE.y,
    totalhouse2021 = total_house_unitE.x,
    totalhouse2010 = total_house_unitE.y,
    unemployment2021 = unemploymentE.x,
    unemployment2010 = unemploymentE.y,
    travel2021 = travel_timeE.x,
    travel2010 = travel_timeE.y,
    bachelor2021 = bachelorE.x,
    bachelor2010 = bachelorE.y
  ) %>%
  mutate(
    value_change = value2021 - value2010,
    value_change_per = (value2021 - value2010) / value2010 * 100,
    income_change = income2021 - income2010,
    income_change_per = (income2021 - income2010) / income2010 * 100
  ) %>%
  arrange(desc(value_change_per))

#Divide increases and decreases

# Split counties by the sign of the home-value change. filter() drops rows
# whose value_change is NA, so those counties end up in neither table.
# The sort key must be the bare column name: desc("value_change_per") with a
# quoted name sorts by a constant string and leaves the row order untouched.
increases <- join_table %>%
  filter(value_change >= 0) %>%
  arrange(desc(value_change_per))

decreases <- join_table %>%
  filter(value_change < 0) %>%
  arrange(desc(value_change_per))

#Read the job data

library(readr)

# BDS time-series export (BDSTIMESERIES.BDSGEO) downloaded as CSV.
# NOTE(review): this is an absolute path on one machine; point it at your own
# copy of the file before running elsewhere.
job <- read_csv(
  file = "/Users/hanvuhavu/Downloads/housing_price/BDSTIMESERIES.BDSGEO_2023-02-23T131702/BDSTIMESERIES.BDSGEO-Data.csv"
)
## New names:
## Rows: 2702121 Columns: 56
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (55): GEO_ID, NAME, NAICS, NAICS_LABEL, METRO, METRO_LABEL, YEAR, FIRM, ... lgl
## (1): ...56
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...56`
#filter data for 2010 - 2020 period

# Keep the 2010-2020 rows and only the columns used downstream.
#
# YEAR is parsed as character (see the column specification printed by
# read_csv above), so it is matched against year strings explicitly rather
# than compared to numbers, which would silently fall back to a string
# comparison.
#
# The explicit column list below also leaves out every *_F flag column, so no
# separate "drop flag columns" step is needed (the previous contains("_F")
# matched the substring anywhere in a name, case-insensitively, which is
# broader than "flag columns").
job1 <- job %>%
  filter(YEAR %in% as.character(2010:2020)) %>%
  select(
    GEO_ID, NAME, NAICS, NAICS_LABEL, YEAR,
    FIRM, EMP,
    JOB_CREATION, JOB_CREATION_RATE,
    NET_JOB_CREATION, NET_JOB_CREATION_RATE
  )

#Filter jobs in 2020 and in 2010
# Single-year slices of the job table.
job2020 <- filter(job1, YEAR == 2020)

job2010 <- filter(job1, YEAR == 2010)

# Keep only the "Total for all sectors" rows, drop the columns that are now
# constant or unused, and split NAME ("<county>, <state>") into two columns.
job2020_allsector <- job2020 %>%
  filter(NAICS_LABEL == "Total for all sectors") %>%
  select(-c(NAICS, NAICS_LABEL, YEAR)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")

job2010_allsector <- job2010 %>%
  filter(NAICS_LABEL == "Total for all sectors") %>%
  select(-c(NAICS, NAICS_LABEL, YEAR)) %>%
  separate(NAME, into = c("County", "State"), sep = ", ")

#Extract the last 5 characters of the GEO_ID column so it can be joined to the ACS GEOID

library(dplyr)

# select just the last five characters of all values in GEOID column
# GEOID = last five characters of GEO_ID (a negative start counts from the end
# of the string); the original GEO_ID column is then dropped.
job2020_allsector <- job2020_allsector %>%
  mutate(GEOID = str_sub(GEO_ID, -5)) %>%
  select(-GEO_ID)

job2010_allsector <- job2010_allsector %>%
  mutate(GEOID = str_sub(GEO_ID, -5)) %>%
  select(-GEO_ID)

#Join job 2010 and 2020
# 2020 job figures with the matching 2010 figures alongside
# (.x = 2020, .y = 2010), restricted to the columns of interest.
joined_job1 <- job2020_allsector %>%
  left_join(job2010_allsector, by = "GEOID") %>%
  select(
    GEOID, County.x, State.x,
    FIRM.x, EMP.x, JOB_CREATION.x,
    FIRM.y, EMP.y, JOB_CREATION.y
  )

# Attach the job figures to the housing table. County/State already come from
# join_table, so the copies from the job data are dropped, as are the
# job-creation columns.
job_joined <- join_table %>%
  left_join(joined_job1, by = "GEOID") %>%
  select(-c(County.x, State.x, JOB_CREATION.x, JOB_CREATION.y))

#calculate all the changes

# Give the firm/employment columns year-stamped names, add 2010 -> 2021 changes
# for percent white, unemployment rate and total housing units, then drop the
# columns that are not carried into the summaries.
job_joined2 <- job_joined %>%
  rename(
    firm2020 = FIRM.x,
    firm2010 = FIRM.y,
    emp2020 = EMP.x,
    emp2010 = EMP.y
  ) %>%
  mutate(
    white_change = white2021 - white2010,
    unemployment_change = unemployment2021 - unemployment2010,
    totalhouse_change = totalhouse2021 - totalhouse2010
  ) %>%
  select(-c(
    mortgage2021, mortgage2010, bachelor2010,
    rent2021, rent2010,
    totalhouse2010, unemployment2010, white2010,
    travel2021, travel2010
  ))

#Handle NA

# The firm/employment counts arrive as text. Turn literal "NA" strings into
# real missing values, then convert each column to numeric. as.numeric() emits
# an "NAs introduced by coercion" warning for each column that still holds
# text it cannot parse as a number.
job_joined2 <- job_joined2 %>%
  mutate(across(
    c(firm2020, emp2020, firm2010, emp2010),
    ~ as.numeric(replace(.x, .x %in% "NA", NA))
  ))
## Warning: NAs introduced by coercion

#summary dataset

# Per-column summary statistics for the full joined table.
job_joined2 %>% summary()
##     GEOID              County             State             value2021      
##  Length:3221        Length:3221        Length:3221        Min.   :  22600  
##  Class :character   Class :character   Class :character   1st Qu.: 105300  
##  Mode  :character   Mode  :character   Mode  :character   Median : 139550  
##                                                           Mean   : 168143  
##                                                           3rd Qu.: 193650  
##                                                           Max.   :1225900  
##                                                           NA's   :3        
##    income2021       white2021      totalhouse2021    unemployment2021
##  Min.   : 12856   Min.   :  0.00   Min.   :     50   Min.   : 0.000  
##  1st Qu.: 47300   1st Qu.: 61.70   1st Qu.:   5340   1st Qu.: 3.700  
##  Median : 55652   Median : 81.70   Median :  12300   Median : 4.900  
##  Mean   : 57327   Mean   : 73.39   Mean   :  43838   Mean   : 5.468  
##  3rd Qu.: 64610   3rd Qu.: 91.40   3rd Qu.:  30832   3rd Qu.: 6.500  
##  Max.   :156821   Max.   :100.00   Max.   :3578801   Max.   :34.100  
##  NA's   :1                                                           
##   bachelor2021       value2010         income2010      value_change    
##  Min.   :      0   Min.   :  29700   Min.   : 10932   Min.   :-302400  
##  1st Qu.:   1250   1st Qu.:  80675   1st Qu.: 36464   1st Qu.:  17600  
##  Median :   3382   Median : 105400   Median : 42240   Median :  29400  
##  Mean   :  24120   Mean   : 131898   Mean   : 43619   Mean   :  36232  
##  3rd Qu.:  11126   3rd Qu.: 151525   3rd Qu.: 48899   3rd Qu.:  46900  
##  Max.   :2356572   Max.   :1000001   Max.   :115574   Max.   : 441100  
##  NA's   :78        NA's   :5         NA's   :4        NA's   :7        
##  value_change_per income_change    income_change_per    firm2020     
##  Min.   :-93.05   Min.   :-28945   Min.   :-38.77    Min.   :     5  
##  1st Qu.: 15.81   1st Qu.: 10000   1st Qu.: 24.34    1st Qu.:   186  
##  Median : 27.68   Median : 13517   Median : 31.12    Median :   447  
##  Mean   : 31.15   Mean   : 13722   Mean   : 31.82    Mean   :  1927  
##  3rd Qu.: 41.63   3rd Qu.: 17140   3rd Qu.: 38.68    3rd Qu.:  1246  
##  Max.   :257.27   Max.   : 54883   Max.   :149.89    Max.   :207759  
##  NA's   :7        NA's   :5        NA's   :5         NA's   :82      
##     emp2020           firm2010         emp2010         white_change    
##  Min.   :     15   Min.   :     0   Min.   :      0   Min.   :-36.700  
##  1st Qu.:   2101   1st Qu.:   196   1st Qu.:   2146   1st Qu.: -4.800  
##  Median :   6561   Median :   464   Median :   6201   Median : -3.000  
##  Mean   :  41921   Mean   :  1816   Mean   :  35371   Mean   : -3.476  
##  3rd Qu.:  21122   3rd Qu.:  1227   3rd Qu.:  19029   3rd Qu.: -1.900  
##  Max.   :4125775   Max.   :175974   Max.   :3553071   Max.   : 20.800  
##  NA's   :82        NA's   :82       NA's   :82        NA's   :4        
##  unemployment_change totalhouse_change
##  Min.   :-25.900     Min.   :-33121   
##  1st Qu.: -3.800     1st Qu.:  -193   
##  Median : -2.200     Median :    86   
##  Mean   : -2.311     Mean   :  3013   
##  3rd Qu.: -0.700     3rd Qu.:  1344   
##  Max.   : 20.300     Max.   :265266   
##  NA's   :4           NA's   :4

#Divide increases and decreases

# Re-split the enriched table by the sign of the home-value change. filter()
# drops rows whose value_change is NA, so those counties land in neither group.
# The sort key must be the bare column name: desc("value_change_per") with a
# quoted name sorts by a constant string and leaves the row order untouched.
increases <- job_joined2 %>%
  filter(value_change >= 0) %>%
  arrange(desc(value_change_per))

decreases <- job_joined2 %>%
  filter(value_change < 0) %>%
  arrange(desc(value_change_per))

# Number of counties per state in each group, most counties first.
decreases %>% count(State, name = "num_count", sort = TRUE)

# Increase 0 - 50%

increases_50 <- increases %>% filter(value_change_per <= 50)

increases_50 %>% count(State, name = "num_count", sort = TRUE)

# Increase 50 - 100%

increases_100 <- increases %>% filter(value_change_per > 50, value_change_per <= 100)

increases_100 %>% count(State, name = "num_count", sort = TRUE)

# Increase over 100%

increase_most <- increases %>% filter(value_change_per > 100)

increase_most %>% count(State, name = "num_count", sort = TRUE)

#summary

# Summary statistics for counties whose home value rose by more than 100%.
increase_most %>% summary()
##     GEOID              County             State             value2021     
##  Length:53          Length:53          Length:53          Min.   : 83000  
##  Class :character   Class :character   Class :character   1st Qu.:113100  
##  Mode  :character   Mode  :character   Mode  :character   Median :155100  
##                                                           Mean   :160574  
##                                                           3rd Qu.:192500  
##                                                           Max.   :309400  
##    income2021      white2021      totalhouse2021  unemployment2021
##  Min.   :44083   Min.   : 12.20   Min.   :  245   Min.   :0.000   
##  1st Qu.:56806   1st Qu.: 46.60   1st Qu.: 1244   1st Qu.:1.500   
##  Median :61042   Median : 84.80   Median : 2387   Median :2.500   
##  Mean   :64352   Mean   : 70.57   Mean   : 6540   Mean   :2.885   
##  3rd Qu.:71250   3rd Qu.: 92.40   3rd Qu.: 5784   3rd Qu.:4.200   
##  Max.   :97802   Max.   :100.00   Max.   :68662   Max.   :7.200   
##   bachelor2021     value2010        income2010     value_change   
##  Min.   :   90   Min.   : 31400   Min.   :24388   Min.   : 43400  
##  1st Qu.:  290   1st Qu.: 50900   1st Qu.:38404   1st Qu.: 64000  
##  Median :  549   Median : 69000   Median :42422   Median : 83800  
##  Mean   : 1999   Mean   : 70991   Mean   :43926   Mean   : 89583  
##  3rd Qu.: 1308   3rd Qu.: 86600   3rd Qu.:49234   3rd Qu.:108200  
##  Max.   :29891   Max.   :124300   Max.   :62308   Max.   :222800  
##  value_change_per income_change   income_change_per    firm2020   
##  Min.   :100.1    Min.   :-7292   Min.   :-11.73    Min.   :   5  
##  1st Qu.:107.0    1st Qu.:16513   1st Qu.: 38.15    1st Qu.:  60  
##  Median :116.4    Median :20797   Median : 45.68    Median : 122  
##  Mean   :127.6    Mean   :20426   Mean   : 48.87    Mean   : 349  
##  3rd Qu.:129.4    3rd Qu.:25099   3rd Qu.: 61.85    3rd Qu.: 310  
##  Max.   :257.3    Max.   :47002   Max.   :141.61    Max.   :4630  
##     emp2020          firm2010         emp2010       white_change    
##  Min.   :    35   Min.   :   6.0   Min.   :   20   Min.   :-15.600  
##  1st Qu.:   487   1st Qu.:  61.0   1st Qu.:  453   1st Qu.: -7.600  
##  Median :  1392   Median : 129.0   Median : 1040   Median : -4.400  
##  Mean   :  6080   Mean   : 294.8   Mean   : 4118   Mean   : -4.794  
##  3rd Qu.:  4687   3rd Qu.: 241.0   3rd Qu.: 3184   3rd Qu.: -2.600  
##  Max.   :101102   Max.   :3716.0   Max.   :58599   Max.   : 12.000  
##  unemployment_change totalhouse_change
##  Min.   :-13.800     Min.   : -699    
##  1st Qu.: -2.100     1st Qu.:  -62    
##  Median : -1.100     Median :   39    
##  Mean   : -1.285     Mean   : 1047    
##  3rd Qu.:  0.400     3rd Qu.:  279    
##  Max.   :  4.900     Max.   :15565
# Save the summary table for the >100% group.
increase_most %>%
  summary() %>%
  write.csv(file = "summary_output1.csv")

#Summary for increases of 50 - 100%

# Summary statistics for counties whose home value rose by 50 - 100%.
increases_100 %>% summary()
##     GEOID              County             State             value2021      
##  Length:461         Length:461         Length:461         Min.   :  52700  
##  Class :character   Class :character   Class :character   1st Qu.: 117100  
##  Mode  :character   Mode  :character   Mode  :character   Median : 150900  
##                                                           Mean   : 180501  
##                                                           3rd Qu.: 208200  
##                                                           Max.   :1225900  
##    income2021       white2021     totalhouse2021    unemployment2021
##  Min.   : 25049   Min.   : 3.40   Min.   :    357   Min.   : 0.000  
##  1st Qu.: 52400   1st Qu.:64.60   1st Qu.:   2610   1st Qu.: 2.700  
##  Median : 58932   Median :82.50   Median :   5758   Median : 4.000  
##  Mean   : 61111   Mean   :75.68   Mean   :  38957   Mean   : 4.336  
##  3rd Qu.: 67614   3rd Qu.:91.40   3rd Qu.:  16845   3rd Qu.: 5.400  
##  Max.   :140258   Max.   :98.00   Max.   :1821640   Max.   :16.200  
##   bachelor2021      value2010        income2010     value_change   
##  Min.   :    86   Min.   : 29700   Min.   :21707   Min.   : 20500  
##  1st Qu.:   776   1st Qu.: 71500   1st Qu.:37522   1st Qu.: 45400  
##  Median :  1721   Median : 88700   Median :43072   Median : 59600  
##  Mean   : 24473   Mean   :109728   Mean   :44397   Mean   : 70773  
##  3rd Qu.:  5927   3rd Qu.:124100   3rd Qu.:48699   3rd Qu.: 79200  
##  Max.   :979189   Max.   :785200   Max.   :99198   Max.   :441100  
##  value_change_per income_change    income_change_per    firm2020    
##  Min.   :50.19    Min.   :-11758   Min.   :-30.25    Min.   :    9  
##  1st Qu.:55.40    1st Qu.: 12351   1st Qu.: 28.94    1st Qu.:  117  
##  Median :62.65    Median : 15970   Median : 37.46    Median :  244  
##  Mean   :65.55    Mean   : 16714   Mean   : 38.23    Mean   : 1793  
##  3rd Qu.:73.22    3rd Qu.: 20753   3rd Qu.: 46.12    3rd Qu.:  760  
##  Max.   :99.85    Max.   : 54883   Max.   :149.89    Max.   :73889  
##     emp2020           firm2010        emp2010         white_change    
##  Min.   :     41   Min.   :    6   Min.   :     12   Min.   :-30.100  
##  1st Qu.:   1090   1st Qu.:  119   1st Qu.:   1073   1st Qu.: -5.600  
##  Median :   2882   Median :  254   Median :   2604   Median : -3.800  
##  Mean   :  41964   Mean   : 1556   Mean   :  32349   Mean   : -4.242  
##  3rd Qu.:  11001   3rd Qu.:  705   3rd Qu.:   9735   3rd Qu.: -2.500  
##  Max.   :2221796   Max.   :64841   Max.   :1811318   Max.   : 10.800  
##  unemployment_change totalhouse_change
##  Min.   :-9.700      Min.   : -1342   
##  1st Qu.:-3.000      1st Qu.:  -111   
##  Median :-1.400      Median :    42   
##  Mean   :-1.426      Mean   :  5507   
##  3rd Qu.: 0.100      3rd Qu.:  1323   
##  Max.   : 7.700      Max.   :265266
# Save the summary table for the 50 - 100% group.
increases_100 %>%
  summary() %>%
  write.csv(file = "summary_output2.csv")

#Summary for increases of 0 - 50%

# Summary statistics for counties whose home value rose by 0 - 50%.
increases_50 %>% summary()
##     GEOID              County             State             value2021      
##  Length:2542        Length:2542        Length:2542        Min.   :  43700  
##  Class :character   Class :character   Class :character   1st Qu.: 104300  
##  Mode  :character   Mode  :character   Mode  :character   Median : 137350  
##                                                           Mean   : 166243  
##                                                           3rd Qu.: 190775  
##                                                           Max.   :1118300  
##                                                                            
##    income2021       white2021      totalhouse2021    unemployment2021
##  Min.   : 12856   Min.   :  0.00   Min.   :    256   Min.   : 0.000  
##  1st Qu.: 47234   1st Qu.: 63.20   1st Qu.:   6544   1st Qu.: 3.800  
##  Median : 55000   Median : 82.70   Median :  13928   Median : 5.050  
##  Mean   : 56855   Mean   : 74.69   Mean   :  45925   Mean   : 5.517  
##  3rd Qu.: 63790   3rd Qu.: 91.60   3rd Qu.:  33580   3rd Qu.: 6.500  
##  Max.   :156821   Max.   :100.00   Max.   :3578801   Max.   :34.100  
##                                                                      
##   bachelor2021       value2010         income2010      value_change   
##  Min.   :     40   Min.   :  32800   Min.   : 10932   Min.   :     0  
##  1st Qu.:   1566   1st Qu.:  82725   1st Qu.: 36512   1st Qu.: 17425  
##  Median :   4040   Median : 109150   Median : 42155   Median : 26900  
##  Mean   :  24592   Mean   : 134304   Mean   : 43548   Mean   : 31939  
##  3rd Qu.:  12678   3rd Qu.: 153875   3rd Qu.: 48646   3rd Qu.: 39700  
##  Max.   :2356572   Max.   :1000001   Max.   :115574   Max.   :279200  
##  NA's   :42                                                           
##  value_change_per income_change    income_change_per    firm2020       
##  Min.   : 0.00    Min.   :-28945   Min.   :-38.77    Min.   :     5.0  
##  1st Qu.:15.62    1st Qu.:  9942   1st Qu.: 24.57    1st Qu.:   217.8  
##  Median :25.01    Median : 13122   Median : 30.59    Median :   513.5  
##  Mean   :25.28    Mean   : 13307   Mean   : 30.86    Mean   :  1992.6  
##  3rd Qu.:34.61    3rd Qu.: 16478   3rd Qu.: 37.27    3rd Qu.:  1356.5  
##  Max.   :50.00    Max.   : 42633   Max.   :109.41    Max.   :207759.0  
##                                                      NA's   :42        
##     emp2020           firm2010           emp2010         white_change    
##  Min.   :     15   Min.   :     8.0   Min.   :     15   Min.   :-18.200  
##  1st Qu.:   2747   1st Qu.:   234.8   1st Qu.:   2690   1st Qu.: -4.600  
##  Median :   7722   Median :   539.5   Median :   7380   Median : -2.900  
##  Mean   :  43009   Mean   :  1895.9   Mean   :  36758   Mean   : -3.305  
##  3rd Qu.:  23415   3rd Qu.:  1376.0   3rd Qu.:  21599   3rd Qu.: -1.800  
##  Max.   :4125775   Max.   :175974.0   Max.   :3553071   Max.   : 17.900  
##  NA's   :42        NA's   :42         NA's   :42                         
##  unemployment_change totalhouse_change
##  Min.   :-19.200     Min.   :-33121   
##  1st Qu.: -4.000     1st Qu.:  -223   
##  Median : -2.400     Median :    96   
##  Mean   : -2.505     Mean   :  2709   
##  3rd Qu.: -1.000     3rd Qu.:  1404   
##  Max.   : 20.300     Max.   :198083   
## 
# Save the summary table for the 0 - 50% group.
increases_50 %>%
  summary() %>%
  write.csv(file = "summary_output3.csv")

#Decrease

# Summary statistics for counties whose home value fell.
decreases %>% summary()
##     GEOID              County             State             value2021     
##  Length:158         Length:158         Length:158         Min.   : 22600  
##  Class :character   Class :character   Class :character   1st Qu.: 86025  
##  Mode  :character   Mode  :character   Mode  :character   Median :121750  
##                                                           Mean   :165703  
##                                                           3rd Qu.:231000  
##                                                           Max.   :479500  
##                                                                           
##    income2021       white2021     totalhouse2021   unemployment2021
##  Min.   : 15375   Min.   : 0.00   Min.   :   136   Min.   : 1.300  
##  1st Qu.: 32333   1st Qu.:10.80   1st Qu.:  6203   1st Qu.: 5.300  
##  Median : 44034   Median :57.75   Median : 13962   Median : 7.400  
##  Mean   : 51497   Mean   :48.34   Mean   : 38874   Mean   : 8.791  
##  3rd Qu.: 67130   3rd Qu.:76.83   3rd Qu.: 41227   3rd Qu.:10.100  
##  Max.   :123727   Max.   :96.70   Max.   :377249   Max.   :25.300  
##                                                                    
##   bachelor2021        value2010        income2010      value_change    
##  Min.   :    42.0   Min.   : 49500   Min.   : 11943   Min.   :-302400  
##  1st Qu.:   822.8   1st Qu.: 92075   1st Qu.: 28003   1st Qu.: -13900  
##  Median :  2880.0   Median :133200   Median : 37353   Median :  -7000  
##  Mean   : 24091.2   Mean   :179092   Mean   : 42104   Mean   : -13389  
##  3rd Qu.: 22003.5   3rd Qu.:244800   3rd Qu.: 54900   3rd Qu.:  -2200  
##  Max.   :322329.0   Max.   :493000   Max.   :100980   Max.   :   -200  
##  NA's   :36                                                            
##  value_change_per    income_change    income_change_per    firm2020    
##  Min.   :-93.04615   Min.   :-19438   Min.   :-31.57    Min.   :    9  
##  1st Qu.: -8.50373   1st Qu.:  3938   1st Qu.: 14.05    1st Qu.:  130  
##  Median : -4.67446   Median :  8569   Median : 21.60    Median :  391  
##  Mean   : -7.20364   Mean   :  9392   Mean   : 22.73    Mean   : 1847  
##  3rd Qu.: -2.09131   3rd Qu.: 14570   3rd Qu.: 28.55    3rd Qu.: 1763  
##  Max.   : -0.04213   Max.   : 32522   Max.   : 78.79    Max.   :19784  
##                                                         NA's   :37     
##     emp2020          firm2010        emp2010        white_change    
##  Min.   :    38   Min.   :    0   Min.   :     0   Min.   :-16.600  
##  1st Qu.:  1545   1st Qu.:  140   1st Qu.:  1668   1st Qu.: -5.375  
##  Median :  5643   Median :  412   Median :  5318   Median : -3.000  
##  Mean   : 36335   Mean   : 1868   Mean   : 32808   Mean   : -3.446  
##  3rd Qu.: 28151   3rd Qu.: 1827   3rd Qu.: 27970   3rd Qu.: -0.500  
##  Max.   :433388   Max.   :20464   Max.   :392604   Max.   :  9.300  
##  NA's   :37       NA's   :36      NA's   :36                        
##  unemployment_change totalhouse_change
##  Min.   :-25.900     Min.   :-3276.0  
##  1st Qu.: -4.200     1st Qu.: -117.0  
##  Median : -1.600     Median :  268.5  
##  Mean   : -2.166     Mean   : 1329.9  
##  3rd Qu.: -0.300     3rd Qu.: 1258.2  
##  Max.   : 15.500     Max.   :19117.0  
## 
# Save the summary table for the decreasing group.
decreases %>%
  summary() %>%
  write.csv(file = "summary_output4.csv")

#Filter housing over 1 mil

# Counties whose 2021 median home value is at least one million dollars.
mil <- filter(job_joined2, value2021 >= 1e6)
mil %>% summary()
##     GEOID              County             State             value2021      
##  Length:6           Length:6           Length:6           Min.   :1008500  
##  Class :character   Class :character   Class :character   1st Qu.:1091750  
##  Mode  :character   Mode  :character   Mode  :character   Median :1126350  
##                                                           Mean   :1127417  
##                                                           3rd Qu.:1179475  
##                                                           Max.   :1225900  
##    income2021       white2021     totalhouse2021   unemployment2021
##  Min.   : 93956   Min.   :29.90   Min.   : 12138   Min.   :4.500   
##  1st Qu.:118975   1st Qu.:38.15   1st Qu.:154320   1st Qu.:4.750   
##  Median :128598   Median :42.95   Median :343645   Median :5.250   
##  Mean   :124136   Mean   :49.17   Mean   :400254   Mean   :5.700   
##  3rd Qu.:135380   3rd Qu.:63.95   3rd Qu.:613812   3rd Qu.:6.275   
##  Max.   :140258   Max.   :71.70   Max.   :907018   Max.   :8.000   
##   bachelor2021      value2010         income2010     value_change   
##  Min.   :  5179   Min.   : 701000   Min.   :64971   Min.   : 82899  
##  1st Qu.:159280   1st Qu.: 784900   1st Qu.:74315   1st Qu.:200050  
##  Median :349726   Median : 805200   Median :84498   Median :329800  
##  Mean   :392595   Mean   : 827367   Mean   :80231   Mean   :300050  
##  3rd Qu.:652976   3rd Qu.: 857300   3rd Qu.:86550   3rd Qu.:427375  
##  Max.   :801081   Max.   :1000001   Max.   :89268   Max.   :441100  
##  value_change_per income_change   income_change_per    firm2020    
##  Min.   : 8.29    Min.   :28985   Min.   :39.86     Min.   :  811  
##  1st Qu.:23.87    1st Qu.:35353   1st Qu.:45.15     1st Qu.:10226  
##  Median :40.48    Median :46464   Median :53.26     Median :21872  
##  Mean   :38.25    Mean   :43905   Mean   :54.91     Mean   :27581  
##  3rd Qu.:55.19    3rd Qu.:52853   3rd Qu.:61.06     3rd Qu.:34357  
##  Max.   :61.83    Max.   :54883   Max.   :76.97     Max.   :76069  
##     emp2020           firm2010        emp2010         white_change    
##  Min.   :   4845   Min.   :  625   Min.   :   3534   Min.   :-11.200  
##  1st Qu.: 184283   1st Qu.: 9591   1st Qu.: 149588   1st Qu.: -6.550  
##  Median : 601265   Median :19291   Median : 400478   Median : -5.300  
##  Mean   : 809022   Mean   :26423   Mean   : 619552   Mean   : -5.417  
##  3rd Qu.:1008899   3rd Qu.:30430   3rd Qu.: 751529   3rd Qu.: -3.300  
##  Max.   :2456689   Max.   :78829   Max.   :1978316   Max.   : -1.100  
##  unemployment_change totalhouse_change
##  Min.   :-3.3000     Min.   :  812    
##  1st Qu.:-1.8750     1st Qu.: 3913    
##  Median :-1.5500     Median :22346    
##  Mean   :-0.4833     Mean   :28622    
##  3rd Qu.:-0.6250     3rd Qu.:50929    
##  Max.   : 5.8000     Max.   :68005

#House 500 - 1mil

# Counties whose 2021 median home value is in [500,000, 1,000,000).
fivetomil <- filter(job_joined2, value2021 >= 5e5, value2021 < 1e6)

# Counties per state in this bracket, most counties first.
fivetomil %>%
  group_by(State) %>%
  summarise(num_count = n()) %>%
  arrange(desc(num_count))
fivetomil %>% summary()
##     GEOID              County             State             value2021     
##  Length:49          Length:49          Length:49          Min.   :507400  
##  Class :character   Class :character   Class :character   1st Qu.:572600  
##  Mode  :character   Mode  :character   Mode  :character   Median :625200  
##                                                           Mean   :641202  
##                                                           3rd Qu.:685500  
##                                                           Max.   :945300  
##    income2021       white2021     totalhouse2021    unemployment2021
##  Min.   : 67228   Min.   :17.70   Min.   :   3158   Min.   :1.900   
##  1st Qu.: 84356   1st Qu.:42.30   1st Qu.:  30147   1st Qu.:3.600   
##  Median : 93547   Median :57.10   Median : 139302   Median :4.800   
##  Mean   : 98294   Mean   :57.08   Mean   : 328285   Mean   :4.731   
##  3rd Qu.:111790   3rd Qu.:72.40   3rd Qu.: 386583   3rd Qu.:5.700   
##  Max.   :156821   Max.   :92.10   Max.   :3578801   Max.   :7.200   
##   bachelor2021       value2010        income2010      value_change   
##  Min.   :   1843   Min.   :338700   Min.   : 43567   Min.   :  3000  
##  1st Qu.:  15704   1st Qu.:461700   1st Qu.: 61854   1st Qu.: 77000  
##  Median : 117251   Median :503900   Median : 68750   Median :122200  
##  Mean   : 258683   Mean   :511741   Mean   : 71878   Mean   :129461  
##  3rd Qu.: 351129   3rd Qu.:568700   3rd Qu.: 78958   3rd Qu.:177800  
##  Max.   :2356572   Max.   :723700   Max.   :115574   Max.   :279200  
##  value_change_per  income_change   income_change_per    firm2020     
##  Min.   : 0.5387   Min.   : 8835   Min.   :15.13     Min.   :   249  
##  1st Qu.:14.9697   1st Qu.:22507   1st Qu.:32.11     1st Qu.:  1722  
##  Median :25.8502   Median :25171   Median :37.25     Median :  7174  
##  Mean   :26.5528   Mean   :26416   Mean   :37.48     Mean   : 17998  
##  3rd Qu.:32.2022   3rd Qu.:30840   3rd Qu.:43.84     3rd Qu.: 18601  
##  Max.   :62.9493   Max.   :42633   Max.   :61.45     Max.   :207759  
##     emp2020           firm2010         emp2010         white_change    
##  Min.   :   1372   Min.   :   227   Min.   :   1045   Min.   :-10.200  
##  1st Qu.:  27523   1st Qu.:  1560   1st Qu.:  23403   1st Qu.: -7.100  
##  Median : 123815   Median :  6340   Median :  99143   Median : -5.400  
##  Mean   : 372244   Mean   : 15904   Mean   : 307710   Mean   : -5.243  
##  3rd Qu.: 469802   3rd Qu.: 16598   3rd Qu.: 403049   3rd Qu.: -3.600  
##  Max.   :4125775   Max.   :175974   Max.   :3553071   Max.   :  3.300  
##  unemployment_change totalhouse_change
##  Min.   :-5.000      Min.   :   596   
##  1st Qu.:-2.300      1st Qu.:  1107   
##  Median :-1.500      Median :  7104   
##  Mean   :-1.647      Mean   : 21996   
##  3rd Qu.:-1.000      3rd Qu.: 30181   
##  Max.   : 1.300      Max.   :153065

# Mid-value bucket: rows of job_joined2 whose value2021 is at least
# 100,000 and strictly below 500,000. Rows with a missing value2021
# are dropped by filter().

onetofive <- job_joined2 %>%
  filter(value2021 >= 100000, value2021 < 500000)

# Number of rows in this bucket per State, largest count first.
onetofive %>%
  group_by(State) %>%
  summarise(num_count = n()) %>%
  arrange(desc(num_count))

# Column-by-column summary statistics for the bucket.
summary(onetofive)
##     GEOID              County             State             value2021     
##  Length:2498        Length:2498        Length:2498        Min.   :100000  
##  Class :character   Class :character   Class :character   1st Qu.:123900  
##  Mode  :character   Mode  :character   Mode  :character   Median :156650  
##                                                           Mean   :179036  
##                                                           3rd Qu.:211375  
##                                                           Max.   :497500  
##                                                                           
##    income2021       white2021      totalhouse2021    unemployment2021
##  Min.   : 14666   Min.   :  0.00   Min.   :    245   Min.   : 0.00   
##  1st Qu.: 51387   1st Qu.: 66.72   1st Qu.:   7146   1st Qu.: 3.60   
##  Median : 58239   Median : 83.10   Median :  16026   Median : 4.80   
##  Mean   : 60015   Mean   : 76.13   Mean   :  46980   Mean   : 5.11   
##  3rd Qu.: 66650   3rd Qu.: 91.30   3rd Qu.:  38836   3rd Qu.: 6.10   
##  Max.   :129549   Max.   :100.00   Max.   :2253936   Max.   :25.60   
##                                                                      
##   bachelor2021       value2010        income2010      value_change   
##  Min.   :     40   Min.   : 45200   Min.   : 12927   Min.   :-91900  
##  1st Qu.:   1858   1st Qu.: 93000   1st Qu.: 38832   1st Qu.: 22700  
##  Median :   4879   Median :118900   Median : 43955   Median : 34700  
##  Mean   :  24256   Mean   :139358   Mean   : 45617   Mean   : 39626  
##  3rd Qu.:  15646   3rd Qu.:163225   3rd Qu.: 50557   3rd Qu.: 51000  
##  Max.   :1489931   Max.   :493000   Max.   :103643   Max.   :222800  
##  NA's   :39        NA's   :2        NA's   :2        NA's   :2       
##  value_change_per income_change    income_change_per    firm2020    
##  Min.   :-36.87   Min.   :-28945   Min.   :-38.77    Min.   :    5  
##  1st Qu.: 17.44   1st Qu.: 10894   1st Qu.: 25.30    1st Qu.:  260  
##  Median : 29.33   Median : 14184   Median : 31.25    Median :  599  
##  Mean   : 33.22   Mean   : 14383   Mean   : 31.99    Mean   : 1973  
##  3rd Qu.: 44.17   3rd Qu.: 17492   3rd Qu.: 38.24    3rd Qu.: 1559  
##  Max.   :257.27   Max.   : 49962   Max.   :149.89    Max.   :96009  
##  NA's   :2        NA's   :2        NA's   :2         NA's   :41     
##     emp2020           firm2010        emp2010         white_change    
##  Min.   :     15   Min.   :    6   Min.   :     12   Min.   :-30.100  
##  1st Qu.:   3252   1st Qu.:  267   1st Qu.:   2987   1st Qu.: -4.900  
##  Median :   9101   Median :  619   Median :   8728   Median : -3.200  
##  Mean   :  43134   Mean   : 1869   Mean   :  36517   Mean   : -3.576  
##  3rd Qu.:  28179   3rd Qu.: 1544   3rd Qu.:  25305   3rd Qu.: -2.000  
##  Max.   :2526838   Max.   :90750   Max.   :2173540   Max.   : 17.500  
##  NA's   :41        NA's   :41      NA's   :41        NA's   :2        
##  unemployment_change totalhouse_change
##  Min.   :-19.200     Min.   :-33121   
##  1st Qu.: -3.700     1st Qu.:   -98   
##  Median : -2.200     Median :   287   
##  Mean   : -2.296     Mean   :  3433   
##  3rd Qu.: -0.900     3rd Qu.:  2036   
##  Max.   : 15.500     Max.   :265266   
##  NA's   :2           NA's   :2

# Low-value bucket: rows of job_joined2 whose value2021 is strictly
# below 100,000. Rows with a missing value2021 are dropped by filter().

lessthanone <- job_joined2 %>%
  filter(value2021 < 100000)

# Number of rows in this bucket per State, largest count first.
lessthanone %>%
  group_by(State) %>%
  summarise(num_count = n()) %>%
  arrange(desc(num_count))

# Column-by-column summary statistics for the bucket.
summary(lessthanone)
##     GEOID              County             State             value2021    
##  Length:665         Length:665         Length:665         Min.   :22600  
##  Class :character   Class :character   Class :character   1st Qu.:77400  
##  Mode  :character   Mode  :character   Mode  :character   Median :85500  
##                                                           Mean   :83715  
##                                                           3rd Qu.:93200  
##                                                           Max.   :99900  
##                                                                          
##    income2021      white2021      totalhouse2021   unemployment2021
##  Min.   :12856   Min.   :  0.00   Min.   :   136   Min.   : 0.000  
##  1st Qu.:37720   1st Qu.: 44.00   1st Qu.:  2815   1st Qu.: 3.900  
##  Median :43942   Median : 70.00   Median :  5231   Median : 5.900  
##  Mean   :43599   Mean   : 64.65   Mean   :  8055   Mean   : 6.892  
##  3rd Qu.:51351   3rd Qu.: 92.00   3rd Qu.:  9544   3rd Qu.: 8.500  
##  Max.   :81354   Max.   :100.00   Max.   :290393   Max.   :34.100  
##                                                                    
##   bachelor2021     value2010        income2010     value_change    
##  Min.   :   42   Min.   : 29700   Min.   :10932   Min.   :-302400  
##  1st Qu.:  567   1st Qu.: 60500   1st Qu.:29870   1st Qu.:   8400  
##  Median : 1050   Median : 69500   Median :34055   Median :  14800  
##  Mean   : 1808   Mean   : 69636   Mean   :33609   Mean   :  14173  
##  3rd Qu.: 1874   3rd Qu.: 77500   3rd Qu.:39183   3rd Qu.:  21450  
##  Max.   :95429   Max.   :325000   Max.   :61563   Max.   :  66500  
##  NA's   :39      NA's   :2        NA's   :2       NA's   :2        
##  value_change_per income_change    income_change_per    firm2020     
##  Min.   :-93.05   Min.   :-19438   Min.   :-31.57    Min.   :   9.0  
##  1st Qu.: 11.81   1st Qu.:  6085   1st Qu.: 19.66    1st Qu.:  87.0  
##  Median : 21.54   Median : 10051   Median : 29.50    Median : 158.0  
##  Mean   : 23.63   Mean   : 10015   Mean   : 30.53    Mean   : 248.6  
##  3rd Qu.: 32.75   3rd Qu.: 13346   3rd Qu.: 39.40    3rd Qu.: 295.0  
##  Max.   :211.78   Max.   : 37163   Max.   :141.61    Max.   :9058.0  
##  NA's   :2        NA's   :2        NA's   :2         NA's   :40      
##     emp2020          firm2010         emp2010          white_change    
##  Min.   :    24   Min.   :   0.0   Min.   :     0.0   Min.   :-18.200  
##  1st Qu.:   893   1st Qu.:  95.0   1st Qu.:   881.8   1st Qu.: -4.000  
##  Median :  1868   Median : 174.0   Median :  2018.0   Median : -2.400  
##  Mean   :  4026   Mean   : 272.2   Mean   :  4015.5   Mean   : -2.925  
##  3rd Qu.:  4476   3rd Qu.: 321.8   3rd Qu.:  4546.0   3rd Qu.: -1.200  
##  Max.   :216880   Max.   :8235.0   Max.   :171284.0   Max.   : 17.900  
##  NA's   :40       NA's   :39       NA's   :39         NA's   :2        
##  unemployment_change totalhouse_change
##  Min.   :-25.900     Min.   :-3411.0  
##  1st Qu.: -4.700     1st Qu.: -449.5  
##  Median : -2.000     Median : -241.0  
##  Mean   : -2.447     Mean   : -192.1  
##  3rd Qu.:  0.100     3rd Qu.:  -82.5  
##  Max.   : 20.300     Max.   :47183.0  
##  NA's   :2           NA's   :2

# Region-level values for ACS variable B25077_001 from the 5-year ACS
# ending in 2021.
# NOTE(review): B25077_001 is presumably the median home value estimate;
# confirm against the `vars` lookup table loaded at the top of the file.

region_value <- get_acs(
  survey = "acs5",
  year = 2021,
  geography = "region",
  variables = "B25077_001"
)
## Getting data from the 2017-2021 5-year ACS
# Same variable and geography as region_value, but from the 5-year ACS
# ending in 2010; used as the baseline for the change calculation.
region_value1 <- get_acs(
  survey = "acs5",
  year = 2010,
  geography = "region",
  variables = "B25077_001"
)
## Getting data from the 2006-2010 5-year ACS

# Percent change per region between the two pulls.
# After the join, the `.x` columns come from region_value (2021) and the
# `.y` columns from region_value1 (2010). `percent` is the relative change
# of estimate.x over estimate.y, in percent; no inflation adjustment is
# applied in this step.

region_change <- region_value %>%
  left_join(region_value1, by = "GEOID") %>%
  select(GEOID, NAME.x, estimate.x, estimate.y) %>%
  mutate(percent = (estimate.x - estimate.y) / estimate.y * 100)