Loading Dataset
home_values <- read_excel("home_valules_sf_PA_NY.xlsx")
home_list_prices <- read_csv("House Price Listings.csv")
hnames <- read_csv("home_valules_sf_PA_NY.csv", n_max=0, col_types = paste(rep("c", 9,"n",260) ,collapse="")) %>% names()
## # A tibble: 6 x 269
## RegionID SizeRank RegionName RegionType StateName State City Metro CountyName
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 61639 0 10025 Zip NY NY New York New ~ New York ~
## 2 61637 2 10023 Zip NY NY New York New ~ New York ~
## 3 61616 6 10002 Zip NY NY New York New ~ New York ~
## 4 62037 10 11226 Zip NY NY New York New ~ Kings Cou~
## 5 61807 11 10467 Zip NY NY New York New ~ Bronx Cou~
## 6 61630 15 10016 Zip NY NY New York New ~ New York ~
## # ... with 260 more variables: 36556 <dbl>, 36585 <dbl>, 36616 <dbl>,
## # 36646 <dbl>, 36677 <dbl>, 36707 <dbl>, 36738 <dbl>, 36769 <dbl>,
## # 36799 <dbl>, 36830 <dbl>, 36860 <dbl>, 36891 <dbl>, 36922 <dbl>,
## # 36950 <dbl>, 36981 <dbl>, 37011 <dbl>, 37042 <dbl>, 37072 <dbl>,
## # 37103 <dbl>, 37134 <dbl>, 37164 <dbl>, 37195 <dbl>, 37225 <dbl>,
## # 37256 <dbl>, 37287 <dbl>, 37315 <dbl>, 37346 <dbl>, 37376 <dbl>,
## # 37407 <dbl>, 37437 <dbl>, 37468 <dbl>, 37499 <dbl>, 37529 <dbl>, ...
## # A tibble: 6 x 51
## RegionID SizeRank RegionName RegionType StateName `11/30/2017` `12/31/2017`
## <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 102001 0 United States Country <NA> 274900 269900
## 2 394913 1 New York, NY Msa NY 519900 515000
## 3 753899 2 Los Angeles-~ Msa CA 797000 799000
## 4 394463 3 Chicago, IL Msa IL 305000 299000
## 5 394514 4 Dallas-Fort ~ Msa TX 339999 340990
## 6 394974 5 Philadelphia~ Msa PA 259000 249900
## # ... with 44 more variables: 1/31/2018 <dbl>, 2/28/2018 <dbl>,
## # 3/31/2018 <dbl>, 4/30/2018 <dbl>, 5/31/2018 <dbl>, 6/30/2018 <dbl>,
## # 7/31/2018 <dbl>, 8/31/2018 <dbl>, 9/30/2018 <dbl>, 10/31/2018 <dbl>,
## # 11/30/2018 <dbl>, 12/31/2018 <dbl>, 1/31/2019 <dbl>, 2/28/2019 <dbl>,
## # 3/31/2019 <dbl>, 4/30/2019 <dbl>, 5/31/2019 <dbl>, 6/30/2019 <dbl>,
## # 7/31/2019 <dbl>, 8/31/2019 <dbl>, 9/30/2019 <dbl>, 10/31/2019 <dbl>,
## # 11/30/2019 <dbl>, 12/31/2019 <dbl>, 1/31/2020 <dbl>, 2/29/2020 <dbl>, ...
Drilldown into Local Markets
metro_list <- c("Newark","Allentown","Philadelphia")
home_values_pivot_f <- home_values_pivot %>%
filter(str_detect(metro, metro_list))
rm(home_values)
rm(home_values_pivot)
cities_interest <- c("Allentown", "Bethlehem", "Easton", "North Whitehall Township")
home_values_pivot_f <- home_values_pivot_f %>% filter(city %in% cities_interest) %>%
mutate(zip = as.character(region_name)) %>%
select(-region_name)
hometown_values <- home_values_pivot_f %>% group_by(city,year) %>% summarize(avg_value = mean(house_value)) %>% ungroup()
hometown_values <- hometown_values %>%
mutate(percent_change = round((avg_value/ lag(avg_value)-1) * 100,2))
Subsetting the Data for Further Analysis
eight_nine <- home_values_pivot_f %>% filter(between(date_c, as.Date("2018-01-01"), as.Date("2019-12-31")))
ten_eleven <- home_values_pivot_f %>% filter(between(date_c, as.Date("2020-01-01"), as.Date("2021-12-31")))
by_zip <- home_values_pivot_f %>% filter( zip %in% c(18017,18018, 18101))
Plot YOY Percentage Change in Housing Value within Lehigh Valley Area
ggplot(hometown_values %>% filter(!year=="2000"), aes(year,percent_change)) + geom_col(aes(fill=city)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Plot - Housing Value Changes over Time
ggplot(eight_nine, aes(date_c, house_value)) + geom_line(aes(color=city)) + ylab("year")

ggplot(ten_eleven, aes(date_c, house_value)) + geom_line(aes(color=city)) + ylab("year")

ggplot(by_zip,aes(date_c, house_value)) + geom_point(aes(color=zip))

Plot - Average List Pricing Comparison to US Avg
compare %>% ggplot(aes(x=year_month,y= housing_list_price, color=RegionName)) + geom_line() + geom_smooth(method = "lm")

compare %>% filter(between(year_month, as.Date("2020-01-01"), as.Date("2021-12-31"))) %>% ggplot(aes(x=year_month,y= housing_list_price, color=RegionName)) + geom_line() + geom_smooth(method = "lm")

compare %>% filter(between(year_month, as.Date("2020-01-01"), as.Date("2021-12-31"))) %>% ggplot(aes(x=year_month, y=percent_change)) + geom_point(aes(color=RegionName))

usa_by_month %>% ggplot(aes(x=year,y=percent_change_avg)) + geom_col()
