Loading Dataset

home_values <- read_excel("home_valules_sf_PA_NY.xlsx")

home_list_prices <- read_csv("House Price Listings.csv")
hnames <- read_csv("home_valules_sf_PA_NY.csv", n_max=0, col_types = paste(rep("c", 9,"n",260) ,collapse="")) %>% names()
head(home_values)
## # A tibble: 6 x 269
##   RegionID SizeRank RegionName RegionType StateName State City     Metro CountyName
##      <dbl>    <dbl>      <dbl> <chr>      <chr>     <chr> <chr>    <chr> <chr>     
## 1    61639        0      10025 Zip        NY        NY    New York New ~ New York ~
## 2    61637        2      10023 Zip        NY        NY    New York New ~ New York ~
## 3    61616        6      10002 Zip        NY        NY    New York New ~ New York ~
## 4    62037       10      11226 Zip        NY        NY    New York New ~ Kings Cou~
## 5    61807       11      10467 Zip        NY        NY    New York New ~ Bronx Cou~
## 6    61630       15      10016 Zip        NY        NY    New York New ~ New York ~
## # ... with 260 more variables: 36556 <dbl>, 36585 <dbl>, 36616 <dbl>,
## #   36646 <dbl>, 36677 <dbl>, 36707 <dbl>, 36738 <dbl>, 36769 <dbl>,
## #   36799 <dbl>, 36830 <dbl>, 36860 <dbl>, 36891 <dbl>, 36922 <dbl>,
## #   36950 <dbl>, 36981 <dbl>, 37011 <dbl>, 37042 <dbl>, 37072 <dbl>,
## #   37103 <dbl>, 37134 <dbl>, 37164 <dbl>, 37195 <dbl>, 37225 <dbl>,
## #   37256 <dbl>, 37287 <dbl>, 37315 <dbl>, 37346 <dbl>, 37376 <dbl>,
## #   37407 <dbl>, 37437 <dbl>, 37468 <dbl>, 37499 <dbl>, 37529 <dbl>, ...
head(home_list_prices)
## # A tibble: 6 x 51
##   RegionID SizeRank RegionName    RegionType StateName `11/30/2017` `12/31/2017`
##      <dbl>    <dbl> <chr>         <chr>      <chr>            <dbl>        <dbl>
## 1   102001        0 United States Country    <NA>            274900       269900
## 2   394913        1 New York, NY  Msa        NY              519900       515000
## 3   753899        2 Los Angeles-~ Msa        CA              797000       799000
## 4   394463        3 Chicago, IL   Msa        IL              305000       299000
## 5   394514        4 Dallas-Fort ~ Msa        TX              339999       340990
## 6   394974        5 Philadelphia~ Msa        PA              259000       249900
## # ... with 44 more variables: 1/31/2018 <dbl>, 2/28/2018 <dbl>,
## #   3/31/2018 <dbl>, 4/30/2018 <dbl>, 5/31/2018 <dbl>, 6/30/2018 <dbl>,
## #   7/31/2018 <dbl>, 8/31/2018 <dbl>, 9/30/2018 <dbl>, 10/31/2018 <dbl>,
## #   11/30/2018 <dbl>, 12/31/2018 <dbl>, 1/31/2019 <dbl>, 2/28/2019 <dbl>,
## #   3/31/2019 <dbl>, 4/30/2019 <dbl>, 5/31/2019 <dbl>, 6/30/2019 <dbl>,
## #   7/31/2019 <dbl>, 8/31/2019 <dbl>, 9/30/2019 <dbl>, 10/31/2019 <dbl>,
## #   11/30/2019 <dbl>, 12/31/2019 <dbl>, 1/31/2020 <dbl>, 2/29/2020 <dbl>, ...

Transforming House Value Dataset

home_values_pivot <- home_values %>% pivot_longer(cols=-c("RegionID":"CountyName"),names_to = "date", values_to="house_value")
home_values_pivot <- home_values_pivot %>% mutate(date_n = as.numeric(date),
        date_d = as.Date(date_n, origin=lubridate::origin),
        date_c = date_d - lubridate::years(70))
home_values_pivot <- home_values_pivot %>%
  select(-c(date_n,date_d)) %>%
  mutate(year = str_sub(date_c,1,4))
home_values_pivot <- home_values_pivot %>% janitor::clean_names()

Drilldown into Local Markets

metro_list <- c("Newark","Allentown","Philadelphia")

home_values_pivot_f <- home_values_pivot %>%
  filter(str_detect(metro, metro_list))

rm(home_values)
rm(home_values_pivot)
cities_interest <- c("Allentown", "Bethlehem", "Easton", "North Whitehall Township")

home_values_pivot_f <- home_values_pivot_f %>% filter(city %in% cities_interest) %>%
  mutate(zip = as.character(region_name)) %>%
  select(-region_name)

hometown_values <- home_values_pivot_f %>% group_by(city,year) %>% summarize(avg_value = mean(house_value)) %>% ungroup()

hometown_values <- hometown_values %>%
mutate(percent_change = round((avg_value/ lag(avg_value)-1) * 100,2))

Subsetting the Data for Further Analysis

eight_nine <- home_values_pivot_f %>% filter(between(date_c, as.Date("2018-01-01"), as.Date("2019-12-31")))

ten_eleven <- home_values_pivot_f %>% filter(between(date_c, as.Date("2020-01-01"), as.Date("2021-12-31")))

by_zip <- home_values_pivot_f %>% filter( zip %in% c(18017,18018, 18101))

Plot YOY Percentage Change in Housing Value within Lehigh Valley Area

ggplot(hometown_values %>% filter(!year=="2000"), aes(year,percent_change)) + geom_col(aes(fill=city)) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Plot - Housing Value Changes over Time

ggplot(eight_nine, aes(date_c, house_value)) + geom_line(aes(color=city)) + ylab("year")

ggplot(ten_eleven, aes(date_c, house_value)) + geom_line(aes(color=city)) + ylab("year")

ggplot(by_zip,aes(date_c, house_value)) + geom_point(aes(color=zip))

Housing List Prices in Allentown

Transforming Housing List Price Dataset

usa <- home_list_prices %>% filter(str_detect(RegionName, "United States"))

atown <- home_list_prices %>% filter(str_detect( RegionName, "Allentown"))

atown <- atown %>% select(-c(RegionID,RegionType,StateName)) %>%
  pivot_longer(-c(SizeRank,RegionName),names_to="year_month", values_to="housing_list_price")

atown$year_month <- as.Date(atown$year_month, "%m/%d/%Y")

atown <- atown %>% mutate(month = substr(year_month,6,7),
                          year = lubridate::year(year_month))

Calculating YOY Percentage Change in Housing List Price

atown <- atown %>% group_by(year) %>% mutate(
  percent_change = round((housing_list_price/ lag(housing_list_price)-1) * 100,2)) %>% ungroup()

atown_by_month <- atown %>% group_by(year) %>% summarize(avg_value = mean(housing_list_price)) %>%
  ungroup()

atown_by_month <- atown_by_month %>% mutate(percent_change_avg = round((avg_value/ lag(avg_value)-1) * 100,2))

Transforming Housing List Price Dataset - US

usa <- usa %>% select(-c(RegionID,RegionType,StateName)) %>%
  pivot_longer(-c(SizeRank,RegionName),names_to="year_month", values_to="housing_list_price")

usa$year_month <- as.Date(usa$year_month, "%m/%d/%Y")

usa <- usa %>% mutate(month = substr(year_month,6,7),
                          year = lubridate::year(year_month))

usa <- usa %>% group_by(year) %>% mutate(
  percent_change = round((housing_list_price/ lag(housing_list_price)-1) * 100,2)) %>% ungroup()

usa_by_month <- usa %>% group_by(year) %>% summarize(avg_value = mean(housing_list_price)) %>%
  ungroup()

usa_by_month <- usa_by_month %>% mutate(percent_change_avg = round((avg_value/ lag(avg_value)-1) * 100,2))

compare <- rbind(atown,usa)
compare_by_month <- rbind(atown_by_month, usa_by_month)

Plot - Average Housing List Price By Year

atown %>% ggplot(aes(x=year_month,y= housing_list_price)) + geom_line()

atown %>% filter(between(year_month, as.Date("2020-01-01"), as.Date("2021-12-31"))) %>%  ggplot(aes(x=year_month, y=percent_change)) + geom_point(aes(color=month), size=2)

atown_by_month %>% ggplot(aes(x=year,y=percent_change_avg)) + geom_col()

Plot - Average List Pricing Comparison to US Avg

compare %>% ggplot(aes(x=year_month,y= housing_list_price, color=RegionName)) + geom_line() + geom_smooth(method = "lm")

compare %>% filter(between(year_month, as.Date("2020-01-01"), as.Date("2021-12-31"))) %>% ggplot(aes(x=year_month,y= housing_list_price, color=RegionName)) + geom_line() + geom_smooth(method = "lm")

compare %>% filter(between(year_month, as.Date("2020-01-01"), as.Date("2021-12-31"))) %>%  ggplot(aes(x=year_month, y=percent_change)) + geom_point(aes(color=RegionName))

usa_by_month %>% ggplot(aes(x=year,y=percent_change_avg)) + geom_col()