All the databases I use are from zillow.com. I will use them to analyze the house information, including sales count, inventory, price and value, in the San Francisco Metropolitan Statistical Area (MSA). San Francisco MSA consists of 5 counties: San Francisco, Alameda, Marin, Contra Costa, and San Mateo County. I will deliver the analysis through two parts: 1. Sales Count and Inventory; 2. Price and Value.
options(Ncpus = 8)
library(pacman)
p_load(fs, readr, lubridate, tidyverse, janitor, DataExplorer, summarytools, data.table, dtplyr, ggplot2, ggpubr, zoo)
sale_county <- read_csv("Sale_Counts_County.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## StateName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
head(sale_county, n=2)
sale_county <- sale_county %>%
pivot_longer(`2008-03`:`2020-03`, names_to = 'month', values_to = 'sold') %>%
filter(!is.na(sold))
head(sale_county, n=2)
sale_county$month <- as.yearmon(sale_county$month)
head(sale_county, n = 2)
msa <- c('Alameda County', 'Contra Costa County', 'San Francisco County',
'Marin County', 'San Mateo County')
sale_msa1 <- sale_county %>%
filter(RegionName %in% msa) %>%
select(month, sold) %>%
group_by(month) %>%
summarize(tot_sold = sum(sold))
## `summarise()` ungrouping output (override with `.groups` argument)
a <- ggplot(sale_msa1, aes(x=month, y=tot_sold)) +
geom_line(col = 'blue4') +
theme(aspect.ratio=0.3)
sale_msa2 <- sale_county %>%
filter(RegionName %in% msa) %>%
select(month, sold) %>%
mutate(year = year(month)) %>%
group_by(year) %>%
summarize(tot_sold = sum(sold))
## `summarise()` ungrouping output (override with `.groups` argument)
b <- ggplot(sale_msa2, aes(x=year, y=tot_sold)) +
geom_line(col = 'red3') +
theme(aspect.ratio=0.3)
ggarrange(a,b, nrow = 2)
city <- c('Oakland', 'San Francisco', 'Hayward', 'Redwood City', 'Concord', 'Berkeley', 'Castro Valley', 'Fremont')
sale_count <- read_csv("Sale_Counts_City.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## StateName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
sale_count2 <- sale_count %>%
pivot_longer(`2008-03`:`2020-03`, names_to = 'month', values_to = 'sold') %>%
filter(!is.na(sold))
sale_count2$month <- as.yearmon(sale_count2$month)
sale_count_city1 <- sale_count2 %>%
filter(RegionName %in% city) %>%
select(month, sold, RegionName) %>%
group_by(month, RegionName) %>%
summarize(tot_sold = sum(sold))
## `summarise()` regrouping output by 'month' (override with `.groups` argument)
a <- sale_count_city1 %>%
ggplot(aes(x=month, y=tot_sold, col = RegionName)) +
geom_line() +
theme(aspect.ratio=0.4)
sale_count_city2 <- sale_count2 %>%
filter(RegionName %in% city) %>%
select(month, sold, RegionName) %>%
mutate(year = year(month)) %>%
group_by(year, RegionName) %>%
summarize(tot_sold = sum(sold))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
b <- sale_count_city2 %>%
ggplot(aes(x=year, y=tot_sold, col = RegionName)) +
geom_line() +
theme(aspect.ratio=0.4)
ggarrange(a,b, nrow = 2)
# download.file('http://files.zillowstatic.com/research/public_v2/invt_fs/Metro_invt_fs_uc_SFRCondo_raw_month.csv', 'inventory_count_city.csv')
inventory_count <- read_csv('inventory_count_city.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
inventory_count <- separate(inventory_count, RegionName, c('RegionName', 'State'), sep = ',')
## Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
inventory_count2 <- inventory_count %>%
filter(SizeRank > 0) %>%
pivot_longer(`2017-10-31`:`2020-05-31`, names_to = 'month', values_to = 'inventory') %>%
filter(!is.na(inventory))
inventory_count2$month <- as.yearmon(inventory_count2$month)
inventory_msa <- inventory_count2 %>%
filter(RegionName == 'San Francisco')
ggplot(inventory_msa,aes(x=month, y=inventory)) +
geom_line(col = 'dark blue')
sale_inventory_msa <- inventory_msa %>%
inner_join(sale_msa1, by = c( 'month')) %>%
select(month, inventory, tot_sold)
head(sale_inventory_msa, n = 3)
sale_inventory_msa <- sale_inventory_msa %>%
pivot_longer(inventory:tot_sold, names_to = 'type', values_to = 'count')
ggplot(sale_inventory_msa,aes(x=month, y=count, col = type)) +
geom_line()
price <- read_csv("Sale_Prices_City.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## StateName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
price <- price %>%
pivot_longer(`2008-03`:`2020-03`, names_to = 'month', values_to = 'price') %>%
filter(!is.na(price))
price$month <- as.yearmon(price$month)
msa <- c('Alameda','Albany','Berkeley','Dublin','Emeryville','Fremont','Hayward','Livermore','Newark','Oakland','Piedmont','Pleasanton','San Leandro','Union City','Antioch','Brentwood','Clayton','Concord','El Cerrito','Hercules','Lafayette','Martinez','Oakley','Orinda','Pinole','Pittsburg','Pleasant Hill','Richmond','San Pablo','San Ramon','Walnut Creek','Belmont','Brisbane','Burlingame','Daly City','East Palo Alto','Redwood City' ,'San Bruno','San Carlos','San Mateo','South San Francisco','Foster City','Half Moon Bay','Menlo Park','Millbrae','Pacifica','Belvedere','Corte Madera','Fairfax','Larkspur','Mill Valley','Novato','Ross','San Anselmo','San Rafael','Sausalito','Tiburon','San Francisco', 'Castro Valley')
price_msa <- price %>%
filter(RegionName %in% msa) %>%
mutate(year = year(month)) %>%
select(RegionName, month, year, price)
head(price_msa, n = 2)
price_msa1 <- price_msa %>%
select(RegionName, month, price) %>%
group_by(month) %>%
summarize(mean_price = mean(price))
## `summarise()` ungrouping output (override with `.groups` argument)
a <- ggplot(price_msa1,aes(x=month, y=mean_price)) +
geom_line(col = 'dark blue')+
theme(aspect.ratio=0.37)
price_msa2 <- price_msa %>%
mutate(year = year(month)) %>%
select(RegionName, RegionName, year, month, price) %>%
group_by(year) %>%
summarize(mean_price = mean(price))
## `summarise()` ungrouping output (override with `.groups` argument)
b <- ggplot(price_msa2,aes(x=year, y=mean_price)) +
geom_line(col = 'orange') +
theme(aspect.ratio=0.37)
ggarrange(a,b, nrow = 2)
city <- c('Oakland', 'San Francisco', 'Hayward', 'Redwood City', 'Concord', 'Berkeley', 'Castro Valley', 'Fremont')
price_city <- price_msa %>%
filter(RegionName %in% city)
price_city1 <- price_city %>%
select(month, price, RegionName) %>%
group_by(month, RegionName) %>%
summarize(mean_price = mean(price))
## `summarise()` regrouping output by 'month' (override with `.groups` argument)
a <- price_city1 %>%
ggplot(aes(x=month, y=mean_price, col = RegionName)) +
geom_line()+
theme(aspect.ratio=0.35)
price_city2 <- price_city %>%
select(year, price, RegionName) %>%
group_by(year, RegionName) %>%
summarize(mean_price = mean(price))
## `summarise()` regrouping output by 'year' (override with `.groups` argument)
b <- price_city2 %>%
ggplot(aes(x=year, y=mean_price, col = RegionName)) +
geom_line() +
theme(aspect.ratio=0.35)
ggarrange(a,b, nrow = 2)
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_average.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_uc_sfr_tier_0.33_0.67_sm_sa_mon.csv', 'value_singlefamily.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_uc_condo_tier_0.33_0.67_sm_sa_mon.csv', 'value_condo.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_bdrmcnt_1_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_one_room.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_bdrmcnt_2_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_two_room.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_bdrmcnt_3_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_three_room.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_bdrmcnt_4_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_four_room.csv')
#download.file('http://files.zillowstatic.com/research/public_v2/zhvi/City_zhvi_bdrmcnt_5_uc_sfrcondo_tier_0.33_0.67_sm_sa_mon.csv', 'value_five_room.csv')
value_ave <- read_csv('value_average.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_sg <- read_csv('value_singlefamily.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_cd <- read_csv('value_condo.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_1 <- read_csv('value_one_room.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_2 <- read_csv('value_two_room.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_3 <- read_csv('value_three_room.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_4 <- read_csv('value_four_room.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
value_5 <- read_csv('value_five_room.csv')
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## RegionName = col_character(),
## RegionType = col_character(),
## StateName = col_character(),
## State = col_character(),
## Metro = col_character(),
## CountyName = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
head(value_ave, n =2)
value_1 <- value_1 %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'room1') %>%
filter(!is.na(room1))
value_2 <- value_2 %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'room2') %>%
filter(!is.na(room2))
value_3 <- value_3 %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'room3') %>%
filter(!is.na(room3))
value_4 <- value_4 %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'room4') %>%
filter(!is.na(room4))
value_5 <- value_5 %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'room5') %>%
filter(!is.na(room5))
value_ave <- value_ave %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'valuea') %>%
filter(!is.na(valuea))
value_sg <- value_sg %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'values') %>%
filter(!is.na(values))
value_cd <- value_cd %>%
pivot_longer(`1996-01-31`:`2020-05-31` , names_to = 'month', values_to = 'valuec') %>%
filter(!is.na(valuec))
head(value_1, n=2)
value_s <- value_sg %>%
filter(RegionName %in% msa)
value_c <- value_cd %>%
filter(RegionName %in% msa)
value_a <- value_ave %>%
filter(RegionName %in% msa)
value_1 <- value_1 %>%
filter(RegionName %in% city)
value_2 <- value_2 %>%
filter(RegionName %in% city)
value_3 <- value_3 %>%
filter(RegionName %in% city)
value_4 <- value_4 %>%
filter(RegionName %in% city)
value_5 <- value_5 %>%
filter(RegionName %in% city)
value_1$month <- as.yearmon(value_1$month)
value_2$month <- as.yearmon(value_2$month)
value_3$month <- as.yearmon(value_3$month)
value_4$month <- as.yearmon(value_4$month)
value_5$month <- as.yearmon(value_5$month)
value_a$month <- as.yearmon(value_a$month)
value_s$month <- as.yearmon(value_s$month)
value_c$month <- as.yearmon(value_c$month)
head(value_1, n =2)
value_s_msa <- value_s %>%
mutate(year=year(month))%>%
select(RegionName, month, year, values)
value_c_msa <- value_c %>%
mutate(year=year(month))%>%
select(RegionName, month, year, valuec)
value_a_msa <- value_a %>%
mutate(year=year(month))%>%
select(RegionName, month, year, valuea)
head(value_a_msa, n = 2)
value_price_msa <-value_a_msa %>%
inner_join(price_msa, by = c('RegionName', 'month')) %>%
inner_join(value_s_msa, by = c('RegionName', 'month')) %>%
inner_join(value_c_msa, by = c('RegionName', 'month')) %>%
select(year.x, valuea, price, values, valuec) %>%
group_by(year.x) %>%
summarize(median_price = mean(price), average = mean(valuea), single_family=mean(values), condo=mean(valuec))
## `summarise()` ungrouping output (override with `.groups` argument)
value_price_msa <- value_price_msa %>%
pivot_longer(median_price: condo, names_to = 'type', values_to = 'house_value')
ggplot(value_price_msa, aes(x = year.x, y = house_value, col = type)) +
geom_line()
value_1_city <- value_1 %>%
mutate(year=year(month))%>%
select(RegionName, month, year, room1)
value_2_city <- value_2 %>%
mutate(year=year(month))%>%
select(RegionName, month, year, room2)
value_3_city <- value_3 %>%
mutate(year=year(month))%>%
select(RegionName, month, year, room3)
value_4_city <- value_4 %>%
mutate(year=year(month))%>%
select(RegionName, month, year, room4)
value_5_city <- value_5 %>%
mutate(year=year(month))%>%
select(RegionName, month, year, room5)
head(value_1_city, n = 2)
value_price_city1 <-price_city %>%
inner_join(value_1_city, by = c('RegionName', 'month')) %>%
inner_join(value_2_city, by = c('RegionName', 'month')) %>%
inner_join(value_3_city, by = c('RegionName', 'month')) %>%
inner_join(value_4_city, by = c('RegionName', 'month')) %>%
inner_join(value_5_city, by = c('RegionName', 'month')) %>%
select(RegionName, year.x, room1, room2, room3, room4, room5, price) %>%
group_by(RegionName, year.x) %>%
summarize(mprice = mean(price), mroom1 = mean(room1), mroom2=mean(room2), mroom3=mean(room3), mroom4=mean(room4), mroom5=mean(room5))
## `summarise()` regrouping output by 'RegionName' (override with `.groups` argument)
head(value_price_city1, n = 2)
value_price_city2 <- value_price_city1 %>%
pivot_longer(mroom1:mroom5, names_to = 'room', values_to = 'value') %>%
mutate(room = as.factor(room))
head(value_price_city2, n = 2)
value_price_bc <- value_price_city2 %>%
filter(RegionName %in% c('Berkeley', 'Castro Valley'))
ggplot(value_price_bc, aes(x = year.x, y = value, col = room)) +
geom_line(size = 0.55, linetype = 'dashed') +
geom_line(aes(x=year.x, y = mprice), col = 'blue3') +
facet_grid(.~RegionName)
value_price_cf <- value_price_city2 %>%
filter(RegionName %in% c('Concord', 'Fremont'))
ggplot(value_price_cf, aes(x = year.x, y = value, col = room)) +
geom_line(size = 0.55, linetype = 'dashed') +
geom_line(aes(x=year.x, y = mprice), col = 'blue3') +
facet_grid(.~RegionName)
value_price_ho <- value_price_city2 %>%
filter(RegionName %in% c('Hayward', 'Oakland'))
ggplot(value_price_ho, aes(x = year.x, y = value, col = room)) +
geom_line(size = 0.55, linetype = 'dashed') +
geom_line(aes(x=year.x, y = mprice), col = 'blue3') +
facet_grid(.~RegionName)
value_price_ss <- value_price_city2 %>%
filter(RegionName %in% c('San Francisco', 'Redwood City'))
ggplot(value_price_ss, aes(x = year.x, y = value, col = room)) +
geom_line(size = 0.55, linetype = 'dashed') +
geom_line(aes(x=year.x, y = mprice), col = 'blue3') +
facet_grid(.~RegionName)
value_price_city3 <- value_1_city %>%
inner_join(value_2_city, by = c('RegionName', 'month')) %>%
inner_join(value_3_city, by = c('RegionName', 'month')) %>%
inner_join(value_4_city, by = c('RegionName', 'month')) %>%
inner_join(value_5_city, by = c('RegionName', 'month')) %>%
select(RegionName, year.x, room1, room2, room3, room4, room5) %>%
filter(RegionName %in% c('Castro Valley', 'San Francisco', 'Redwood City')) %>%
group_by(RegionName, year.x) %>%
summarize( mroom1 = mean(room1), mroom2=mean(room2), mroom3=mean(room3), mroom4=mean(room4), mroom5=mean(room5))
## `summarise()` regrouping output by 'RegionName' (override with `.groups` argument)
head(value_price_city3, n = 2)
value_price_city4 <- value_price_city3 %>%
pivot_longer(mroom1:mroom5, names_to = 'room', values_to = 'value') %>%
mutate(room = as.factor(room))
head(value_price_city4, n = 2)
ggplot(value_price_city4, aes(year.x, value, colour = room)) +
geom_point(size=0.3) +
geom_smooth(method = 'lm', se= FALSE, size = 0.5) +
facet_wrap(~ RegionName)
## `geom_smooth()` using formula 'y ~ x'
model1 <- lm(value ~ year.x + RegionName + room, data = value_price_city4)
summary(model1)
##
## Call:
## lm(formula = value ~ year.x + RegionName + room, data = value_price_city4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -364210 -143731 -23337 117256 868403
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -83252669 2962699 -28.100 < 2e-16 ***
## year.x 41542 1475 28.157 < 2e-16 ***
## RegionNameRedwood City 410503 26060 15.752 < 2e-16 ***
## RegionNameSan Francisco 451915 26060 17.341 < 2e-16 ***
## roommroom2 174444 33644 5.185 3.58e-07 ***
## roommroom3 346786 33644 10.308 < 2e-16 ***
## roommroom4 524780 33644 15.598 < 2e-16 ***
## roommroom5 674369 33644 20.044 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 206000 on 367 degrees of freedom
## Multiple R-squared: 0.8199, Adjusted R-squared: 0.8165
## F-statistic: 238.7 on 7 and 367 DF, p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(model1,1:2)
library(MASS)
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
summary(powerTransform(model1))
## bcPower Transformation to Normality
## Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
## Y1 0.081 0.08 0.0106 0.1514
##
## Likelihood ratio test that transformation parameter is equal to 0
## (log transformation)
## LRT df pval
## LR test, lambda = (0) 5.098636 1 0.023945
##
## Likelihood ratio test that no transformation is needed
## LRT df pval
## LR test, lambda = (1) 423.2382 1 < 2.22e-16
model2 <- lm(value^0.1 ~ year.x + RegionName + room, data = value_price_city4)
summary(model2)
##
## Call:
## lm(formula = value^0.1 ~ year.x + RegionName + room, data = value_price_city4)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.170019 -0.044222 0.000257 0.048448 0.163964
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.848e+01 9.657e-01 -39.85 <2e-16 ***
## year.x 2.090e-02 4.809e-04 43.46 <2e-16 ***
## RegionNameRedwood City 2.116e-01 8.495e-03 24.91 <2e-16 ***
## RegionNameSan Francisco 2.419e-01 8.495e-03 28.48 <2e-16 ***
## roommroom2 1.186e-01 1.097e-02 10.81 <2e-16 ***
## roommroom3 2.131e-01 1.097e-02 19.43 <2e-16 ***
## roommroom4 2.910e-01 1.097e-02 26.53 <2e-16 ***
## roommroom5 3.436e-01 1.097e-02 31.33 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06715 on 367 degrees of freedom
## Multiple R-squared: 0.918, Adjusted R-squared: 0.9164
## F-statistic: 586.7 on 7 and 367 DF, p-value: < 2.2e-16
par(mfrow=c(1,2))
plot(model2,1:2)
new_x <- data.frame(RegionName = 'Castro Valley', year.x = 2030, room = c('mroom1', 'mroom2', 'mroom3', 'mroom4', 'mroom5'))
c <- predict(model2, newdata = new_x, interval = 'prediction')
c^10
## fit lwr upr
## 1 914750.4 645994.1 1280258
## 2 1229907.4 877567.4 1704816
## 3 1547744.3 1112996.9 2129770
## 4 1863335.6 1348223.3 2549267
## 5 2108248.4 1531599.1 2873432
new_x <- data.frame(RegionName = 'San Francisco', year.x = 2030, room = c('mroom1', 'mroom2', 'mroom3', 'mroom4', 'mroom5'))
s <- predict(model2, newdata = new_x, interval = 'prediction')
s^10
## fit lwr upr
## 1 1658582 1195460 2277351
## 2 2192714 1594990 2984983
## 3 2724337 1995501 3684572
## 4 3246840 2391333 4368577
## 5 3649316 2697474 4893448
new_x <- data.frame(RegionName = 'Redwood City', year.x = 2030, room = c('mroom1', 'mroom2', 'mroom3', 'mroom4', 'mroom5'))
r <- predict(model2, newdata = new_x, interval = 'prediction')
r^10
## fit lwr upr
## 1 1542468 1109076 2122737
## 2 2043299 1482905 2787572
## 3 2542561 1858278 3445819
## 4 3033855 2229747 4090141
## 5 3412627 2517317 4584975