Code
sf_week_sales<- read_excel("SF_Zillow_Weekly_Sales.xlsx")
sales_tbl_ts <- sf_week_sales %>% select(date, Median_Sales_Price) %>%
mutate(value = Median_Sales_Price) %>%
mutate(date = yearweek(date)) %>%
as_tsibble(index = date)Read the File
sf_week_sales<- read_excel("SF_Zillow_Weekly_Sales.xlsx")
sales_tbl_ts <- sf_week_sales %>% select(date, Median_Sales_Price) %>%
mutate(value = Median_Sales_Price) %>%
mutate(date = yearweek(date)) %>%
as_tsibble(index = date)Visualize the data
sales_tbl_ts %>% ggplot() +
geom_line(aes(date, value)) +
theme_bw() +
xlab("Year Week") +
ylab("Sales Price")Summary Stats
summary(sales_tbl_ts$value) Min. 1st Qu. Median Mean 3rd Qu. Max.
846500 927840 977750 971991 1004750 1099750
sd(sales_tbl_ts$value)[1] 58875.3
EDA Plots
hist <- sales_tbl_ts %>%
ggplot() +
geom_histogram(aes(value),colour = 4, fill = "white", bins = 20) +
theme_bw()
dens <- sales_tbl_ts %>%
ggplot() +
geom_density(aes(value),colour = 4, fill = "white") +
theme_bw()
violin <- sales_tbl_ts %>%
ggplot() +
geom_violin(aes("", value),colour = 4, fill = "white") +
theme_bw()
boxplot <- sales_tbl_ts %>%
ggplot() +
geom_boxplot(aes("", value),colour = 4, fill = "white") +
theme_bw()
ggarrange(hist, dens, violin,boxplot ,
labels = c("A", "B", "C","D"),
ncol = 2, nrow = 2)LR
sales_tbl_ts %>%
as.data.frame() %>%
ggplot(aes(x=date, y=value)) +
ylab("Weekly PriceMedian") +
xlab("Weekly Date") +
geom_line() +
geom_smooth(method="lm", se=FALSE)`geom_smooth()` using formula = 'y ~ x'
Moving Avg.
sales_ma_data <- sales_tbl_ts %>%
arrange(date) %>%
mutate(
ma_13_left = rollapply(
value,
13,
FUN = mean,
align = "left", fill = NA
),
ma_13_right = rollapply(
value,
13,
FUN = mean,
align = "right", fill = NA
),
ma_13_center = rollapply(
value,
13,
FUN = mean,
align = "center", fill = NA
)
) %>%
mutate(
value_ma_3 = rollapply(value, 3, FUN = mean, align = "center", fill = NA),
value_ma_5 = rollapply(value, 5, FUN = mean, align = "center", fill = NA),
value_ma_7 = rollapply(value, 7, FUN = mean, align = "center", fill = NA),
value_ma_13 = rollapply(value, 13, FUN = mean, align = "center", fill = NA),
value_ma_25 = rollapply(value, 25, FUN = mean, align = "center", fill = NA),
value_ma_49 = rollapply(value, 49, FUN = mean, align = "center", fill = NA)
)
sales_tbl_pivot <- sales_ma_data %>%
pivot_longer(
cols = ma_13_left:value_ma_49,
values_to = "value_ma",
names_to = "ma_order"
) %>%
mutate(ma_order = factor(
ma_order,
levels = c(
"ma_13_center",
"ma_13_left",
"ma_13_right",
"value_ma_3",
"value_ma_5",
"value_ma_7",
"value_ma_13",
"value_ma_25",
"value_ma_49"
),
labels = c(
"ma_13_center",
"ma_13_left",
"ma_13_right",
"value_ma_3",
"value_ma_5",
"value_ma_7",
"value_ma_13",
"value_ma_25",
"value_ma_49"
)
))
x <- sales_tbl_pivot %>%
filter(
!ma_order %in% c(
"ma_13_center",
"ma_13_left",
"ma_13_right",
"value_ma_7",
"value_ma_49"
)
) %>%
mutate(ma_order = case_when(
ma_order=='value_ma_3'~'3rd Order',
ma_order=='value_ma_5'~'5th Order',
ma_order=='value_ma_13'~'13th Order',
ma_order=='value_ma_25'~'25th Order')
) %>%
mutate(
ma_order = factor(
ma_order,
labels = c('3rd Order',
'5th Order',
'13th Order',
'25th Order'),
levels = c('3rd Order',
'5th Order',
'13th Order',
'25th Order')
)
) %>%
ggplot() +
geom_line(aes(date, value), size = 1) +
geom_line(aes(date, value_ma, color = ma_order), size = 1) +
scale_color_discrete(name = 'MA Order')+
theme_bw()+
ylab('Median Sales Price')Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
xWarning: Removed 42 rows containing missing values (`geom_line()`).
Classical Decomposition
sales_tbl_ts %>%
model(
classical_decomposition(value)
) %>%
components() %>%
autoplot()Warning: Removed 26 rows containing missing values (`geom_line()`).
STL Decompisition
sales_tbl_ts %>%
model(
STL(value)
) %>%
components() %>%
autoplot()Naive Forecast
file_ts <- ts(sales_tbl_ts$value, start = c(2018, 1), frequency = 52)
naive_forecast <- naive(file_ts, h = 12)
autoplot(naive_forecast) + theme_bw() + ylab('Median Sales Price')