San Fransico Zillow Weekly Sales Forecasting

Read the File

Code
sf_week_sales<- read_excel("SF_Zillow_Weekly_Sales.xlsx")

 sales_tbl_ts <- sf_week_sales %>% select(date, Median_Sales_Price) %>%
  mutate(value = Median_Sales_Price) %>%
  mutate(date = yearweek(date)) %>%
  as_tsibble(index = date)

Visualize the data

Code
sales_tbl_ts %>% ggplot() +
  geom_line(aes(date, value)) +
  theme_bw() +
  xlab("Year Week") +
  ylab("Sales Price")

Summary Stats

Code
summary(sales_tbl_ts$value)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 846500  927840  977750  971991 1004750 1099750 
Code
sd(sales_tbl_ts$value)
[1] 58875.3

EDA Plots

Code
hist <- sales_tbl_ts %>%
  ggplot() +
  geom_histogram(aes(value),colour = 4, fill = "white", bins = 20) +
  theme_bw()

dens <- sales_tbl_ts %>%
  ggplot() +
  geom_density(aes(value),colour = 4, fill = "white") +
  theme_bw()

violin <- sales_tbl_ts %>%
  ggplot() +
  geom_violin(aes("", value),colour = 4, fill = "white") +
  theme_bw()

boxplot <- sales_tbl_ts %>%
  ggplot() +
  geom_boxplot(aes("", value),colour = 4, fill = "white") +
  theme_bw()

ggarrange(hist, dens, violin,boxplot , 
          labels = c("A", "B", "C","D"),
          ncol = 2, nrow = 2)

LR

Code
sales_tbl_ts %>%
  as.data.frame() %>%
  ggplot(aes(x=date, y=value)) +
    ylab("Weekly PriceMedian") +
    xlab("Weekly Date") +
    geom_line() +
    geom_smooth(method="lm", se=FALSE)
`geom_smooth()` using formula = 'y ~ x'

Moving Avg.

Code
sales_ma_data <- sales_tbl_ts %>%
  arrange(date) %>%
  mutate(
    ma_13_left = rollapply(
      value,
      13,
      FUN = mean,
      align = "left", fill = NA
    ),
    ma_13_right = rollapply(
      value,
      13,
      FUN = mean,
      align = "right", fill = NA
    ),
    ma_13_center = rollapply(
      value,
      13,
      FUN = mean,
      align = "center", fill = NA
    )
  ) %>%
  mutate(
    value_ma_3 = rollapply(value, 3, FUN = mean, align = "center", fill = NA),
    value_ma_5 = rollapply(value, 5, FUN = mean, align = "center", fill = NA),
    value_ma_7 = rollapply(value, 7, FUN = mean, align = "center", fill = NA),
    value_ma_13 = rollapply(value, 13, FUN = mean, align = "center", fill = NA),
    value_ma_25 = rollapply(value, 25, FUN = mean, align = "center", fill = NA),
    value_ma_49 = rollapply(value, 49, FUN = mean, align = "center", fill = NA)
  )


sales_tbl_pivot <- sales_ma_data %>%
  pivot_longer(
    cols = ma_13_left:value_ma_49,
    values_to = "value_ma",
    names_to = "ma_order"
  ) %>%
  mutate(ma_order = factor(
    ma_order,
    levels = c(
      "ma_13_center",
      "ma_13_left",
      "ma_13_right",
      "value_ma_3",
      "value_ma_5",
      "value_ma_7",
      "value_ma_13",
      "value_ma_25",
      "value_ma_49"
    ),
    labels = c(
      "ma_13_center",
      "ma_13_left",
      "ma_13_right",
      "value_ma_3",
      "value_ma_5",
      "value_ma_7",
      "value_ma_13",
      "value_ma_25",
      "value_ma_49"
    )
  ))

x <- sales_tbl_pivot %>%
  filter(
    !ma_order %in% c(
      "ma_13_center",
      "ma_13_left",
      "ma_13_right",
      "value_ma_7",
      "value_ma_49"
    )
  ) %>%
  mutate(ma_order = case_when(
    ma_order=='value_ma_3'~'3rd Order',
    ma_order=='value_ma_5'~'5th Order',
    ma_order=='value_ma_13'~'13th Order',
    ma_order=='value_ma_25'~'25th Order')
  ) %>%
  mutate(
    ma_order = factor(
      ma_order,
      labels = c('3rd Order',
      '5th Order',
      '13th Order',
      '25th Order'),
      levels = c('3rd Order',
      '5th Order',
      '13th Order',
      '25th Order')
    )
  ) %>%
  ggplot() +
  geom_line(aes(date, value), size = 1) +
  geom_line(aes(date, value_ma, color = ma_order), size = 1) +
    scale_color_discrete(name = 'MA Order')+
  theme_bw()+
  ylab('Median Sales Price')
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Code
x
Warning: Removed 42 rows containing missing values (`geom_line()`).

Classical Decomposition

Code
sales_tbl_ts %>%
  model(
    classical_decomposition(value)
  ) %>%
  components() %>%
  autoplot()
Warning: Removed 26 rows containing missing values (`geom_line()`).

STL Decompisition

Code
sales_tbl_ts %>%
  model(
    STL(value)
  ) %>%
  components() %>%
  autoplot()

Naive Forecast

Code
file_ts <- ts(sales_tbl_ts$value, start = c(2018, 1), frequency = 52)

naive_forecast <- naive(file_ts, h = 12)
autoplot(naive_forecast) + theme_bw() + ylab('Median Sales Price')