```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE)
```

Load necessary libraries

# Loading libraries for analysis
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.4.2
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## 
## Attaching package: 'tsibble'
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(forecast)
## Warning: package 'forecast' was built under R version 4.4.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

Loading and Preparing the Data

# Read the Ames housing data from its CSV file
ames <- read.csv("D:/Stats for DS/ames.csv", header = TRUE)

# Derive Sale.Date as the first day of the sale month, from Yr.Sold and Mo.Sold
ames <- mutate(
  ames,
  Sale.Date = as.Date(paste0(Yr.Sold, "-", Mo.Sold, "-01"))
)

# Keep only the sale date and the sale price, in chronological order
time_series_data <- ames %>%
  arrange(Sale.Date) %>%
  select(Sale.Date, SalePrice)

Aggregating Data by Time

# Collapse to one row per Sale.Date holding the mean SalePrice
# (missing prices are ignored via na.rm = TRUE)
monthly_data <- summarise(
  group_by(time_series_data, Sale.Date),
  Average_SalePrice = mean(SalePrice, na.rm = TRUE)
)

Creating a tsibble Object

# Build a tsibble indexed by Sale.Date for time series analysis
# NOTE(review): with a Date index the printed interval is daily ([1D]), not
# monthly; consider a yearmonth index if monthly regularity matters downstream.
monthly_tsibble <- as_tsibble(monthly_data, index = Sale.Date)

# Print the tsibble to check its interval and first rows
monthly_tsibble
## # A tsibble: 55 x 2 [1D]
##    Sale.Date  Average_SalePrice
##    <date>                 <dbl>
##  1 2006-01-01           202997.
##  2 2006-02-01           188865.
##  3 2006-03-01           182009.
##  4 2006-04-01           158864.
##  5 2006-05-01           169166.
##  6 2006-06-01           174234.
##  7 2006-07-01           178638.
##  8 2006-08-01           202379.
##  9 2006-09-01           219365.
## 10 2006-10-01           165379.
## # ℹ 45 more rows

Plotting the Time Series

# Line chart of the average sale price against the sale date
ggplot(monthly_tsibble, aes(Sale.Date, Average_SalePrice)) +
  geom_line(colour = "blue") +
  ggtitle("Average Sale Price Over Time") +
  xlab("Date") +
  ylab("Average Sale Price") +
  theme_minimal()

What stands out immediately?

Linear Regression for Trend Detection

# Linear regression to detect trends
# Regresses Average_SalePrice on Sale.Date; the fitted slope is the estimated
# linear trend over the observed period.
# NOTE(review): Sale.Date is a Date, so the slope is presumably in price units
# per day rather than per month -- confirm before interpreting its size.
lm_trend <- lm(Average_SalePrice ~ Sale.Date, data = monthly_tsibble)

# Summary of the regression model
# (coefficients, residual spread, R-squared and the overall F-test)
summary(lm_trend)
## 
## Call:
## lm(formula = Average_SalePrice ~ Sale.Date, data = monthly_tsibble)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -29732  -8242    597   6995  32633 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 317156.227  54315.001   5.839 3.26e-07 ***
## Sale.Date       -9.739      3.886  -2.506   0.0153 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13930 on 53 degrees of freedom
## Multiple R-squared:  0.106,  Adjusted R-squared:  0.0891 
## F-statistic: 6.282 on 1 and 53 DF,  p-value: 0.0153
# Same line chart with a least-squares trend line overlaid in red
# (se = FALSE hides the confidence band)
ggplot(monthly_tsibble, aes(Sale.Date, Average_SalePrice)) +
  geom_line(colour = "blue") +
  geom_smooth(method = "lm", colour = "red", se = FALSE) +
  ggtitle("Trend Detection in Sale Prices") +
  xlab("Date") +
  ylab("Average Sale Price") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Do you need to subset the data for multiple trends?

How strong are the trends?

Detecting Seasonality

# Smoothing to detect seasonality
# A 12-month window has even length, so equal weights rep(1/12, 12) cannot be
# centred on an observation and the smoother ends up shifted by half a month.
# The 2x12 moving average below gives half weight to the two end months and
# full weight to the 11 months in between (weights sum to 1), which centres
# the window and averages over one full seasonal cycle.
# as.numeric() drops the ts class returned by stats::filter() so that Smoothed
# is stored as a plain numeric column.
monthly_tsibble <- monthly_tsibble %>%
  mutate(
    Smoothed = as.numeric(
      stats::filter(Average_SalePrice, c(0.5, rep(1, 11), 0.5) / 12, sides = 2)
    )
  )

# Plotting smoothed values
# The dashed red line is the smoothed series; regular departures of the blue
# monthly averages from it are what point to a seasonal pattern.
# The smoother is undefined for the first and last 6 months, so those rows are
# dropped from the red line.
ggplot(data = monthly_tsibble, aes(x = Sale.Date)) +
  geom_line(aes(y = Average_SalePrice), color = "blue") +
  geom_line(aes(y = Smoothed), color = "red", linetype = "dashed") +
  labs(title = "Seasonality in Sale Prices", x = "Date", y = "Average Sale Price") +
  theme_minimal()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).

ACF and PACF Analysis

# Autocorrelation of the monthly average sale price, out to 24 lags
forecast::ggAcf(monthly_tsibble[["Average_SalePrice"]], lag.max = 24) +
  labs(title = "ACF of Sale Prices")

# Partial autocorrelation of the same series, out to 24 lags
forecast::ggPacf(monthly_tsibble[["Average_SalePrice"]], lag.max = 24) +
  labs(title = "PACF of Sale Prices")

Can you illustrate the seasonality using ACF or PACF?

Insights