# Collapse the raw rows to one row per Date holding the mean of PTS
# (NA values ignored), then declare Date as the tsibble time index.
# Dates that have no rows in nba_data are not inserted here.
nba_tsibble <- as_tsibble(
  nba_data %>%
    group_by(Date) %>%
    summarise(avg_points = mean(PTS, na.rm = TRUE)) %>%
    ungroup(),
  index = Date
)
# Line chart of the daily average points against the date
ggplot(nba_tsibble, aes(x = Date, y = avg_points)) +
  geom_line(colour = "darkblue") +
  labs(
    x = "Date",
    y = "Average Points",
    title = "Average Points Over Time"
  )
# Fit a time-series linear model with a linear trend term as the only
# regressor; the fit is stored in the mable column `lm_trend`.
trend_model <- model(
  nba_tsibble,
  lm_trend = TSLM(avg_points ~ trend())
)
# Plot the observed series with the fitted linear trend overlaid.
# A forecast with h = 0 requests a zero-length horizon and therefore carries
# no trend line to draw; the in-sample fitted values live in the `.fitted`
# column returned by augment(), so that is what gets plotted.
trend_model %>%
  augment() %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = avg_points), alpha = 0.4) +
  geom_line(aes(y = .fitted), color = "darkblue") +
  labs(title = "Linear Trend in Average Points", y = "Average Points")
# Apply smoothing using a centred moving average.
# The window is 15 rows before + the current row + 15 rows after (31 rows).
# `.complete = TRUE` yields NA wherever the full window is not available,
# which is consistent with the 30 removed rows reported by geom_line() below.
nba_tsibble %>%
  mutate(
    smoothed = slider::slide_dbl(
      avg_points, mean,
      .before = 15, .after = 15, .complete = TRUE
    )
  ) %>%
  ggplot(aes(x = Date)) +
  geom_line(aes(y = avg_points), alpha = 0.4) +
  # `linewidth` replaces the `size` aesthetic for lines (ggplot2 >= 3.4.0)
  geom_line(aes(y = smoothed), color = "red", linewidth = 1.2) +
  labs(title = "Smoothed Average Points", y = "Points")
## Warning: Removed 30 rows containing missing values or values outside the scale range
## (`geom_line()`).
library(dplyr)
library(tsibble)
library(lubridate)
# Parse the Date column as month/day/four-digit-year into class Date.
# Values that do not match the format become NA.
nba_data[["Date"]] <- as.Date(nba_data[["Date"]], format = "%m/%d/%Y")
# Daily mean of PTS (NA values ignored): one row per date present in the data
nba_avg <- ungroup(
  summarise(
    group_by(nba_data, Date),
    avg_points = mean(PTS, na.rm = TRUE)
  )
)

# Calendar spine: every day from the earliest to the latest observed date
full_dates <- tibble(
  Date = seq(
    from = min(nba_data$Date),
    to = max(nba_data$Date),
    by = "day"
  )
)

# Attach the averages to the spine; days with no data get avg_points = NA,
# so the gaps are explicit rows rather than missing rows
nba_filled <- left_join(full_dates, nba_avg, by = "Date")

# Declare Date as the time index of the gap-free daily series
nba_tsibble <- as_tsibble(nba_filled, index = Date)
library(fable)
library(feasts)
# Time plot of avg_points with a chart title
nba_tsibble %>%
  autoplot(avg_points) +
  labs(title = "Average Points Over Time")
# Fit a linear-trend TSLM to avg_points (stored as `lm_trend`),
# then print the fitted model's coefficient and fit summary
model_trend <- model(
  nba_tsibble,
  lm_trend = TSLM(avg_points ~ trend())
)
report(model_trend)
## Series: avg_points
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -21.985 -6.985 -1.549 5.286 54.993
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.345e+01 6.157e-01 38.094 < 2e-16 ***
## trend() 3.311e-04 6.987e-05 4.739 2.37e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.934 on 1373 degrees of freedom
## Multiple R-squared: 0.01609, Adjusted R-squared: 0.01538
## F-statistic: 22.46 on 1 and 1373 DF, p-value: 2.3706e-06
# Forecast from the trend model (no horizon given, so the package default
# is used) and draw the forecast together with the historical series
autoplot(forecast(model_trend), nba_tsibble)
# Seasonality checks: autocorrelation and partial autocorrelation plots
# NOTE(review): nba_tsibble holds NA on days without data; confirm how the
# default NA handling of ACF()/PACF() treats those rows before reading the plots.
autoplot(ACF(nba_tsibble, avg_points))
autoplot(PACF(nba_tsibble, avg_points))
# Per-column summary statistics of the `cars` data set (columns speed, dist)
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent
printing of the R code that generated the plot.