Select a column of your data that encodes time (e.g., “date”, “timestamp”, or “year”). Convert this column into a Date in R.

# Parse the month/day/year strings in date_x into Date values.
data[["date_x"]] <- as.Date(data[["date_x"]], format = "%m/%d/%Y")

Choose a column of data to analyze over time. This should be a “response-like” variable that is of particular interest.

# Name of the column used as the response in the analysis.
response_variable <- "score"

Create a tsibble object of just the date and response variable. Then, plot your data over time. Consider different windows of time.

library(tsibble)
library(ggplot2)


# Column analysed as the response throughout this section.
response_variable <- "score"

# Make sure date_x is a Date before filtering, so values that fail to parse
# (and therefore become NA) are removed together with genuinely missing ones.
# The conversion is skipped when the column has already been parsed.
if (!inherits(data$date_x, "Date")) {
  data$date_x <- as.Date(data$date_x, format = "%m/%d/%Y")
}

# Keep only rows where both the date and the response are present. The
# data-frame method of na.omit() has no `cols` argument (that belongs to
# data.table's method), so na.omit(data, cols = ...) would silently drop rows
# with an NA in *any* column instead of just these two.
has_date <- !is.na(data$date_x)
has_response <- !is.na(data[[response_variable]])
data <- data[has_date & has_response, ]

# Row counter used as the tsibble index below.
data$ID <- seq_len(nrow(data))

# NOTE(review): the row counter ID is the tsibble index and date_x is the key,
# so tsibble does not treat date_x as the time variable here. Dates repeat in
# the data, so a date-indexed tsibble would need aggregation or a separate
# key. The layout is kept because later chunks read my_tsibble columns in the
# row order this call produces -- revisit if a true date index is needed.
my_tsibble <- as_tsibble(data, key = "date_x", index = "ID")

# .data[[name]] looks the column up by its string name inside aes().
ggplot(my_tsibble, aes(x = date_x, y = .data[[response_variable]])) +
  geom_line() +
  labs(
    title = paste("Time Series Plot of", response_variable),
    x = "Date",
    y = response_variable
  )

# Converting score to numeric first, so that values which cannot be coerced
# (and become NA) are removed by the filter below.
my_tsibble$score <- as.numeric(my_tsibble$score)

# Drop rows with a missing date or score. na.omit() on a data frame / tsibble
# has no `cols` argument (it is a data.table feature), so the check is done
# explicitly on the two columns of interest.
rows_complete <- !is.na(my_tsibble$date_x) & !is.na(my_tsibble$score)
my_tsibble <- my_tsibble[rows_complete, ]

# Preview the first rows.
head(my_tsibble)
## # A tsibble: 6 x 13 [1]
## # Key:       date_x [5]
##   names        date_x     score genre overview crew  orig_title status orig_lang
##   <chr>        <date>     <dbl> <chr> <chr>    <chr> <chr>      <chr>  <chr>    
## 1 The Life an… 1903-05-15    63 Dram… The sto… Mada… " La vie … " Rel… " French"
## 2 A Trip to t… 1907-06-20    80 Adve… Profess… Geor… " Le Voya… " Rel… " French"
## 3 The Birth o… 1915-02-08    61 Dram… Two fam… Lill… "The Birt… " Rel… " Englis…
## 4 The Birth o… 1915-02-08    61 Dram… Two fam… Lill… "The Birt… " Rel… " Englis…
## 5 The Cabinet… 1920-02-27    80 Dram… Francis… Wern… " Das Cab… " Rel… " German"
## 6 Safety Last! 1923-04-01    79 Come… When a … Haro… "Safety L… " Rel… " Englis…
## # ℹ 4 more variables: budget_x <dbl>, revenue <dbl>, country <chr>, ID <int>
# Converting date_x to Date format (skipped when it is already a Date)
if (!inherits(my_tsibble$date_x, "Date")) {
  my_tsibble$date_x <- as.Date(my_tsibble$date_x, format = "%m/%d/%Y")
}

# Make sure score is numeric before checking it for missing values.
my_tsibble$score <- as.numeric(my_tsibble$score)

# Drop rows with a missing date or score. na.omit() on a data frame / tsibble
# has no `cols` argument (it is a data.table feature), so passing it would be
# ignored and rows with an NA in any column would be removed instead.
rows_complete <- !is.na(my_tsibble$date_x) & !is.na(my_tsibble$score)
my_tsibble <- my_tsibble[rows_complete, ]

# Per-column summary statistics.
summary(my_tsibble)
##     names               date_x               score          genre          
##  Length:10178       Min.   :1903-05-15   Min.   :  0.0   Length:10178      
##  Class :character   1st Qu.:2001-12-25   1st Qu.: 59.0   Class :character  
##  Mode  :character   Median :2013-05-09   Median : 65.0   Mode  :character  
##                     Mean   :2008-06-15   Mean   : 63.5                     
##                     3rd Qu.:2019-10-17   3rd Qu.: 71.0                     
##                     Max.   :2023-12-31   Max.   :100.0                     
##    overview             crew            orig_title           status         
##  Length:10178       Length:10178       Length:10178       Length:10178      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##   orig_lang            budget_x            revenue            country         
##  Length:10178       Min.   :        1   Min.   :0.000e+00   Length:10178      
##  Class :character   1st Qu.: 15000000   1st Qu.:2.859e+07   Class :character  
##  Mode  :character   Median : 50000000   Median :1.529e+08   Mode  :character  
##                     Mean   : 64882379   Mean   :2.531e+08                     
##                     3rd Qu.:105000000   3rd Qu.:4.178e+08                     
##                     Max.   :460000000   Max.   :2.924e+09                     
##        ID       
##  Min.   :    1  
##  1st Qu.: 2545  
##  Median : 5090  
##  Mean   : 5090  
##  3rd Qu.: 7634  
##  Max.   :10178

Use smoothing to detect at least one season in your data, and interpret your results.

# Seasonal decomposition of the score series using stl().
#
# The rows of my_tsibble are individual records with irregular dates, so
# passing the raw score column to ts(frequency = 12) would treat every row as
# one "month" and make the seasonal component meaningless. The scores are
# averaged per calendar month first so that a 12-period cycle is one year.
has_obs <- !is.na(my_tsibble$date_x) & !is.na(my_tsibble$score)
month_key <- format(my_tsibble$date_x[has_obs], "%Y-%m-01")
monthly_mean <- tapply(my_tsibble$score[has_obs], month_key, mean)

# ts() needs an unbroken monthly sequence and stl() fails on NA by default, so
# months without any record are filled by linear interpolation between the
# neighbouring months that do have data.
all_months <- seq(
  as.Date(min(month_key)),
  as.Date(max(month_key)),
  by = "month"
)
month_lookup <- match(format(all_months, "%Y-%m-%d"), names(monthly_mean))
monthly_score <- as.numeric(monthly_mean[month_lookup])
month_pos <- seq_along(all_months)
monthly_score <- approx(month_pos, monthly_score, xout = month_pos)$y

# Year and month of the first observation, as expected by ts(start = ).
series_start <- as.integer(
  c(format(all_months[1], "%Y"), format(all_months[1], "%m"))
)

my_stl <- stl(
  ts(monthly_score, start = series_start, frequency = 12),
  s.window = "periodic"
)

plot(my_stl)

library(forecast)
## Warning: package 'forecast' was built under R version 4.3.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
response_variable <- "score"

# Creating a time series object
#
# The rows of my_tsibble are individual records with irregular dates, so the
# response is averaged per calendar month before it is handed to ts(); with
# frequency = 12 each cycle then really is one year.
response_values <- my_tsibble[[response_variable]]
has_obs <- !is.na(my_tsibble$date_x) & !is.na(response_values)
month_key <- format(my_tsibble$date_x[has_obs], "%Y-%m-01")
monthly_mean <- tapply(response_values[has_obs], month_key, mean)

# Months without any record are filled by linear interpolation so the series
# is an unbroken monthly sequence.
all_months <- seq(
  as.Date(min(month_key)),
  as.Date(max(month_key)),
  by = "month"
)
month_lookup <- match(format(all_months, "%Y-%m-%d"), names(monthly_mean))
monthly_score <- as.numeric(monthly_mean[month_lookup])
month_pos <- seq_along(all_months)
monthly_score <- approx(month_pos, monthly_score, xout = month_pos)$y

# Year and month of the first observation, as expected by ts(start = ).
series_start <- as.integer(
  c(format(all_months[1], "%Y"), format(all_months[1], "%m"))
)
my_ts <- ts(monthly_score, start = series_start, frequency = 12)

# Applying smoothing: centred moving average over 3 consecutive months
my_smoothed <- ma(my_ts, order = 3)

plot(my_ts, main = "Original vs Smoothed Time Series", col = "blue")
lines(my_smoothed, col = "red", lwd = 2)

# Explanation
# NOTE(review): re-read this interpretation against the regenerated figure and
# adjust the wording if the monthly series shows a different picture.
cat("
The plot compares the original time series (in blue) with a smoothed version (in red), obtained using a moving average with an order of 3.
The purpose of smoothing is to reveal underlying trends by reducing noise and highlighting patterns.

In this case, the smoothed plot indicates the general trend in movie scores over time.
While attempting to identify seasonality, the data did not exhibit a clear repeating pattern.
Instead, it shows variations in movie scores over time, with no discernible seasonal component.

Further analysis could explore trends, outliers, or autocorrelation to uncover additional insights into the dynamics of movie scores.
")
## 
## The plot compares the original time series (in blue) with a smoothed version (in red), obtained using a moving average with an order of 3.
## The purpose of smoothing is to reveal underlying trends by reducing noise and highlighting patterns.
## 
## In this case, the smoothed plot indicates the general trend in movie scores over time.
## While attempting to identify seasonality, the data did not exhibit a clear repeating pattern.
## Instead, it shows variations in movie scores over time, with no discernible seasonal component.
## 
## Further analysis could explore trends, outliers, or autocorrelation to uncover additional insights into the dynamics of movie scores.
# Response column name, restated for this chunk.
response_variable <- "score"

# Rebuild the tsibble from `data`, with the row counter ID as the index and
# date_x as the key.
my_tsibble <- as_tsibble(x = data, key = "date_x", index = "ID")

# Straight-line fit of score on date.
lm_model <- lm(formula = score ~ date_x, data = my_tsibble)

# What the linear fit leaves unexplained.
my_residuals <- resid(lm_model)

# Inspect the structure and the first few residuals.
str(my_residuals)
##  Named num [1:10178] -13.9 3.63 -14.4 -14.4 5.24 ...
##  - attr(*, "names")= chr [1:10178] "1" "2" "3" "4" ...
head(my_residuals)
##          1          2          3          4          5          6 
## -13.895961   3.626615 -14.399446 -14.399446   5.244611   4.638724
# Wrap the residuals in a ts object with 12 observations per cycle
my_residuals_ts <- ts(data = my_residuals, frequency = 12)
# Autocorrelation plot of the residual series
acf(x = my_residuals_ts, main = "ACF for Residuals")