library(tsibble)
library(ggplot2)

# Name of the column analysed throughout this script.
response_variable <- "score"

# Parse release dates written as month/day/year. Strings that do not match
# the format become NA and are removed by the filter below.
data$date_x <- as.Date(data$date_x, format = "%m/%d/%Y")

# Keep only rows that have both a date and a response value.
# na.omit(data, cols = ...) does not do this on a plain data.frame/tibble:
# base R's data.frame method has no `cols` argument (only data.table's method
# honours it), so it silently drops every row with an NA in *any* column.
has_required <- complete.cases(data$date_x, data[[response_variable]])
data <- data[has_required, ]

# Sequential row counter used as the tsibble index.
# NOTE(review): the date is used as the key and a row counter as the index,
# so the tsibble is not actually indexed by time -- confirm this is intended.
data$ID <- seq_len(nrow(data))
my_tsibble <- as_tsibble(data, key = "date_x", index = "ID")

# Line plot of the response against the release date. `.data[[ ]]` looks the
# column up by its string name.
ggplot(my_tsibble, aes(x = date_x, y = .data[[response_variable]])) +
  geom_line() +
  labs(
    title = paste("Time Series Plot of", response_variable),
    x = "Date",
    y = response_variable
  )

# Converting score to numeric, then dropping rows whose score could not be
# converted (coercion happens first so the NAs it introduces are removed too).
my_tsibble$score <- as.numeric(my_tsibble$score)
my_tsibble <- my_tsibble[!is.na(my_tsibble$score), ]
head(my_tsibble)
## # A tsibble: 6 x 13 [1]
## # Key: date_x [5]
## names date_x score genre overview crew orig_title status orig_lang
## <chr> <date> <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 The Life an… 1903-05-15 63 Dram… The sto… Mada… " La vie … " Rel… " French"
## 2 A Trip to t… 1907-06-20 80 Adve… Profess… Geor… " Le Voya… " Rel… " French"
## 3 The Birth o… 1915-02-08 61 Dram… Two fam… Lill… "The Birt… " Rel… " Englis…
## 4 The Birth o… 1915-02-08 61 Dram… Two fam… Lill… "The Birt… " Rel… " Englis…
## 5 The Cabinet… 1920-02-27 80 Dram… Francis… Wern… " Das Cab… " Rel… " German"
## 6 Safety Last! 1923-04-01 79 Come… When a … Haro… "Safety L… " Rel… " Englis…
## # ℹ 4 more variables: budget_x <dbl>, revenue <dbl>, country <chr>, ID <int>
# Coerce the two analysis columns first, so that any value that fails to
# parse becomes NA and is removed by the filter below.
# Converting date_x to Date format (a no-op when it is already a Date)
my_tsibble$date_x <- as.Date(my_tsibble$date_x, format = "%m/%d/%Y")
my_tsibble$score <- as.numeric(my_tsibble$score)

# Drop rows missing a date or a score only. na.omit(x, cols = ...) is not
# used because the data.frame method ignores `cols` and would drop rows with
# an NA in any column.
rows_ok <- complete.cases(my_tsibble$date_x, my_tsibble$score)
my_tsibble <- my_tsibble[rows_ok, ]

summary(my_tsibble)
## names date_x score genre
## Length:10178 Min. :1903-05-15 Min. : 0.0 Length:10178
## Class :character 1st Qu.:2001-12-25 1st Qu.: 59.0 Class :character
## Mode :character Median :2013-05-09 Median : 65.0 Mode :character
## Mean :2008-06-15 Mean : 63.5
## 3rd Qu.:2019-10-17 3rd Qu.: 71.0
## Max. :2023-12-31 Max. :100.0
## overview crew orig_title status
## Length:10178 Length:10178 Length:10178 Length:10178
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## orig_lang budget_x revenue country
## Length:10178 Min. : 1 Min. :0.000e+00 Length:10178
## Class :character 1st Qu.: 15000000 1st Qu.:2.859e+07 Class :character
## Mode :character Median : 50000000 Median :1.529e+08 Mode :character
## Mean : 64882379 Mean :2.531e+08
## 3rd Qu.:105000000 3rd Qu.:4.178e+08
## Max. :460000000 Max. :2.924e+09
## ID
## Min. : 1
## 1st Qu.: 2545
## Median : 5090
## Mean : 5090
## 3rd Qu.: 7634
## Max. :10178
# Ordinary least squares fit of score on release date. date_x is a Date, so
# the slope is expressed per day.
lm_model <- lm(
  formula = score ~ date_x,
  data = my_tsibble
)

# Coefficient table, residual quantiles and goodness-of-fit statistics.
print(summary(lm_model))
##
## Call:
## lm(formula = score ~ date_x, data = my_tsibble)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.716 -4.525 1.763 7.850 38.411
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.840e+01 3.590e-01 190.5 <2e-16 ***
## date_x -3.491e-04 2.375e-05 -14.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.4 on 10176 degrees of freedom
## Multiple R-squared: 0.02079, Adjusted R-squared: 0.02069
## F-statistic: 216 on 1 and 10176 DF, p-value: < 2.2e-16
The linear regression analysis results indicate the following:
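Intercept: The intercept is approximately 68.40; it is the fitted score at date_x = 0, which R stores as 1970-01-01. Date_x Coefficient: The coefficient for ‘date_x’ is approximately -0.0003491 per day (about -0.13 points per year), suggesting a slight negative trend over time. Residuals: The residuals have a minimum of -67.716, a maximum of 38.411, and a median of 1.763, so they are roughly centered around 0. R-squared: The R-squared value is 0.02079, indicating that the model explains only about 2.08% of the variance in the ‘score’ variable, so release date alone is a weak predictor. P-value: The p-values for both the intercept and ‘date_x’ are below 2e-16 (well under 0.05), indicating that they are statistically significant; with 10,178 observations, however, even a very small effect reaches significance.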
# Seasonal-trend decomposition (STL) of the score column with a fixed
# ("periodic") seasonal component.
# NOTE(review): frequency = 12 makes every 12 consecutive rows one seasonal
# cycle; the rows look like individual records rather than evenly spaced
# monthly observations -- confirm this is the intended series.
my_stl <- stl(
  x = ts(my_tsibble$score, frequency = 12),
  s.window = "periodic"
)

# One panel each for data, seasonal, trend and remainder.
plot(my_stl)
library(forecast)
## Warning: package 'forecast' was built under R version 4.3.2
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
response_variable <- "score"

# Wrap the scores in a ts object with 12 observations per cycle.
my_ts <- ts(data = my_tsibble$score, frequency = 12)

# Moving average of order 3 (ma() comes from the forecast package).
my_smoothed <- ma(my_ts, order = 3)

# Raw series in blue, with the smoothed series drawn on top in red.
plot(my_ts, col = "blue", main = "Original vs Smoothed Time Series")
lines(my_smoothed, lwd = 2, col = "red")
# Explanation
# Prints the interpretation of the smoothing plot: a leading blank line, one
# sentence per line, and a trailing newline.
cat(paste(
  c(
    "",
    "The plot compares the original time series (in blue) with a smoothed version (in red), obtained using a moving average with an order of 3.",
    "The purpose of smoothing is to reveal underlying trends by reducing noise and highlighting patterns.",
    "In this case, the smoothed plot indicates the general trend in movie scores over time.",
    "While attempting to identify seasonality, the data did not exhibit a clear repeating pattern.",
    "Instead, it shows variations in movie scores over time, with no discernible seasonal component.",
    "Further analysis could explore trends, outliers, or autocorrelation to uncover additional insights into the dynamics of movie scores.",
    ""
  ),
  collapse = "\n"
))
##
## The plot compares the original time series (in blue) with a smoothed version (in red), obtained using a moving average with an order of 3.
## The purpose of smoothing is to reveal underlying trends by reducing noise and highlighting patterns.
##
## In this case, the smoothed plot indicates the general trend in movie scores over time.
## While attempting to identify seasonality, the data did not exhibit a clear repeating pattern.
## Instead, it shows variations in movie scores over time, with no discernible seasonal component.
##
## Further analysis could explore trends, outliers, or autocorrelation to uncover additional insights into the dynamics of movie scores.
response_variable <- "score"

# Refit the linear trend on the tsibble prepared earlier in the script.
# The tsibble is deliberately not rebuilt from `data` here: doing so would
# discard the numeric coercion and NA filtering already applied to
# my_tsibble and fit the model on unprepared columns.
lm_model <- lm(score ~ date_x, data = my_tsibble)

# Observed minus fitted score for every row, in row order.
my_residuals <- residuals(lm_model)
str(my_residuals)
## Named num [1:10178] -13.9 3.63 -14.4 -14.4 5.24 ...
## - attr(*, "names")= chr [1:10178] "1" "2" "3" "4" ...
head(my_residuals)
## 1 2 3 4 5 6
## -13.895961 3.626615 -14.399446 -14.399446 5.244611 4.638724
# Converting residuals to ts object
# NOTE(review): frequency = 12 only sets how the lag axis is scaled; the
# residuals are ordered by row, not by evenly spaced months -- confirm.
my_residuals_ts <- ts(my_residuals, frequency = 12)
# ACF plot for the residuals
acf(my_residuals_ts, main = "ACF for Residuals")