# Shampoo sales analysis: load the CSV, report and drop unusable rows, plot
# the raw monthly series, then plot its classical decomposition.

library(ggplot2)
library(readr)
library(forecast)

# Load ----
data <- read.csv("~/TCD Study/Data Mining/ShampooSales.csv")
print(head(data))
print(names(data))

# Parse dates ----
# A day of "01-" is prepended so each value can be parsed with the
# day / abbreviated-month / two-digit-year format below. %b is matched against
# the current locale's month abbreviations.
data$Month <- as.Date(paste0("01-", data$Month), format = "%d-%b-%y")

# Inspect problem rows ----
missing_month <- which(is.na(data$Month))
missing_sales <- which(is.na(data$Shampoo.Sales))
# is.finite() is FALSE for NA and NaN as well as +/-Inf, so this index set
# also contains every row listed in missing_sales.
infinite_sales <- which(!is.finite(data$Shampoo.Sales))

if (length(missing_month) > 0) {
  print(data[missing_month, ])
}
if (length(missing_sales) > 0) {
  print(data[missing_sales, ])
}
if (length(infinite_sales) > 0) {
  print(data[infinite_sales, ])
}

# Clean ----
# Keep only rows with no NA in any column and a finite sales value.
data_clean <- data[complete.cases(data) & is.finite(data$Shampoo.Sales), ]
print(head(data_clean))

# Plot raw series ----
ggplot(data_clean, aes(x = Month, y = Shampoo.Sales)) +
  geom_line(color = "blue") +
  labs(
    title = "Shampoo Sales Over Time",
    x = "Month",
    y = "Shampoo Sales"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))

# Decompose ----
# NOTE(review): the series start is hard-coded to January 1980 and ts() treats
# the observations as evenly spaced. If any rows were dropped above, positions
# no longer line up with calendar months -- confirm the data has no gaps.
shampoo_ts <- ts(data_clean$Shampoo.Sales, frequency = 12, start = c(1980, 1))
shampoo_decomp <- decompose(shampoo_ts)

autoplot(shampoo_decomp) +
  labs(
    title = "Decomposition of Shampoo Sales Time Series",
    x = "Year",
    y = "Shampoo Sales"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, face = "bold"))