library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)
# Load required libraries
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
##
## Attaching package: 'tsibble'
##
## The following object is masked from 'package:lubridate':
##
## interval
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, union
library(ggplot2)
library(dplyr)
library(tidyr)
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tsibbledata 0.4.1 ✔ fable 0.3.3
## ✔ feasts 0.3.1 ✔ fabletools 0.3.4
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
# Load the readxl package
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
# Import the msleep workbook and coerce its `date` column to class Date.
excel_data <- read_excel("C:/Users/ABHIRAM/Downloads/msleep.xlsx") %>%
  mutate(date = as.Date(date))
# List the rows whose `date` value occurs more than once.
# NOTE(review): `date` is supplied positionally here and the recorded output
# reports the index being guessed; consider naming it (`index = date`) after
# confirming against the tsibble documentation.
excel_data %>%
  duplicates(date)
## Using `date` as index variable.
## # A tibble: 56 × 12
## name genus vore order conservation sleep_total sleep_rem sleep_cycle awake
## <chr> <chr> <chr> <chr> <chr> <dbl> <chr> <chr> <dbl>
## 1 Cheet… Acin… carni Carn… lc 12.1 NA NA 11.9
## 2 Owl m… Aotus omni Prim… NA 17 1.8 NA 7
## 3 Mount… Aplo… herbi Rode… nt 14.4 2.4 NA 9.6
## 4 Great… Blar… omni Sori… lc 14.9 2.299999… 0.133333333 9.1
## 5 Cow Bos herbi Arti… domesticated 4 0.7 0.66666666… 20
## 6 Three… Brad… herbi Pilo… NA 14.4 2.200000… 0.76666666… 9.6
## 7 North… Call… carni Carn… vu 8.7 1.4 0.383333333 15.3
## 8 Vespe… Calo… NA Rode… NA 7 NA NA 17
## 9 Dog Canis carni Carn… domesticated 10.1 2.9 0.33333333… 13.9
## 10 Roe d… Capr… herbi Arti… lc 3 NA NA 21
## # ℹ 46 more rows
## # ℹ 3 more variables: brainwt <chr>, bodywt <dbl>, date <date>
# Keep one row per date (the first one encountered); `.keep_all = TRUE`
# retains the remaining columns of that row.
# NOTE(review): rows sharing a date are discarded rather than combined --
# confirm that this is the intended treatment.
excel_data <- distinct(excel_data, date, .keep_all = TRUE)
# Build a tsibble holding only the date and total sleep. The index is named
# explicitly instead of letting tsibble guess it. fill_gaps() then turns the
# implicit gaps between observed dates into explicit rows whose `sleep_total`
# is NA.
msleep_tsibble <- tsibble(
  date = excel_data$date,
  sleep_total = excel_data$sleep_total,
  index = date
) %>%
  fill_gaps()
# Plot total sleep against date.
# fill_gaps() inserted NA rows between the observed dates, and geom_line()
# breaks the line at every NA. Only the observed rows are plotted so that the
# line runs continuously through the data.
msleep_tsibble %>%
  filter(!is.na(sleep_total)) %>%
  ggplot(aes(x = date, y = sleep_total)) +
  geom_line() +
  labs(title = "Sleep Patterns Over Time", x = "Date", y = "Sleep Total")

# Linear regression of total sleep on date, used to test for a linear trend.
# Rows with a missing `sleep_total` are dropped by lm(); the recorded summary
# reports 7323 observations deleted due to missingness.
# NOTE(review): the `date` coefficient is presumably a change per day, since
# the index is daily -- confirm before interpreting its size.
lm_model <- lm(sleep_total ~ date, data = msleep_tsibble)
# Print the coefficient table and the overall fit statistics.
summary(lm_model)
##
## Call:
## lm(formula = sleep_total ~ date, data = msleep_tsibble)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.429 -3.015 -0.249 3.923 9.538
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.028e+01 3.111e+00 3.304 0.00171 **
## date 8.684e-06 3.781e-04 0.023 0.98176
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.772 on 53 degrees of freedom
## (7323 observations deleted due to missingness)
## Multiple R-squared: 9.952e-06, Adjusted R-squared: -0.01886
## F-statistic: 0.0005275 on 1 and 53 DF, p-value: 0.9818
# Subset the data for multiple trends (if needed):
# Strength of trends:
# Average sleep by calendar month, as a rough check for seasonality.
# The tsibble is converted to a plain tibble first: summarise() on a tsibble
# keeps the date index, which would return one row per date instead of one
# row per month. Rows with NA `sleep_total` (created by fill_gaps()) are
# removed so that they do not turn the monthly means into NA.
msleep_tsibble %>%
  as_tibble() %>%
  filter(!is.na(sleep_total)) %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(avg_sleep = mean(sleep_total)) %>%
  ggplot(aes(x = month, y = avg_sleep)) +
  geom_line() +
  # Label every calendar month instead of fractional axis breaks.
  scale_x_continuous(breaks = seq_len(12)) +
  labs(
    title = "Average Sleep Patterns by Month",
    x = "Month",
    y = "Average Sleep"
  )

# Autocorrelation of total sleep, used to look for seasonal structure.
# NOTE(review): the recorded output contains a single lag only; presumably the
# NA rows added by fill_gaps() leave very little usable data -- confirm, and
# consider the `na.action` argument of ACF().
ACF(msleep_tsibble, sleep_total)
## # A tsibble: 1 x 2 [1D]
## lag acf
## <cf_lag> <dbl>
## 1 1D -0.5