library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.3     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
library(ggrepel)

# Load required libraries
library(tsibble)
## Warning: package 'tsibble' was built under R version 4.3.2
## 
## Attaching package: 'tsibble'
## 
## The following object is masked from 'package:lubridate':
## 
##     interval
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, union
library(ggplot2)
library(dplyr)
library(tidyr)
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.2
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tsibbledata 0.4.1     ✔ fable       0.3.3
## ✔ feasts      0.3.1     ✔ fabletools  0.3.4
## Warning: package 'tsibbledata' was built under R version 4.3.2
## Warning: package 'feasts' was built under R version 4.3.2
## Warning: package 'fabletools' was built under R version 4.3.2
## Warning: package 'fable' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
# Load the readxl package
library(readxl)
## Warning: package 'readxl' was built under R version 4.3.2
# Read the msleep workbook. The path below is machine-specific: point it at
# your own copy of msleep.xlsx before running.
excel_data <- read_excel("C:/Users/ABHIRAM/Downloads/msleep.xlsx")

# Coerce the "date" column to Date so it can be used as the time index.
excel_data$date <- as.Date(excel_data$date)

# List the rows that share a date with another row. `index` is named
# explicitly: the second positional argument of duplicates() is `key`, so
# passing `date` positionally left the index to be guessed by tsibble.
duplicates(excel_data, index = date)
## # A tibble: 56 × 12
##    name   genus vore  order conservation sleep_total sleep_rem sleep_cycle awake
##    <chr>  <chr> <chr> <chr> <chr>              <dbl> <chr>     <chr>       <dbl>
##  1 Cheet… Acin… carni Carn… lc                  12.1 NA        NA           11.9
##  2 Owl m… Aotus omni  Prim… NA                  17   1.8       NA            7  
##  3 Mount… Aplo… herbi Rode… nt                  14.4 2.4       NA            9.6
##  4 Great… Blar… omni  Sori… lc                  14.9 2.299999… 0.133333333   9.1
##  5 Cow    Bos   herbi Arti… domesticated         4   0.7       0.66666666…  20  
##  6 Three… Brad… herbi Pilo… NA                  14.4 2.200000… 0.76666666…   9.6
##  7 North… Call… carni Carn… vu                   8.7 1.4       0.383333333  15.3
##  8 Vespe… Calo… NA    Rode… NA                   7   NA        NA           17  
##  9 Dog    Canis carni Carn… domesticated        10.1 2.9       0.33333333…  13.9
## 10 Roe d… Capr… herbi Arti… lc                   3   NA        NA           21  
## # ℹ 46 more rows
## # ℹ 3 more variables: brainwt <chr>, bodywt <dbl>, date <date>
# Keep only the first row seen for each date; the tsibble built below has no
# key, so every index value must be unique.
excel_data <- distinct(excel_data, date, .keep_all = TRUE)

# Build the tsibble from the two columns of interest, naming the index
# explicitly instead of relying on tsibble to guess it.
msleep_tsibble <- tsibble(
  date = excel_data$date,
  sleep_total = excel_data$sleep_total,
  index = date
)

# Insert an explicit NA row for every date missing between the first and last
# observation. Code that consumes msleep_tsibble afterwards has to cope with
# these NA values in sleep_total.
msleep_tsibble <- fill_gaps(msleep_tsibble)

# Plot the observed values over time. The NA rows inserted by fill_gaps() are
# dropped first: geom_line() silently breaks the line at every interior NA,
# so observations without an adjacent observed day would never be drawn.
msleep_tsibble %>%
  filter(!is.na(sleep_total)) %>%
  ggplot(aes(x = date, y = sleep_total)) +
  geom_line() +
  labs(title = "Sleep Patterns Over Time", x = "Date", y = "Sleep Total")

# Trend check: ordinary least-squares fit of sleep_total against date,
# followed by the coefficient summary. Rows with missing sleep_total are
# excluded from the fit (the summary reports how many were deleted).
lm_model <- lm(formula = sleep_total ~ date, data = msleep_tsibble)
lm_model %>%
  summary()
## 
## Call:
## lm(formula = sleep_total ~ date, data = msleep_tsibble)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.429 -3.015 -0.249  3.923  9.538 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 1.028e+01  3.111e+00   3.304  0.00171 **
## date        8.684e-06  3.781e-04   0.023  0.98176   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.772 on 53 degrees of freedom
##   (7323 observations deleted due to missingness)
## Multiple R-squared:  9.952e-06,  Adjusted R-squared:  -0.01886 
## F-statistic: 0.0005275 on 1 and 53 DF,  p-value: 0.9818
# Subset the data for multiple trends (if needed):

# Strength of trends:

# Average sleep by calendar month, as a rough look at seasonality.
# as_tibble() drops the tsibble class first: summarise() on a tsibble keeps
# the date index as an implicit group, so group_by(month) alone would not
# collapse the data to one row per month. The NA rows added by fill_gaps()
# are removed so each mean is taken over observed values only.
msleep_tsibble %>%
  as_tibble() %>%
  filter(!is.na(sleep_total)) %>%
  mutate(month = month(date)) %>%
  group_by(month) %>%
  summarise(avg_sleep = mean(sleep_total)) %>%
  ggplot(aes(x = month, y = avg_sleep)) +
  geom_line() +
  labs(
    title = "Average Sleep Patterns by Month",
    x = "Month",
    y = "Average Sleep"
  )

# Illustrate seasonality with the autocorrelation function.
# ACF() defaults to na.action = na.contiguous, which keeps only the longest
# run of consecutive non-missing days; on this gap-filled series that left a
# single lag. na.pass instead estimates each lag from all complete pairs.
# NOTE(review): with observations this sparse, individual lags may still be
# NA or rest on very few pairs -- interpret the result with care.
msleep_tsibble %>%
  ACF(sleep_total, na.action = na.pass)