#Name: Omotunde S. Segun

library(dplyr) library(tidyverse) library(broom) library(lme4) library(ggplot2) #DATA EXPLORATION

#load the sleep dataset data(“sleepstudy”) sleep <- tibble(sleepstudy)

#Check for na’s sum(is.na(sleepstudy)) #no na’s

#check first six rows of the tibble to view a subset of the tibble head(sleep) #Examine the structure of the data frame: there are 180 observations str(sleep) #view data completely View(sleep)

#What is the data about? ?sleepstudy

#The code below created a new column, named sleep condition where the day 1 and 0 were recoded categorically as “training”, day 2 = baseline and day3 upward = sleep restriction

sleep <- sleep %>% mutate( sleep_condition = case_when( Days <= 1 ~ “training”,
Days == 2 ~ “baseline”,
Days >= 3 ~ “sleep restriction” ), sleep_condition = factor(sleep_condition, levels = c(“training”, “baseline”, “sleep restriction”)) )

#recheck data structure str(sleep)

#b. DATA VISUALIZATION 1: plots to show relationships #BOXPLOTS #BOXPLOTS of reaction time over different days

#the plot shows the boxplot of reaction times by days bpsr <- ggplot(sleep, aes (x = sleep_condition, y= Reaction, fill = sleep_condition)) + geom_boxplot(color = “black”) + labs(title = “Box plot of sleep condition by reaction time”, x = “sleep condition”, y = “Reaction Time (ms)”) + theme_minimal() #ggsave(“bpsr.png”, plot = bpsr, width = 8, height = 6, dpi = 300) ggsave(“bpsr.pdf”, plot = bpsr, width = 8, height = 6, dpi = 300)

#scatter plot of reaction time over number of days sp <- ggplot(sleep, aes(x = Days, y = Reaction, color = sleep_condition)) + geom_point(size = 2) + labs( title = “Reaction Times Across Days”, x = “Days”, y = “Reaction Time (ms)”) + theme_minimal()

#ggsave(“reaction_scatter.png”, plot = sp, width = 8, height = 6, dpi = 300) ggsave(“reaction_scatter.pdf”, plot = sp, width = 8, height = 6, dpi = 300) #DESCRIPTIVE STATISTICS #a. summary statistics by sleep_condition

desc_sc <- sleep %>% group_by (sleep_condition) %>% summarise(Count = n(), Mean_Reaction = mean(Reaction), Median_Reaction = median(Reaction), Sd_Reaction = sd(Reaction), Min_Reaction = min(Reaction), Max_Reaction = max(Reaction))

##sleep_condition Count Mean_Reaction Median_Reaction Sd_Reaction Min_Reaction ## #1 training 36 261. 267. 32.6 194. #2 baseline 18 265. 264. 29.5 203. #3 sleep restriction 126 314. 313. 57.5 205

#Grouping by subjects

desc_subj <- sleep %>% group_by (Subject) %>% summarise(Count = n(), Mean_Reaction = mean(Reaction), Median_Reaction = median(Reaction), Sd_Reaction = sd(Reaction), Min_Reaction = min(Reaction), Max_Reaction = max(Reaction))

#There are 18 subjects each had 10 days

##DATA VISUALIZATION

#b. Visualize data with appropriate plots hs <- ggplot(sleep, aes(x = Reaction)) + geom_histogram(binwidth = 25, fill = “yellow”, color = “red”) + labs( title = “Distribution of Reaction Times”, x = “Reaction Time (ms)”, y = “Frequency” ) + theme_minimal() #save image #ggsave(“hs.png”, plot = hs, width = 8, height = 6, dpi = 300) ggsave(“hs.pdf”, plot = hs, width = 8, height = 6, dpi = 300)

#density_plot #the plot shows the density of reaction times by sleep_condition dp <- ggplot(sleep, aes (x = Reaction, fill = sleep_condition)) + geom_density(alpha = 0.5) + labs(title = “Density Plot of Reaction Times by sleep_condition”, x = “Reaction Time (ms)”, y = “Density”) + scale_fill_brewer(palette = “Set3”) + theme_minimal()

#save image #ggsave(“dp.png”, plot = dp, width = 8, height = 6, dpi = 300) ggsave(“dp.pdf”, plot = dp, width = 8, height = 6, dpi = 300)

#the plot shows the density of reaction times by days dpd <- ggplot(sleep, aes (x = Reaction, fill = as.factor(Days))) + geom_density(alpha = 0.5) + labs(title = “Density Plot of Reaction Times by Days”, x = “Reaction Time (ms)”, y = “Density”) + scale_fill_brewer(palette = “Set3”) + theme_minimal()

#save image #ggsave(“dpd.png”, plot = dpd, width = 8, height = 6, dpi = 300)

#shows the days of sleep deprivation against average reaction time per subject require(lattice)
xyplot<- xyplot(Reaction ~ Days | Subject, sleep, type = c(“g”,“p”,“r”), index = function(x,y) coef(lm(y ~ x))[1], xlab = “Days of sleep deprivation”, ylab = “Average reaction time (ms)”, aspect = “xy”)

#Fit an adequate Models

#1. # Model with Days as a continuous variable. using days as fixed effect and controlling for variation in days and subject #Model with correlated random intercept and slope per subject #summary null_model <- lmer(Reaction ~ Days + (Days|Subject), data = subset(sleep, Days >= 2), REML = FALSE)

#summary(null_model)

Model 2:Independent Model with uncorrelated random intercept and slope per subject

model2 <- lmer(Reaction ~ Days + (Days || Subject), data = subset(sleep, Days >= 2), REML = FALSE)

#summary(model2)

anova(null_model, model2)

#estimated correlation between slope for days and intercept is very low (0.45)

#the new model is not doing well and has a chisquare of nearly zero i.e no significant improvement in model

#Residual Plots #5: Residual Analysis res1 <- residuals(null_model) res2 <- residuals(model2) #set graphical parameters to generate plot matrix par(mfrow =c(1,3))

Plot 1 histogram

hist(res1) hist(res2) #normality assumption violated # Plot 2 Q-Q Plot qqnorm(res1)

qqline(res1)

qqnorm(res2) qqline(res2)

#Plot 3 Residual plot

rp1 <- plot(fitted(null_model), res1)

plot(fitted(model2), res2)

#constancy of variance holds true, also residuals follow a normal distribution