#Name: Omotunde S. Segun
library(dplyr) library(tidyverse) library(broom) library(lme4) library(ggplot2) #DATA EXPLORATION
#load the sleep dataset data(“sleepstudy”) sleep <- tibble(sleepstudy)
#Check for na’s sum(is.na(sleepstudy)) #no na’s
#check first six rows of the tibble to view a subset of the tibble head(sleep) #Examine the structure of the data frame: there are 180 observations str(sleep) #view data completely View(sleep)
#What is the data about? ?sleepstudy
#The code below created a new column, named sleep condition where the day 1 and 0 were recoded categorically as “training”, day 2 = baseline and day3 upward = sleep restriction
sleep <- sleep %>% mutate( sleep_condition = case_when( Days
<= 1 ~ “training”,
Days == 2 ~ “baseline”,
Days >= 3 ~ “sleep restriction” ), sleep_condition =
factor(sleep_condition, levels = c(“training”, “baseline”, “sleep
restriction”)) )
#recheck data structure str(sleep)
#b. DATA VISUALIZATION 1: plots to show relationships #BOXPLOTS #BOXPLOTS of reaction time over different days
#the plot shows the boxplot of reaction times by days bpsr <- ggplot(sleep, aes (x = sleep_condition, y= Reaction, fill = sleep_condition)) + geom_boxplot(color = “black”) + labs(title = “Box plot of sleep condition by reaction time”, x = “sleep condition”, y = “Reaction Time (ms)”) + theme_minimal() #ggsave(“bpsr.png”, plot = bpsr, width = 8, height = 6, dpi = 300) ggsave(“bpsr.pdf”, plot = bpsr, width = 8, height = 6, dpi = 300)
#scatter plot of reaction time over number of days sp <- ggplot(sleep, aes(x = Days, y = Reaction, color = sleep_condition)) + geom_point(size = 2) + labs( title = “Reaction Times Across Days”, x = “Days”, y = “Reaction Time (ms)”) + theme_minimal()
#ggsave(“reaction_scatter.png”, plot = sp, width = 8, height = 6, dpi = 300) ggsave(“reaction_scatter.pdf”, plot = sp, width = 8, height = 6, dpi = 300) #DESCRIPTIVE STATISTICS #a. summary statistics by sleep_condition
desc_sc <- sleep %>% group_by (sleep_condition) %>% summarise(Count = n(), Mean_Reaction = mean(Reaction), Median_Reaction = median(Reaction), Sd_Reaction = sd(Reaction), Min_Reaction = min(Reaction), Max_Reaction = max(Reaction))
##sleep_condition Count Mean_Reaction Median_Reaction Sd_Reaction
Min_Reaction ##
#Grouping by subjects
desc_subj <- sleep %>% group_by (Subject) %>% summarise(Count = n(), Mean_Reaction = mean(Reaction), Median_Reaction = median(Reaction), Sd_Reaction = sd(Reaction), Min_Reaction = min(Reaction), Max_Reaction = max(Reaction))
#There are 18 subjects each had 10 days
##DATA VISUALIZATION
#b. Visualize data with appropriate plots hs <- ggplot(sleep, aes(x = Reaction)) + geom_histogram(binwidth = 25, fill = “yellow”, color = “red”) + labs( title = “Distribution of Reaction Times”, x = “Reaction Time (ms)”, y = “Frequency” ) + theme_minimal() #save image #ggsave(“hs.png”, plot = hs, width = 8, height = 6, dpi = 300) ggsave(“hs.pdf”, plot = hs, width = 8, height = 6, dpi = 300)
#density_plot #the plot shows the density of reaction times by sleep_condition dp <- ggplot(sleep, aes (x = Reaction, fill = sleep_condition)) + geom_density(alpha = 0.5) + labs(title = “Density Plot of Reaction Times by sleep_condition”, x = “Reaction Time (ms)”, y = “Density”) + scale_fill_brewer(palette = “Set3”) + theme_minimal()
#save image #ggsave(“dp.png”, plot = dp, width = 8, height = 6, dpi = 300) ggsave(“dp.pdf”, plot = dp, width = 8, height = 6, dpi = 300)
#the plot shows the density of reaction times by days dpd <- ggplot(sleep, aes (x = Reaction, fill = as.factor(Days))) + geom_density(alpha = 0.5) + labs(title = “Density Plot of Reaction Times by Days”, x = “Reaction Time (ms)”, y = “Density”) + scale_fill_brewer(palette = “Set3”) + theme_minimal()
#save image #ggsave(“dpd.png”, plot = dpd, width = 8, height = 6, dpi = 300)
#shows the days of sleep deprivation against average reaction time
per subject require(lattice)
xyplot<- xyplot(Reaction ~ Days | Subject, sleep, type =
c(“g”,“p”,“r”), index = function(x,y) coef(lm(y ~ x))[1], xlab = “Days
of sleep deprivation”, ylab = “Average reaction time (ms)”, aspect =
“xy”)
#Fit an adequate Models
#1. # Model with Days as a continuous variable. using days as fixed effect and controlling for variation in days and subject #Model with correlated random intercept and slope per subject #summary null_model <- lmer(Reaction ~ Days + (Days|Subject), data = subset(sleep, Days >= 2), REML = FALSE)
#summary(null_model)
hist(res1) hist(res2) #normality assumption violated # Plot 2 Q-Q Plot qqnorm(res1)
qqline(res1)
qqnorm(res2) qqline(res2)
#Plot 3 Residual plot
rp1 <- plot(fitted(null_model), res1)
plot(fitted(model2), res2)
#constancy of variance holds true, also residuals follow a normal distribution