library(readxl)
library(psych)
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following objects are masked from 'package:psych':
##
## %+%, alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
ss <- function(x) sum((x - mean(x, na.rm = TRUE))^2, na.rm = TRUE)
df <- read_excel("/Users/katbeachy/Library/Mobile Documents/com~apple~CloudDocs/Documents/Documents/UCT/Courses/PSY3009F/Project/data_times_updated.xlsx")
## New names:
## • `` -> `...1`
df$C1 <- df$psqi_q6
df$q2_score <- ifelse(df$`psqi_q2 (minutes)` <= 15, 0,
ifelse(df$`psqi_q2 (minutes)` <= 30, 1,
ifelse(df$`psqi_q2 (minutes)` <= 60, 2, 3)))
df$C2 <- ifelse((df$q2_score + df$psqi_q5a) == 0, 0,
ifelse((df$q2_score + df$psqi_q5a) <= 2, 1,
ifelse((df$q2_score + df$psqi_q5a) <= 4, 2, 3)))
df$C3 <- ifelse(df$`psqi_q4 (hours)` > 7, 0,
ifelse(df$`psqi_q4 (hours)` >= 6, 1,
ifelse(df$`psqi_q4 (hours)` >= 5, 2, 3)))
df$bedtime <- as.POSIXct(df$`psqi_q1 (time)`, format = "%H:%M")
df$waketime <- as.POSIXct(df$`psqi_q3 (time)`, format = "%H:%M")
df$time_in_bed <- as.numeric(difftime(df$waketime, df$bedtime, units = "hours"))
df$time_in_bed <- ifelse(df$time_in_bed < 0, df$time_in_bed + 24, df$time_in_bed)
df$efficiency <- (df$`psqi_q4 (hours)` / df$time_in_bed) * 100
df$C4 <- ifelse(df$efficiency >= 85, 0,
ifelse(df$efficiency >= 75, 1,
ifelse(df$efficiency >= 65, 2, 3)))
df$disturbance_sum <- df$psqi_q5b + df$psqi_q5c + df$psqi_q5d + df$psqi_q5e +
df$psqi_q5f + df$psqi_q5g + df$psqi_q5h + df$psqi_q5i
df$C5 <- ifelse(df$disturbance_sum == 0, 0,
ifelse(df$disturbance_sum <= 9, 1,
ifelse(df$disturbance_sum <= 18, 2, 3)))
df$C6 <- df$psqi_q7
df$C7 <- ifelse((df$psqi_q8 + df$psqi_q9) == 0, 0,
ifelse((df$psqi_q8 + df$psqi_q9) <= 2, 1,
ifelse((df$psqi_q8 + df$psqi_q9) <= 4, 2, 3)))
df$PSQI_global <- df$C1 + df$C2 + df$C3 + df$C4 + df$C5 + df$C6 + df$C7
df$Accuracy <- df[[40]]
df$MeanRT <- df[[38]]
df$SDRT <- df[[39]]
describe(df$PSQI_global)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 38 11.05 2.75 11 10.94 2.97 6 17 11 0.44 -0.45 0.45
IQR(df$PSQI_global, na.rm = TRUE)
## [1] 3
ss(df$PSQI_global)
## [1] 279.8947
describe(df$Accuracy)
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 37 91.86 2.37 91.6 91.72 2.67 88.4 96.9 8.5 0.51 -0.97 0.39
IQR(df$Accuracy, na.rm = TRUE)
## [1] 3.6
ss(df$Accuracy)
## [1] 202.487
describe(df$MeanRT)
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 37 178.79 65.85 157.9 175.41 57.82 83 312.4 229.4 0.52 -0.92
## se
## X1 10.83
IQR(df$MeanRT, na.rm = TRUE)
## [1] 85.8
ss(df$MeanRT)
## [1] 156116.6
shapiro.test(df$PSQI_global)
##
## Shapiro-Wilk normality test
##
## data: df$PSQI_global
## W = 0.95835, p-value = 0.1677
shapiro.test(df$Accuracy)
##
## Shapiro-Wilk normality test
##
## data: df$Accuracy
## W = 0.93287, p-value = 0.0275
shapiro.test(df$MeanRT)
##
## Shapiro-Wilk normality test
##
## data: df$MeanRT
## W = 0.9323, p-value = 0.0264
cor.test(df$PSQI_global, df$Accuracy)
##
## Pearson's product-moment correlation
##
## data: df$PSQI_global and df$Accuracy
## t = 1.3113, df = 35, p-value = 0.1983
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.1157295 0.5050123
## sample estimates:
## cor
## 0.2164043
cor.test(df$PSQI_global, df$MeanRT)
##
## Pearson's product-moment correlation
##
## data: df$PSQI_global and df$MeanRT
## t = 0.49689, df = 35, p-value = 0.6224
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2470229 0.3969492
## sample estimates:
## cor
## 0.08369503
ggplot(df, aes(x = PSQI_global, y = Accuracy)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Sleep Quality vs. SART Accuracy",
x = "PSQI Global Score",
y = "SART Accuracy (%)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

ggplot(df, aes(x = PSQI_global, y = MeanRT)) +
geom_point() +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Sleep Quality vs. Mean Reaction Time",
x = "PSQI Global Score",
y = "Mean RT (ms)")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1 row containing non-finite outside the scale range (`stat_smooth()`).
## Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

# Boxplots to visualise outliers
par(mfrow = c(1, 3))
boxplot(df$PSQI_global, main = "PSQI Global", ylab = "Score")
boxplot(df$Accuracy, main = "SART Accuracy", ylab = "%")
boxplot(df$MeanRT, main = "Mean RT", ylab = "ms")

# Identify outliers statistically (1.5 x IQR rule)
find_outliers <- function(x, label) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
outliers <- x[x < lower | x > upper]
cat("\n---", label, "---\n")
cat("Lower fence:", lower, "\n")
cat("Upper fence:", upper, "\n")
cat("Outliers:", if(length(outliers) == 0) "None" else outliers, "\n")
}
find_outliers(df$PSQI_global, "PSQI Global")
##
## --- PSQI Global ---
## Lower fence: 4.5
## Upper fence: 16.5
## Outliers: 17 17
find_outliers(df$Accuracy, "SART Accuracy")
##
## --- SART Accuracy ---
## Lower fence: 84.8
## Upper fence: 99.2
## Outliers: NA
find_outliers(df$MeanRT, "Mean RT")
##
## --- Mean RT ---
## Lower fence: 2.6
## Upper fence: 345.8
## Outliers: NA