library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# Report-wide plot theme: theme_minimal (Georgia, base size 14) restyled for a
# dark #222222 canvas with light text. Legends are switched off for every plot
# unless a plot overrides legend.position itself.
theme_set(
  theme_minimal(base_family = "Georgia", base_size = 14) +
    theme(
      # Canvas
      plot.background = element_rect(fill = "#222222", color = NA),
      panel.background = element_rect(fill = "#222222", color = NA),
      # Grid lines: major only
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_line(color = "grey30"),
      panel.grid.major.y = element_line(color = "grey30"),
      # Centered title and subtitle
      plot.title = element_text(hjust = 0.5, color = "#f8f8f8", face = "bold"),
      plot.subtitle = element_text(hjust = 0.5, color = "#cccccc"),
      # Axis titles and tick labels
      axis.title.x = element_text(margin = margin(t = 10), color = "#f8f8f8"),
      axis.title.y = element_text(margin = margin(r = 10), color = "#f8f8f8"),
      axis.text.x = element_text(color = "#f8f8f8"),
      axis.text.y = element_text(color = "#f8f8f8"),
      # Legends off by default
      legend.position = "none"
    )
)
# Palette keyed by reading format; used by the manual fill/colour scales below.
pastel_colors <- c(
  Physical = "#C3E2C2",
  Eink = "#D8C7FF"
)
# Load the reading/sleep workbook and title-case the Format column in a single
# pipeline.
SleepandReadData <-
  "C:/Users/kathryne/OneDrive/School/Data211/Final Project/Reading and Sleep Data.xlsx" %>%
  read_excel() %>%
  mutate(Format = str_to_title(Format))
\(\mu_e =\) Average minutes spent reading eink
\(\mu_p =\) Average minutes spent reading physical
\(H_0: \mu_e = \mu_p\)
\(H_a: \mu_e \neq \mu_p\)
# Two-sided Welch (unequal-variance) t-test of mean reading minutes by format.
# var.equal and conf.level are written out at their default values.
t.test(
  MinutesReading ~ Format,
  data = SleepandReadData,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: MinutesReading by Format
## t = -0.8722, df = 11.854, p-value = 0.4004
## alternative hypothesis: true difference in means between group Eink and group Physical is not equal to 0
## 95 percent confidence interval:
## -75.03211 32.17497
## sample estimates:
## mean in group Eink mean in group Physical
## 50.57143 72.00000
Conclusion:
p-value = .4004 > \(\alpha = .05\); therefore, at the \(5\%\) significance level, we fail to reject \(H_0\). There is not sufficient evidence to support the claim that the reading format affects the average number of minutes spent reading.
# Horizontal bar chart of mean reading minutes per format. The per-format mean
# is computed by stat_summary when the plot is drawn.
ColChart <- SleepandReadData %>%
  ggplot(aes(x = Format, y = MinutesReading, fill = Format)) +
  stat_summary(geom = "col", fun = mean, width = 0.6) +
  coord_flip() +
  scale_fill_manual(values = pastel_colors) +
  labs(
    x = "Format",
    y = "Minutes Reading",
    title = "Average Reading Time by Format"
  )
ggplotly(ColChart)
\(\mu_e =\) Average sleep minutes after eink
\(\mu_p =\) Average sleep minutes after physical
\(H_0: \mu_e = \mu_p\)
\(H_a: \mu_e \neq \mu_p\)
# Two-sided Welch (unequal-variance) t-test of mean total sleep minutes by
# format. var.equal and conf.level are written out at their default values.
t.test(
  TotalTimeAsleep ~ Format,
  data = SleepandReadData,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: TotalTimeAsleep by Format
## t = -1.8106, df = 11.784, p-value = 0.09576
## alternative hypothesis: true difference in means between group Eink and group Physical is not equal to 0
## 95 percent confidence interval:
## -213.96426 19.96426
## sample estimates:
## mean in group Eink mean in group Physical
## 295.4286 392.4286
Conclusion:
p-value = .09576 > \(\alpha = .05\); therefore, at the \(5\%\) significance level, we fail to reject \(H_0\). There is not sufficient evidence to support the claim that the reading format affects the average amount of sleep I get each night.
# Per-format summary of total sleep: mean, standard error, sample size and the
# two-sided 95% t critical value used for the error bars.
# The mean and sd drop missing values, so the sample size is counted the same
# way (non-missing observations) instead of using the raw row count; otherwise
# a missing night would shrink the SE and inflate the degrees of freedom.
SummaryTable <- SleepandReadData %>%
  group_by(Format) %>%
  summarise(
    ave = mean(TotalTimeAsleep, na.rm = TRUE),
    se = sd(TotalTimeAsleep, na.rm = TRUE) / sqrt(sum(!is.na(TotalTimeAsleep))),
    n = sum(!is.na(TotalTimeAsleep)),
    tstar = qt(1 - 0.05 / 2, df = n - 1)
  )

# Bars show the mean; error bars span ave +/- tstar * se (a 95% t interval per
# format); the white point marks the mean itself.
SleepvsFormat <- ggplot(SummaryTable, aes(x = Format, y = ave, fill = Format)) +
  geom_col(width = 0.5, color = "white") +
  geom_errorbar(
    aes(ymin = ave - tstar * se, ymax = ave + tstar * se),
    width = 0.2,
    linewidth = 0.8
  ) +
  geom_point(shape = 21, size = 4, color = "black", fill = "white", stroke = 1.2) +
  scale_fill_manual(values = pastel_colors) +
  labs(title = "Average Total Sleep by Format", y = "Average Minutes Slept")
ggplotly(SleepvsFormat)
\(\mu_e =\) Average pages per minute with eink.
\(\mu_p =\) Average pages per minute with physical
\(H_0: \mu_e = \mu_p\)
\(H_a: \mu_e \neq \mu_p\)
# Two-sided Welch (unequal-variance) t-test of mean pages per minute by format.
# var.equal and conf.level are written out at their default values.
t.test(
  PagesPerMinute ~ Format,
  data = SleepandReadData,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: PagesPerMinute by Format
## t = -0.60393, df = 8.2984, p-value = 0.562
## alternative hypothesis: true difference in means between group Eink and group Physical is not equal to 0
## 95 percent confidence interval:
## -1.608408 0.937481
## sample estimates:
## mean in group Eink mean in group Physical
## 1.543333 1.878797
Conclusion:
p-value = .562 > \(\alpha = .05\); therefore, at the \(5\%\) significance level, we fail to reject \(H_0\). There is not sufficient evidence to support the claim that the reading format affects average reading speed.
# Bar chart of the mean number of pages read per format.
# Missing page counts are dropped so this average is computed the same way as
# the day-of-week page average elsewhere in this report.
# NOTE(review): the t-test just above is on PagesPerMinute, while this chart
# plots Pages -- confirm that total pages (not reading speed) is intended here.
Barchart <- SleepandReadData %>%
  group_by(Format) %>%
  summarise(AvgPages = mean(Pages, na.rm = TRUE)) %>%
  ggplot(aes(x = Format, y = AvgPages, fill = Format)) +
  geom_col() +
  scale_fill_manual(values = pastel_colors) +
  labs(
    title = "Average Pages Read by Format",
    y = "Average Pages",
    x = "Format"
  )
ggplotly(Barchart)
\(\mu_e =\) Average minutes to fall asleep after eink
\(\mu_p =\) Average minutes to fall asleep after physical
\(H_0: \mu_e = \mu_p\)
\(H_a: \mu_e \neq \mu_p\)
# Two-sided Welch (unequal-variance) t-test of mean minutes to fall asleep by
# format. var.equal and conf.level are written out at their default values.
t.test(
  MinutesToSleep ~ Format,
  data = SleepandReadData,
  alternative = "two.sided",
  var.equal = FALSE,
  conf.level = 0.95
)
##
## Welch Two Sample t-test
##
## data: MinutesToSleep by Format
## t = -0.92872, df = 6.0311, p-value = 0.3887
## alternative hypothesis: true difference in means between group Eink and group Physical is not equal to 0
## 95 percent confidence interval:
## -112.57453 50.57453
## sample estimates:
## mean in group Eink mean in group Physical
## 9.142857 40.142857
Conclusion:
p-value = .3887 > \(\alpha = .05\); therefore, at the \(5\%\) significance level, we fail to reject \(H_0\). There is not sufficient evidence to support the claim that the reading format affects the average time it takes to fall asleep.
# Nightly minutes-to-fall-asleep per format: jittered raw points, a white
# diamond at each format's mean, and mean_se error bars.
# The y axis is zoomed to 0-100 with coord_cartesian, which only changes the
# visible window; the summaries are still computed from all rows.
TimetoSleepPlot <- SleepandReadData %>%
  ggplot(aes(x = Format, y = MinutesToSleep, fill = Format)) +
  geom_jitter(
    shape = 21,
    size = 3.5,
    stroke = 1.2,
    color = "black",
    width = 0.2
  ) +
  stat_summary(
    geom = "point",
    fun = mean,
    shape = 23,
    size = 5,
    stroke = 1.2,
    color = "black",
    fill = "white"
  ) +
  stat_summary(
    geom = "errorbar",
    fun.data = mean_se,
    width = 0.2,
    linewidth = 1
  ) +
  coord_cartesian(ylim = c(0, 100)) +
  scale_fill_manual(values = pastel_colors) +
  labs(
    y = "Average Minutes to Fall Asleep",
    title = "Minutes to Fall Asleep by Format.\n (Note: one extreme outlier clipped for clarity)"
  )
ggplotly(TimetoSleepPlot)
# Scatter of nightly sleep duration against reading time, coloured by format.
# The report-wide theme sets legend.position = "none"; because Format is on
# neither axis here, the legend is switched back on for this plot (with light
# text to suit the dark background) so the two colours can be told apart.
Scatterplot <- ggplot(
  SleepandReadData,
  aes(x = MinutesReading, y = TotalTimeAsleep, color = Format)
) +
  geom_point(size = 4, alpha = 0.8) +
  scale_color_manual(values = pastel_colors) +
  labs(
    title = "Total Time Asleep vs Reading Time",
    x = "Minutes Reading",
    y = "Minutes Asleep"
  ) +
  theme(
    legend.position = "right",
    legend.title = element_text(color = "#f8f8f8"),
    legend.text = element_text(color = "#f8f8f8")
  )
ggplotly(Scatterplot)
# Mean minutes in each sleep stage per format, reshaped to long form with one
# row per Format/Stage pair for stacking.
SleepStageAvg <- SleepandReadData %>%
  group_by(Format) %>%
  summarise(across(c(REM, Light, Deep), mean)) %>%
  pivot_longer(
    cols = !Format,
    names_to = "Stage",
    values_to = "AvgMinutes"
  )
# Stacked bars of average minutes per sleep stage for each reading format.
# The report-wide theme sets legend.position = "none", which would hide the
# "Sleep Stage" legend titled in labs() and leave the stage colours unlabeled,
# so the legend is switched back on for this plot with light text to suit the
# dark background.
StackedBars <- ggplot(SleepStageAvg, aes(x = Format, y = AvgMinutes, fill = Stage)) +
  geom_col() +
  scale_fill_manual(
    values = c("REM" = "#CBBBDD", "Light" = "#BFD8B8", "Deep" = "#FFD8BE")
  ) +
  labs(
    title = "Average Minutes in Sleep Stages by Format",
    x = "Reading Format",
    y = "Average Minutes",
    fill = "Sleep Stage"
  ) +
  theme(
    legend.position = "right",
    legend.title = element_text(color = "#f8f8f8"),
    legend.text = element_text(color = "#f8f8f8")
  )
ggplotly(StackedBars)
# Reading minutes plotted against Date as a connected line with a marker at
# each observation.
Lineplot <- SleepandReadData %>%
  ggplot(aes(x = Date, y = MinutesReading)) +
  geom_line(color = "#c3e2c2", linewidth = 1.2) +
  geom_point(color = "#c3e2c2", size = 3) +
  labs(
    x = "Date",
    y = "Minutes Reading",
    title = "Reading Time Over Two Weeks"
  )
ggplotly(Lineplot)
# Average pages read per weekday. DayofWeek is recoded into the factor column
# DayOfWeek with levels ordered Monday through Sunday so the bars follow the
# calendar week.
DayOfWeekSummary <- SleepandReadData %>%
  mutate(
    DayOfWeek = factor(
      DayofWeek,
      levels = c(
        "Monday", "Tuesday", "Wednesday", "Thursday",
        "Friday", "Saturday", "Sunday"
      )
    )
  ) %>%
  group_by(DayOfWeek) %>%
  summarise(AvgPages = mean(Pages, na.rm = TRUE))

# One bar per weekday, each with its own pastel fill.
DayofWeekPlot <- DayOfWeekSummary %>%
  ggplot(aes(x = DayOfWeek, y = AvgPages, fill = DayOfWeek)) +
  geom_col() +
  scale_fill_manual(
    values = c(
      "#C3E2C2", "#D8C7FF", "#FFD8BE", "#BFD8B8",
      "#FBE4C9", "#CBBBDD", "#A3C9A8"
    )
  ) +
  labs(
    x = "Day of Week",
    y = "Average Pages",
    title = "Average Pages Read by Day of Week"
  )
ggplotly(DayofWeekPlot)