library(lessR)
##
## lessR 4.4.1 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data,
## graphics, testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation from pivot tables
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including time series forecasting
## Enter: news(package="lessR")
##
## Interactive data analysis
## Enter: interact()
##
## Attaching package: 'lessR'
## The following object is masked from 'package:base':
##
## sort_by
# Load the SleepStudy CSV from the Lock5 site with lessR's Read() and keep
# the resulting data frame as `sleep` for every analysis below.
# NOTE(review): the name `sleep` shadows the built-in datasets::sleep.
sleep <- Read(
  "https://www.lock5stat.com/datasets3e/SleepStudy.csv"
)
##
## >>> Suggestions
## Recommended binary format for data files: feather
## Create with Write(d, "your_file", format="feather")
## To read a csv or Excel file of variable labelsvar_labels=TRUE
## Each row of the file: Variable Name, Variable Label
## Read into a data frame named l (the letter el)
##
## More details about your data, Enter: details() for d, or details(name)
##
## Data Types
## ------------------------------------------------------------
## character: Non-numeric data values
## integer: Numeric data values, integers only
## double: Numeric data values with decimal digits
## ------------------------------------------------------------
##
## Variable Missing Unique
## Name Type Values Values Values First and last values
## ------------------------------------------------------------------------------------------
## 1 Gender integer 253 0 2 0 0 0 ... 0 1 1
## 2 ClassYear integer 253 0 4 4 4 4 ... 3 2 2
## 3 LarkOwl character 253 0 3 Neither Neither ... Neither Neither
## 4 NumEarlyClass integer 253 0 6 0 2 0 ... 2 2 5
## 5 EarlyClass integer 253 0 2 0 1 0 ... 1 1 1
## 6 GPA double 253 0 62 3.6 3.24 2.97 ... 3.5 2.6 2.5
## 7 ClassesMissed integer 253 0 15 0 0 12 ... 1 0 3
## 8 CognitionZscore double 253 0 160 -0.26 1.39 0.38 ... 1.3 -0.29 -1
## 9 PoorSleepQuality integer 253 0 16 4 6 18 ... 2 5 3
## 10 DepressionScore integer 253 0 27 4 1 18 ... 1 1 2
## 11 AnxietyScore integer 253 0 24 3 0 18 ... 1 1 1
## 12 StressScore integer 253 0 32 8 3 9 ... 1 3 2
## 13 DepressionStatus character 253 0 3 normal normal ... normal normal
## 14 AnxietyStatus character 253 0 3 normal normal ... normal normal
## 15 Stress character 253 0 2 normal normal ... normal normal
## 16 DASScore integer 253 0 61 15 4 45 ... 3 5 5
## 17 Happiness integer 253 0 27 28 25 17 ... 35 31 32
## 18 AlcoholUse character 253 0 4 Moderate Moderate ... Moderate Moderate
## 19 Drinks integer 253 0 18 10 6 3 ... 5 7 13
## 20 WeekdayBed double 253 0 98 25.75 25.7 27.44 ... 24.45 24.38 23.35
## 21 WeekdayRise double 253 0 138 8.7 8.2 6.55 ... 8.25 9.2 7.75
## 22 WeekdaySleep double 253 0 150 7.7 6.8 3 ... 7.8 8.97 8.4
## 23 WeekendBed double 253 0 51 25.75 26 28 ... 25 26 25
## 24 WeekendRise double 253 0 68 9.5 10 12.59 ... 9.75 9.88 10.88
## 25 WeekendSleep double 253 0 69 5.88 7.25 10.09 ... 8.75 7.5 9.88
## 26 AverageSleep double 253 0 160 7.18 6.93 5.02 ... 8.07 8.55 8.82
## 27 AllNighter integer 253 0 2 0 0 0 ... 0 0 0
## ------------------------------------------------------------------------------------------
# Preview the first six rows of the loaded data frame.
head(sleep, n = 6L)
## Gender ClassYear LarkOwl NumEarlyClass EarlyClass GPA ClassesMissed
## 1 0 4 Neither 0 0 3.60 0
## 2 0 4 Neither 2 1 3.24 0
## 3 0 4 Owl 0 0 2.97 12
## 4 0 1 Lark 5 1 3.76 0
## 5 0 4 Owl 0 0 3.20 4
## 6 1 4 Neither 0 0 3.50 0
## CognitionZscore PoorSleepQuality DepressionScore AnxietyScore StressScore
## 1 -0.26 4 4 3 8
## 2 1.39 6 1 0 3
## 3 0.38 18 18 18 9
## 4 1.39 9 1 4 6
## 5 1.22 9 7 25 14
## 6 -0.04 6 14 8 28
## DepressionStatus AnxietyStatus Stress DASScore Happiness AlcoholUse Drinks
## 1 normal normal normal 15 28 Moderate 10
## 2 normal normal normal 4 25 Moderate 6
## 3 moderate severe normal 45 17 Light 3
## 4 normal normal normal 11 32 Light 2
## 5 normal severe normal 46 15 Moderate 4
## 6 moderate moderate high 50 22 Abstain 0
## WeekdayBed WeekdayRise WeekdaySleep WeekendBed WeekendRise WeekendSleep
## 1 25.75 8.70 7.70 25.75 9.50 5.88
## 2 25.70 8.20 6.80 26.00 10.00 7.25
## 3 27.44 6.55 3.00 28.00 12.59 10.09
## 4 23.50 7.17 6.77 27.00 8.00 7.25
## 5 25.90 8.67 6.09 23.75 9.50 7.00
## 6 23.80 8.95 9.05 26.00 10.75 9.00
## AverageSleep AllNighter
## 1 7.18 0
## 2 6.93 0
## 3 5.02 0
## 4 6.90 0
## 5 6.35 0
## 6 9.04 0
This report examines various factors related to student behavior, academic performance, and mental well-being using statistical hypothesis testing.
All data come from the SleepStudy dataset available at https://www.lock5stat.com/datasets3e/SleepStudy.csv.
The analysis addresses the following questions, each examined with a Welch two-sample t-test and an accompanying plot:
# Welch two-sample t-test (unequal variances): compares mean GPA between
# the two Gender groups (coded 0 and 1).
t.test(
  sleep$GPA ~ sleep$Gender,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$GPA by sleep$Gender
## t = 3.9139, df = 200.9, p-value = 0.0001243
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## 0.09982254 0.30252780
## sample estimates:
## mean in group 0 mean in group 1
## 3.324901 3.123725
# lessR plot of the GPA distribution within each Gender group, filled blue.
Plot(
  GPA,
  by = Gender,
  data = sleep,
  fill = "blue"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(GPA, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(GPA, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ttest(GPA ~ Gender) # Add the data parameter if not the d data frame
## GPA
## - by levels of -
## Gender
##
## n miss mean sd min mdn max
## 0 151 0 3.325 0.375 2.000 3.300 4.000
## 1 102 0 3.124 0.418 2.000 3.150 4.000
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 0 14 3.3 3.5
## 1 14 3
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.39 size of plotted points
## out_size: 0.74 size of plotted outlier points
## jitter_y: 3.14 random vertical movement of points
## jitter_x: 0.79 random horizontal movement of points
## bw: 0.15 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean NumEarlyClass for class years 1-2 (x)
# versus class years 3-4 (y).
t.test(
  x = sleep$NumEarlyClass[sleep$ClassYear %in% c(1, 2)],
  y = sleep$NumEarlyClass[sleep$ClassYear %in% c(3, 4)],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$NumEarlyClass[sleep$ClassYear %in% c(1, 2)] and sleep$NumEarlyClass[sleep$ClassYear %in% c(3, 4)]
## t = 4.1813, df = 250.69, p-value = 0.00004009
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4042016 1.1240309
## sample estimates:
## mean of x mean of y
## 2.070423 1.306306
# lessR plot of NumEarlyClass by ClassYear, filled green.
# NOTE(review): this groups by all four ClassYear values, whereas the
# accompanying t-test pools years 1-2 against years 3-4.
Plot(
  NumEarlyClass,
  by = ClassYear,
  data = sleep,
  fill = "green"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(NumEarlyClass, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(NumEarlyClass, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ANOVA(NumEarlyClass ~ ClassYear) # Add the data parameter if not the d data frame
## Joint and Marginal Frequencies
## ------------------------------
##
## NumEarlyClass
## 0 1 2 3 4 5 Sum
## 1 8 7 11 8 7 6 47
## 2 31 4 26 20 3 11 95
## 3 21 1 26 3 0 3 54
## 4 25 2 25 4 1 0 57
## Sum 85 14 88 35 11 20 253
##
## Cramer's V: 0.269
##
## Chi-square Test of Independence:
## Chisq = 54.952, df = 15, p-value = 0.000
## >>> Low cell expected frequencies, chi-squared approximation may not be accurate
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 1 11 2
## 2 31 0
## 3 26 2
## 4 25 0 2
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.29 size of plotted points
## out_size: 0.70 size of plotted outlier points
## jitter_y: 4.63 random vertical movement of points
## jitter_x: 0.73 random horizontal movement of points
## bw: 0.89 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean CognitionZscore for LarkOwl == "Lark" (x)
# versus LarkOwl == "Owl" (y); rows labelled "Neither" are excluded.
t.test(
  x = sleep$CognitionZscore[sleep$LarkOwl == "Lark"],
  y = sleep$CognitionZscore[sleep$LarkOwl == "Owl"],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$CognitionZscore[sleep$LarkOwl == "Lark"] and sleep$CognitionZscore[sleep$LarkOwl == "Owl"]
## t = 0.80571, df = 75.331, p-value = 0.4229
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1893561 0.4465786
## sample estimates:
## mean of x mean of y
## 0.09024390 -0.03836735
# lessR plot of CognitionZscore by LarkOwl, filled yellow.
# NOTE(review): all three LarkOwl levels are shown, including "Neither",
# which the accompanying t-test leaves out.
Plot(
  CognitionZscore,
  by = LarkOwl,
  data = sleep,
  fill = "yellow"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(CognitionZscore, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(CognitionZscore, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ANOVA(CognitionZscore ~ LarkOwl) # Add the data parameter if not the d data frame
## CognitionZscore
## - by levels of -
## LarkOwl
##
## n miss mean sd min mdn max
## Lark 41 0 0.090 0.830 -1.620 0.120 1.890
## Neither 163 0 -0.011 0.692 -1.620 -0.040 1.960
## Owl 49 0 -0.038 0.653 -1.450 -0.010 1.220
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## Lark 2 0.91
## Neither 4 -0.05 -0.04
## Owl 2 -0.93 -0.04 0.52 0.79
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.31 size of plotted points
## out_size: 0.70 size of plotted outlier points
## jitter_y: 1.81 random vertical movement of points
## jitter_x: random horizontal movement of points
## bw: 0.20 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean ClassesMissed for EarlyClass == 1 (x)
# versus EarlyClass == 0 (y).
t.test(
  x = sleep$ClassesMissed[sleep$EarlyClass == 1],
  y = sleep$ClassesMissed[sleep$EarlyClass == 0],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$ClassesMissed[sleep$EarlyClass == 1] and sleep$ClassesMissed[sleep$EarlyClass == 0]
## t = -1.4755, df = 152.78, p-value = 0.1421
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.5412830 0.2233558
## sample estimates:
## mean of x mean of y
## 1.988095 2.647059
# lessR plot of ClassesMissed within each EarlyClass group, filled red.
Plot(
  ClassesMissed,
  by = EarlyClass,
  data = sleep,
  fill = "red"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(ClassesMissed, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(ClassesMissed, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ttest(ClassesMissed ~ EarlyClass) # Add the data parameter if not the d data frame
## ClassesMissed
## - by levels of -
## EarlyClass
##
## n miss mean sd min mdn max
## 0 85 0 2.65 3.48 0.00 2.00 20.00
## 1 168 0 1.99 3.10 0.00 1.00 20.00
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 0 28 0
## 1 66 0
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.20 size of plotted points
## out_size: 0.66 size of plotted outlier points
## jitter_y: 3.95 random vertical movement of points
## jitter_x: 0.81 random horizontal movement of points
## bw: 1.73 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean Happiness for AlcoholUse == "Abstain" (x)
# versus AlcoholUse == "Heavy" (y); other AlcoholUse levels are excluded.
t.test(
  x = sleep$Happiness[sleep$AlcoholUse == "Abstain"],
  y = sleep$Happiness[sleep$AlcoholUse == "Heavy"],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$Happiness[sleep$AlcoholUse == "Abstain"] and sleep$Happiness[sleep$AlcoholUse == "Heavy"]
## t = 0.052077, df = 25.095, p-value = 0.9589
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -3.258919 3.428036
## sample estimates:
## mean of x mean of y
## 26.64706 26.56250
# lessR plot of Happiness by AlcoholUse, filled gray.
# NOTE(review): all four AlcoholUse levels are shown, whereas the
# accompanying t-test compares only "Abstain" and "Heavy".
Plot(
  Happiness,
  by = AlcoholUse,
  data = sleep,
  fill = "gray"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(Happiness, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(Happiness, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ANOVA(Happiness ~ AlcoholUse) # Add the data parameter if not the d data frame
## Happiness
## - by levels of -
## AlcoholUse
##
## n miss mean sd min mdn max
## Abstain 34 0 26.65 4.68 13.00 28.00 35.00
## Heavy 16 0 26.56 5.64 11.00 28.50 34.00
## Light 83 0 25.93 5.58 5.00 27.00 35.00
## Moderate 120 0 26.02 5.67 0.00 28.00 35.00
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## Abstain 7 28
## Heavy 4 29
## Light 10 29
## Moderate 18 30
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.36 size of plotted points
## out_size: 0.72 size of plotted outlier points
## jitter_y: 4.76 random vertical movement of points
## jitter_x: 0.76 random horizontal movement of points
## bw: 2.60 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean PoorSleepQuality for AllNighter == 1 (x)
# versus AllNighter == 0 (y).
t.test(
  x = sleep$PoorSleepQuality[sleep$AllNighter == 1],
  y = sleep$PoorSleepQuality[sleep$AllNighter == 0],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$PoorSleepQuality[sleep$AllNighter == 1] and sleep$PoorSleepQuality[sleep$AllNighter == 0]
## t = 1.7068, df = 44.708, p-value = 0.09479
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1608449 1.9456958
## sample estimates:
## mean of x mean of y
## 7.029412 6.136986
# lessR plot of PoorSleepQuality within each AllNighter group, filled blue.
Plot(
  PoorSleepQuality,
  by = AllNighter,
  data = sleep,
  fill = "blue"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(PoorSleepQuality, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(PoorSleepQuality, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ttest(PoorSleepQuality ~ AllNighter) # Add the data parameter if not the d data frame
## PoorSleepQuality
## - by levels of -
## AllNighter
##
## n miss mean sd min mdn max
## 0 219 0 6.14 2.92 1.00 6.00 18.00
## 1 34 0 7.03 2.82 2.00 7.00 12.00
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 0 37 4
## 1 5 5
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.27 size of plotted points
## out_size: 0.69 size of plotted outlier points
## jitter_y: 4.49 random vertical movement of points
## jitter_x: 0.85 random horizontal movement of points
## bw: 1.05 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean StressScore for AlcoholUse == "Abstain" (x)
# versus AlcoholUse == "Heavy" (y); other AlcoholUse levels are excluded.
t.test(
  x = sleep$StressScore[sleep$AlcoholUse == "Abstain"],
  y = sleep$StressScore[sleep$AlcoholUse == "Heavy"],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$StressScore[sleep$AlcoholUse == "Abstain"] and sleep$StressScore[sleep$AlcoholUse == "Heavy"]
## t = -0.62604, df = 28.733, p-value = 0.5362
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -6.261170 3.327346
## sample estimates:
## mean of x mean of y
## 8.970588 10.437500
# lessR plot of StressScore by AlcoholUse, filled dark green.
# NOTE(review): all four AlcoholUse levels are shown, whereas the
# accompanying t-test compares only "Abstain" and "Heavy".
Plot(
  StressScore,
  by = AlcoholUse,
  data = sleep,
  fill = "darkgreen"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(StressScore, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(StressScore, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ANOVA(StressScore ~ AlcoholUse) # Add the data parameter if not the d data frame
## StressScore
## - by levels of -
## AlcoholUse
##
## n miss mean sd min mdn max
## Abstain 34 0 8.97 7.58 0.00 7.00 28.00
## Heavy 16 0 10.44 7.80 0.00 10.00 27.00
## Light 83 0 9.48 7.80 0.00 8.00 37.00
## Moderate 120 0 9.47 8.26 0.00 7.00 37.00
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## Abstain 4 2 8
## Heavy 2 0 16
## Light 9 4
## Moderate 10 2
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.32 size of plotted points
## out_size: 0.71 size of plotted outlier points
## jitter_y: 1.66 random vertical movement of points
## jitter_x: random horizontal movement of points
## bw: 3.15 set bandwidth higher for smoother edges
# Welch two-sample t-test (unequal variances): compares mean Drinks between
# the two Gender groups (coded 0 and 1).
t.test(
  sleep$Drinks ~ sleep$Gender,
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$Drinks by sleep$Gender
## t = -6.1601, df = 142.75, p-value = 0.000000007002
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
## -4.360009 -2.241601
## sample estimates:
## mean in group 0 mean in group 1
## 4.238411 7.539216
# lessR plot of the Drinks distribution within each Gender group,
# filled purple.
Plot(
  Drinks,
  by = Gender,
  data = sleep,
  fill = "purple"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(Drinks, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(Drinks, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ttest(Drinks ~ Gender) # Add the data parameter if not the d data frame
## Drinks
## - by levels of -
## Gender
##
## n miss mean sd min mdn max
## 0 151 0 4.24 2.72 0.00 4.00 12.00
## 1 102 0 7.54 4.93 0.00 8.00 24.00
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 0 25 3
## 1 20 10
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.32 size of plotted points
## out_size: 0.71 size of plotted outlier points
## jitter_y: 3.73 random vertical movement of points
## jitter_x: 0.79 random horizontal movement of points
## bw: 1.79 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean WeekdayBed for EarlyClass == 1 (x)
# versus EarlyClass == 0 (y).
t.test(
  x = sleep$WeekdayBed[sleep$EarlyClass == 1],
  y = sleep$WeekdayBed[sleep$EarlyClass == 0],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$WeekdayBed[sleep$EarlyClass == 1] and sleep$WeekdayBed[sleep$EarlyClass == 0]
## t = -2.4271, df = 162.34, p-value = 0.01631
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.60840445 -0.06253673
## sample estimates:
## mean of x mean of y
## 24.73500 25.07047
# lessR plot of WeekdayBed within each EarlyClass group, filled cyan.
Plot(
  WeekdayBed,
  by = EarlyClass,
  data = sleep,
  fill = "cyan"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(WeekdayBed, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(WeekdayBed, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ttest(WeekdayBed ~ EarlyClass) # Add the data parameter if not the d data frame
## WeekdayBed
## - by levels of -
## EarlyClass
##
## n miss mean sd min mdn max
## 0 85 0 25.070 1.053 22.300 24.950 29.100
## 1 168 0 24.735 1.008 21.800 24.675 27.670
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 0 4 24.2 24.3 24.7
## 1 12 24.4
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.33 size of plotted points
## out_size: 0.71 size of plotted outlier points
## jitter_y: 1.79 random vertical movement of points
## jitter_x: random horizontal movement of points
## bw: 0.56 set bandwidth higher for smoother edges
# Welch two-sample t-test: mean WeekendSleep for class years 1-2 (x)
# versus class years 3-4 (y).
t.test(
  x = sleep$WeekendSleep[sleep$ClassYear %in% c(1, 2)],
  y = sleep$WeekendSleep[sleep$ClassYear %in% c(3, 4)],
  var.equal = FALSE
)
##
## Welch Two Sample t-test
##
## data: sleep$WeekendSleep[sleep$ClassYear %in% c(1, 2)] and sleep$WeekendSleep[sleep$ClassYear %in% c(3, 4)]
## t = -0.047888, df = 237.36, p-value = 0.9618
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3497614 0.3331607
## sample estimates:
## mean of x mean of y
## 8.213592 8.221892
# lessR plot of WeekendSleep by ClassYear, filled orange.
# NOTE(review): this groups by all four ClassYear values, whereas the
# accompanying t-test pools years 1-2 against years 3-4.
Plot(
  WeekendSleep,
  by = ClassYear,
  data = sleep,
  fill = "orange"
)
## [Violin/Box/Scatterplot graphics from Deepayan Sarkar's lattice package]
##
## >>> Suggestions
## Plot(WeekendSleep, out_cut=2, fences=TRUE, vbs_mean=TRUE) # Label two outliers ...
## Plot(WeekendSleep, box_adj=TRUE) # Adjust boxplot whiskers for asymmetry
## ANOVA(WeekendSleep ~ ClassYear) # Add the data parameter if not the d data frame
## WeekendSleep
## - by levels of -
## ClassYear
##
## n miss mean sd min mdn max
## 1 47 0 8.226 1.459 5.000 8.250 10.750
## 2 95 0 8.208 1.339 4.000 8.250 11.000
## 3 54 0 8.160 1.458 4.380 8.315 11.500
## 4 57 0 8.281 1.276 5.750 8.250 12.750
##
##
## Max Dupli-
## Level cations Values
## ------------------------------
## 1 3 6.75 8.25 8.5
## 2 9 7.5
## 3 7 8.5
## 4 7 9
##
## Parameter values (can be manually set)
## -------------------------------------------------------
## size: 0.34 size of plotted points
## out_size: 0.72 size of plotted outlier points
## jitter_y: 1.53 random vertical movement of points
## jitter_x: random horizontal movement of points
## bw: 0.54 set bandwidth higher for smoother edges