gripclean <- read.csv("gripclean.csv")
str(gripclean)
## 'data.frame': 62 obs. of 18 variables:
## $ name : chr "Student46" "Student47" "Student48" "Student49" ...
## $ semester : chr "2022_Spring" "2022_Spring" "2022_Spring" "2022_spring" ...
## $ dom.hand : chr "Right" "Left" "Right" "Right" ...
## $ height.in : num 65 65 61.5 62 64 66 70 65 66 65 ...
## $ arm.circumference.in : num 9.3 9.4 11.8 12.5 9.1 ...
## $ dom.max.grip.lbs : num 79.6 52.5 61 42.8 67 73 75 43.8 61 82.8 ...
## $ non.dom.max.grip.lbs : num 43.2 57.8 57 47 53 56 65 32.4 67 59.6 ...
## $ dom.fatigue.secs : num 12.3 21.3 6.5 2.5 22.3 ...
## $ non.dom.fatigue.secs : chr "7.9" "16.25" "2.5" "" ...
## $ athlete.status : chr "No" "Yes" "No" "No" ...
## $ Avg.sleep.per.night.hrs : int 7 7 NA 6 7 7 7 7 7 7 ...
## $ age.yrs : int 20 20 19 19 19 19 19 19 22 20 ...
## $ birth.month : chr "July" "February" "April" "June" ...
## $ step.length.heeltoheel.in : num 27.3 24.7 22.4 24.1 26.3 24.8 33.3 22.1 26.5 29.8 ...
## $ step.length.heeltoheel.backward.in: num 19.3 14.6 18.4 15.3 24.7 23.2 26.2 16 20.2 24.5 ...
## $ drinkscaffiene : chr "Yes" "Yes" "Yes" "No" ...
## $ head.circumference.in : num 21 22.9 23.5 23.2 23.5 ...
## $ horoscopesign : chr "Cancer" "Aquarius" "Aries" "Gemini" ...
gripclean2 <- na.omit(gripclean)
str(gripclean2)
## 'data.frame': 14 obs. of 18 variables:
## $ name : chr "Student46" "Student47" "Student49" "Student50" ...
## $ semester : chr "2022_Spring" "2022_Spring" "2022_spring" "2022_Spring" ...
## $ dom.hand : chr "Right" "Left" "Right" "Right" ...
## $ height.in : num 65 65 62 64 66 70 65 65 64 69 ...
## $ arm.circumference.in : num 9.3 9.4 12.5 9.1 9.25 9.75 10.5 13.8 13.6 9.2 ...
## $ dom.max.grip.lbs : num 79.6 52.5 42.8 67 73 75 43.8 82.8 67.4 54.2 ...
## $ non.dom.max.grip.lbs : num 43.2 57.8 47 53 56 65 32.4 59.6 48.8 66 ...
## $ dom.fatigue.secs : num 12.3 21.3 2.5 22.3 44.7 ...
## $ non.dom.fatigue.secs : chr "7.9" "16.25" "" "42.55" ...
## $ athlete.status : chr "No" "Yes" "No" "No" ...
## $ Avg.sleep.per.night.hrs : int 7 7 6 7 7 7 7 7 8 8 ...
## $ age.yrs : int 20 20 19 19 19 19 19 20 20 20 ...
## $ birth.month : chr "July" "February" "June" "June" ...
## $ step.length.heeltoheel.in : num 27.3 24.7 24.1 26.3 24.8 33.3 22.1 29.8 25.7 28.8 ...
## $ step.length.heeltoheel.backward.in: num 19.3 14.6 15.3 24.7 23.2 26.2 16 24.5 20.6 23.3 ...
## $ drinkscaffiene : chr "Yes" "Yes" "No" "Yes" ...
## $ head.circumference.in : num 21 22.9 23.2 23.5 22.5 ...
## $ horoscopesign : chr "Cancer" "Aquarius" "Gemini" "Gemini" ...
## - attr(*, "na.action")= 'omit' Named int [1:48] 3 9 17 18 19 20 21 22 23 24 ...
## ..- attr(*, "names")= chr [1:48] "3" "9" "17" "18" ...
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
grip_complete <- filter(gripclean, !is.na(Avg.sleep.per.night.hrs))
str(grip_complete)
## 'data.frame': 61 obs. of 18 variables:
## $ name : chr "Student46" "Student47" "Student49" "Student50" ...
## $ semester : chr "2022_Spring" "2022_Spring" "2022_spring" "2022_Spring" ...
## $ dom.hand : chr "Right" "Left" "Right" "Right" ...
## $ height.in : num 65 65 62 64 66 70 65 66 65 64 ...
## $ arm.circumference.in : num 9.3 9.4 12.5 9.1 9.25 9.75 10.5 NA 13.8 13.6 ...
## $ dom.max.grip.lbs : num 79.6 52.5 42.8 67 73 75 43.8 61 82.8 67.4 ...
## $ non.dom.max.grip.lbs : num 43.2 57.8 47 53 56 65 32.4 67 59.6 48.8 ...
## $ dom.fatigue.secs : num 12.3 21.3 2.5 22.3 44.7 ...
## $ non.dom.fatigue.secs : chr "7.9" "16.25" "" "42.55" ...
## $ athlete.status : chr "No" "Yes" "No" "No" ...
## $ Avg.sleep.per.night.hrs : int 7 7 6 7 7 7 7 7 7 8 ...
## $ age.yrs : int 20 20 19 19 19 19 19 22 20 20 ...
## $ birth.month : chr "July" "February" "June" "June" ...
## $ step.length.heeltoheel.in : num 27.3 24.7 24.1 26.3 24.8 33.3 22.1 26.5 29.8 25.7 ...
## $ step.length.heeltoheel.backward.in: num 19.3 14.6 15.3 24.7 23.2 26.2 16 20.2 24.5 20.6 ...
## $ drinkscaffiene : chr "Yes" "Yes" "No" "Yes" ...
## $ head.circumference.in : num 21 22.9 23.2 23.5 22.5 ...
## $ horoscopesign : chr "Cancer" "Aquarius" "Gemini" "Gemini" ...
grip_complete$Avg.sleep.per.night.hrs <- as.factor(grip_complete$Avg.sleep.per.night.hrs)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
summarySE(data=grip_complete,
measurevar = "dom.fatigue.secs",
groupvars = "Avg.sleep.per.night.hrs")
## Avg.sleep.per.night.hrs N dom.fatigue.secs sd se ci
## 1 5 2 24.28500 17.98173 12.715000 161.559393
## 2 6 16 16.06000 13.20704 3.301759 7.037533
## 3 7 24 13.92833 10.57706 2.159034 4.466301
## 4 8 19 15.48316 16.89077 3.875008 8.141090
boxgrip <- ggplot(data = grip_complete,
aes(x=Avg.sleep.per.night.hrs,
y=dom.fatigue.secs)) # first layer
boxgrip +
geom_boxplot(fill=c(rainbow(4))) # add boxplot, add some color
boxgrip +
geom_boxplot(fill=c(rainbow(4))) +
geom_jitter(shape=16, position=position_jitter(0.2)) + # add some points
xlab("hours of sleep") +
ylab("fatigue of dominant hand (seconds)")+
ggtitle("does sleep time affect fatigue")
sleep.lm <- lm(dom.fatigue.secs ~
Avg.sleep.per.night.hrs, data = grip_complete)
anova(sleep.lm)
## Analysis of Variance Table
##
## Response: dom.fatigue.secs
## Df Sum Sq Mean Sq F value Pr(>F)
## Avg.sleep.per.night.hrs 3 216.5 72.162 0.3863 0.7633
## Residuals 57 10648.2 186.811
kruskal.test(dom.fatigue.secs ~
Avg.sleep.per.night.hrs, data = grip_complete)
##
## Kruskal-Wallis rank sum test
##
## data: dom.fatigue.secs by Avg.sleep.per.night.hrs
## Kruskal-Wallis chi-squared = 1.5004, df = 3, p-value = 0.6822
elliot <- read.csv("elliotplant2.csv")
str(elliot)
## 'data.frame': 18 obs. of 7 variables:
## $ Plant : int 1 2 4 6 7 10 11 12 13 14 ...
## $ Sun : chr "High" "High" "High" "High" ...
## $ Water : chr "High" "High" "High" "Low" ...
## $ Height.in: num 8.7 8.2 7.7 5.5 3.5 4.1 4.5 3.9 4.1 4.8 ...
## $ Weight.g : num 6.2 6.4 5 5 3.5 3.3 4.5 6.9 6.2 4.8 ...
## $ Plant2 : num 4.5 4.5 4.7 4.5 3.5 4.1 3 3.9 3.5 4 ...
## $ Plant3 : num 4.1 4.5 4.1 4.2 4.7 3.5 4 4.2 4.1 3 ...
library(Rmisc)
elliotSUM1 <- summarySE(data=elliot,
measurevar = "Height.in",
groupvars = c("Sun", "Water")) # notice there are two nominal variables
elliotSUM1
## Sun Water N Height.in sd se ci
## 1 High High 3 8.200000 0.5000000 0.2886751 1.2420689
## 2 High Low 3 4.366667 1.0263203 0.5925463 2.5495209
## 3 Low High 5 4.560000 0.6308724 0.2821347 0.7833316
## 4 Low Low 7 4.357143 0.4685337 0.1770891 0.4333214
sh <- ggplot(data=elliotSUM1,
aes(y=Height.in, x=Water, group=Sun)) +
geom_line(aes(linetype=Sun, color=Sun)) +
geom_point(aes(color=Sun))
sh + geom_errorbar(aes(ymax=Height.in+sd, ymin=Height.in-sd,
color=Sun), width=0.1, size=0.5) +
scale_x_discrete(limits=c("Low", "High")) +
ylab("Plant Height (in)")
elliot.lm <- lm(elliot$Height.in~elliot$Sun*elliot$Water)
anova(elliot.lm)
## Analysis of Variance Table
##
## Response: elliot$Height.in
## Df Sum Sq Mean Sq F value Pr(>F)
## elliot$Sun 1 13.5669 13.5669 34.435 4.088e-05 ***
## elliot$Water 1 9.1057 9.1057 23.112 0.0002786 ***
## elliot$Sun:elliot$Water 1 13.0560 13.0560 33.138 4.967e-05 ***
## Residuals 14 5.5158 0.3940
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
In simpler terms, sun is good for some growth. Water is good for some growth. Sun+Water is really really good for growth.
TukeyHSD(aov(elliot$Height.in~elliot$Sun*elliot$Water))
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = elliot$Height.in ~ elliot$Sun * elliot$Water)
##
## $`elliot$Sun`
## diff lwr upr p adj
## Low-High -1.841667 -2.51479 -1.168543 4.09e-05
##
## $`elliot$Water`
## diff lwr upr p adj
## Low-High -1.426875 -2.065456 -0.788294 0.0002866
##
## $`elliot$Sun:elliot$Water`
## diff lwr upr p adj
## Low:High-High:High -3.64000000 -4.972356 -2.3076439 0.0000080
## High:Low-High:High -3.83333333 -5.322953 -2.3437140 0.0000158
## Low:Low-High:High -3.84285714 -5.101815 -2.5838990 0.0000022
## High:Low-Low:High -0.19333333 -1.525689 1.1390227 0.9738155
## Low:Low-Low:High -0.20285714 -1.271119 0.8654043 0.9444848
## Low:Low-High:Low -0.00952381 -1.268482 1.2494343 0.9999960
Great!
ggplot(elliot, aes(y=Height.in, x=Water, fill=Sun)) +
geom_boxplot() +
geom_point(pch = 21,
position = position_jitterdodge(0.2)) # note that this is jitterdodge, used for multiple variables
Notice that in some samples - High water, high sun - there are only 3 datapoints and that some have a bit more. Seems like unequal sample size, huh! Well, we need a test for UNBALANCED designs. This is actually very common.
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
my_anova <- lm(elliot$Height.in ~ elliot$Sun * elliot$Water)
Anova(my_anova, type = "III")
## Anova Table (Type III tests)
##
## Response: elliot$Height.in
## Sum Sq Df F value Pr(>F)
## (Intercept) 201.720 1 511.997 2.006e-12 ***
## elliot$Sun 24.843 1 63.056 1.493e-06 ***
## elliot$Water 22.042 1 55.945 2.969e-06 ***
## elliot$Sun:elliot$Water 13.056 1 33.138 4.967e-05 ***
## Residuals 5.516 14
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
What if we only wanted to visualize Sun, or water.
ggplot(elliot, aes(y=Height.in, x=Water)) +
geom_boxplot() +
geom_point(pch = 21,
position = position_jitter(0.2))
ggplot(elliot, aes(y=Height.in, x=Sun)) +
geom_boxplot() +
geom_point(pch = 21,
position = position_jitter(0.2))
By looking at these, you might suspect that high sun or high water have an effect on plant growth. Looking at the interaction though, you can see that there are additive effects of high sun AND high water.