### Loading the data
DATA <- read.csv(file = "~/Downloads/barnacle.csv")
# Sanity-check the import: column names, first rows, then dimensions.
colnames(DATA)
## [1] "Trial" "Time" "Feeding" "Site"
head(DATA)
## Trial Time Feeding Site
## 1 T1 Two 0.077 Polluted
## 2 T1 Four 0.058 Polluted
## 3 T1 Six 0.115 Polluted
## 4 T1 Eight 0.212 Polluted
## 5 T1 Ten 0.250 Polluted
## 6 T1 Twelve 0.288 Polluted
dim(DATA)
## [1] 60 4
# Exploratory boxplot of the feeding proportion at each site.
boxplot(
  Feeding ~ Site,
  data = DATA,
  xlab = "Site",
  ylab = "Proportion Feeding"
)
# Linear model of Feeding on Site; its residuals feed the density plot below.
fit <- lm(Feeding ~ Site, data = DATA)
Before running the t-test, we must determine whether the data are
approximately normal. To do this, we look at the following density plot
and Shapiro-Wilk tests.
# Kernel density of the model residuals, one curve per site. lattice's
# grouping argument is `groups`; it is spelled out in full here instead of
# relying on partial argument matching of `group`.
lattice::densityplot(~ residuals(fit), groups = Site, data = DATA,
                     auto.key = TRUE)
# Shapiro-Wilk normality test on the Feeding values from the Polluted site
with(DATA, shapiro.test(Feeding[Site == "Polluted"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Site == "Polluted"]
## W = 0.97877, p-value = 0.7919
# Shapiro-Wilk normality test on the Feeding values from the Unpolluted site
with(DATA, shapiro.test(Feeding[Site == "Unpolluted"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Site == "Unpolluted"]
## W = 0.94772, p-value = 0.1468
After looking at the density plot and the two Shapiro-Wilk tests, it can
be determined that the data are approximately normal and do not need to be
transformed.
# Two-sample t-test of Feeding between sites. var.equal = TRUE selects the
# pooled-variance (Student) form instead of the Welch approximation.
t.test(Feeding ~ Site, data = DATA, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Feeding by Site
## t = -2.1686, df = 58, p-value = 0.03423
## alternative hypothesis: true difference in means between group Polluted and group Unpolluted is not equal to 0
## 95 percent confidence interval:
## -0.099101788 -0.003964879
## sample estimates:
## mean in group Polluted mean in group Unpolluted
## 0.2177000 0.2692333
# loading psych
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
# Descriptive statistics of Feeding, reported separately for each site
describeBy(DATA[["Feeding"]], group = DATA[["Site"]])
##
## Descriptive statistics by group
## group: Polluted
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 0.22 0.1 0.23 0.22 0.09 0 0.44 0.44 -0.14 -0.23 0.02
## ------------------------------------------------------------
## group: Unpolluted
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 0.27 0.09 0.27 0.27 0.11 0.13 0.44 0.31 0.3 -1.11 0.02
# loading dplyr and ggplot2
library(dplyr)
library(ggplot2)
# Per-site mean and standard error of the feeding proportion.
# summarise() returns one row per Site (a data frame, not a list).
means <- DATA %>%
  group_by(Site) %>%
  summarise(
    mean = mean(Feeding),
    se = sd(Feeding) / sqrt(n())
  )
# Order the bars Unpolluted -> Polluted. The factor has to replace `Site`,
# the column mapped to the x axis below. Previously it was written to a
# separate `Feeding` column that the plot never used, so the requested
# order had no effect.
means$Site <- factor(means$Site, levels = c("Unpolluted", "Polluted"))
# Bar chart of the group means; each error bar extends one SE above the mean.
ggplot(means, aes(x = Site, y = mean)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = mean, ymax = mean + se), width = 0.2) +
  labs(x = "Site", y = "Proportion Feeding") +
  theme_classic()
### Loading the data
DATA <- read.csv(file = "~/Downloads/finaldry.csv")
# Sanity-check the import: column names, first rows, then dimensions.
colnames(DATA)
## [1] "Trial" "Time" "Feeding" "Site"
head(DATA)
## Trial Time Feeding Site
## 1 T3 Four 0.178 Polluted
## 2 T3 Six 0.289 Polluted
## 3 T3 Eight 0.422 Polluted
## 4 T3 Ten 0.400 Polluted
## 5 T3 Twelve 0.467 Polluted
## 6 T3 Fourteen 0.556 Polluted
dim(DATA)
## [1] 59 4
# Exploratory boxplot of the feeding proportion at each site.
boxplot(
  Feeding ~ Site,
  data = DATA,
  xlab = "Site",
  ylab = "Proportion Feeding"
)
# Linear model of Feeding on Site; its residuals feed the density plot below.
fit <- lm(Feeding ~ Site, data = DATA)
Before running the t-test, we must determine whether the data are
approximately normal. To do this, we look at the following density plot
and Shapiro-Wilk tests.
# Kernel density of the model residuals, one curve per site. lattice's
# grouping argument is `groups`; it is spelled out in full here instead of
# relying on partial argument matching of `group`.
lattice::densityplot(~ residuals(fit), groups = Site, data = DATA,
                     auto.key = TRUE)
# Shapiro-Wilk normality test on the Feeding values from the Polluted site
with(DATA, shapiro.test(Feeding[Site == "Polluted"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Site == "Polluted"]
## W = 0.98273, p-value = 0.9014
# Shapiro-Wilk normality test on the Feeding values from the Unpolluted site
with(DATA, shapiro.test(Feeding[Site == "Unpolluted"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Site == "Unpolluted"]
## W = 0.96547, p-value = 0.4238
After looking at the density plot and the two Shapiro-Wilk tests, it can
be determined that the data are approximately normal and do not need to be
transformed.
# Two-sample t-test of Feeding between sites. var.equal = TRUE selects the
# pooled-variance (Student) form instead of the Welch approximation.
t.test(Feeding ~ Site, data = DATA, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Feeding by Site
## t = 0.7713, df = 57, p-value = 0.4437
## alternative hypothesis: true difference in means between group Polluted and group Unpolluted is not equal to 0
## 95 percent confidence interval:
## -0.04833411 0.10889503
## sample estimates:
## mean in group Polluted mean in group Unpolluted
## 0.4404138 0.4101333
# loading psych
library(psych)
# Descriptive statistics of Feeding, reported separately for each site
describeBy(DATA[["Feeding"]], group = DATA[["Site"]])
##
## Descriptive statistics by group
## group: Polluted
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 29 0.44 0.15 0.44 0.44 0.13 0.14 0.76 0.62 -0.01 -0.5 0.03
## ------------------------------------------------------------
## group: Unpolluted
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 30 0.41 0.15 0.42 0.42 0.16 0.05 0.64 0.59 -0.44 -0.38 0.03
# loading dplyr and ggplot2
library(dplyr)
library(ggplot2)
# Per-site mean and standard error of the feeding proportion.
# summarise() returns one row per Site (a data frame, not a list).
means <- DATA %>%
  group_by(Site) %>%
  summarise(
    mean = mean(Feeding),
    se = sd(Feeding) / sqrt(n())
  )
# Order the bars Unpolluted -> Polluted. The factor has to replace `Site`,
# the column mapped to the x axis below. Previously it was written to a
# separate `Feeding` column that the plot never used, so the requested
# order had no effect.
means$Site <- factor(means$Site, levels = c("Unpolluted", "Polluted"))
# Bar chart of the group means; each error bar extends one SE above the mean.
ggplot(means, aes(x = Site, y = mean)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = mean, ymax = mean + se), width = 0.2) +
  labs(x = "Site", y = "Proportion Feeding") +
  theme_classic()
### Loading the data
DATA <- read.csv(file = "~/Downloads/normdry11.csv")
# Sanity-check the import: column names, first rows, then dimensions.
colnames(DATA)
## [1] "Trial" "Time" "Feeding" "Site"
head(DATA)
## Trial Time Feeding Site
## 1 normal Two 0.077 Polluted
## 2 normal Four 0.058 Polluted
## 3 normal Six 0.115 Polluted
## 4 normal Eight 0.212 Polluted
## 5 normal Ten 0.250 Polluted
## 6 normal Twelve 0.288 Polluted
dim(DATA)
## [1] 119 4
# Exploratory boxplot of the feeding proportion for each trial type.
boxplot(
  Feeding ~ Trial,
  data = DATA,
  xlab = "Trial type",
  ylab = "Proportion Feeding"
)
# Linear model of Feeding on Trial; its residuals feed the density plot below.
fit <- lm(Feeding ~ Trial, data = DATA)
Before running the t-test, we must determine whether the data are
approximately normal. To do this, we look at the following density plot
and Shapiro-Wilk tests.
# Kernel density of the model residuals, one curve per trial type. lattice's
# grouping argument is `groups`; it is spelled out in full here instead of
# relying on partial argument matching of `group`.
lattice::densityplot(~ residuals(fit), groups = Trial, data = DATA,
                     auto.key = TRUE)
# Shapiro-Wilk normality test on the Feeding values from the "normal" trials
# (this section groups by Trial, not by Site)
with(DATA, shapiro.test(Feeding[Trial == "normal"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Trial == "normal"]
## W = 0.98681, p-value = 0.7633
# Shapiro-Wilk normality test on the Feeding values from the "dry" trials
with(DATA, shapiro.test(Feeding[Trial == "dry"]))
##
## Shapiro-Wilk normality test
##
## data: Feeding[Trial == "dry"]
## W = 0.9898, p-value = 0.903
After looking at the density plot and the two Shapiro-Wilk tests (p = 0.7633
and p = 0.903), it can be determined that the data are approximately normal
and do not need to be transformed.
# Two-sample t-test of Feeding between trial types. var.equal = TRUE selects
# the pooled-variance (Student) form instead of the Welch approximation.
# NOTE(review): the group SDs reported by describeBy are 0.15 (dry) and
# 0.09 (normal); confirm that the equal-variance assumption is acceptable.
t.test(Feeding ~ Trial, data = DATA, var.equal = TRUE)
##
## Two Sample t-test
##
## data: Feeding by Trial
## t = 7.8958, df = 117, p-value = 1.73e-12
## alternative hypothesis: true difference in means between group dry and group normal is not equal to 0
## 95 percent confidence interval:
## 0.1360134 0.2270872
## sample estimates:
## mean in group dry mean in group normal
## 0.4250169 0.2434667
# loading psych
library(psych)
# Descriptive statistics of Feeding, reported separately for each trial type
describeBy(DATA[["Feeding"]], group = DATA[["Trial"]])
##
## Descriptive statistics by group
## group: dry
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 59 0.43 0.15 0.42 0.43 0.15 0.05 0.76 0.7 -0.21 -0.26 0.02
## ------------------------------------------------------------
## group: normal
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 60 0.24 0.09 0.23 0.24 0.09 0 0.44 0.44 -0.01 -0.25 0.01
# loading dplyr and ggplot2
library(dplyr)
library(ggplot2)
# Per-trial mean and standard error of the feeding proportion.
# summarise() returns one row per Trial (a data frame, not a list).
means <- DATA %>%
  group_by(Trial) %>%
  summarise(
    mean = mean(Feeding),
    se = sd(Feeding) / sqrt(n())
  )
# Order the bars normal -> dry. The factor has to replace `Trial`, the
# column mapped to the x axis below. Previously it was written to a
# separate `Feeding` column that the plot never used, so the requested
# order had no effect.
means$Trial <- factor(means$Trial, levels = c("normal", "dry"))
# Bar chart of the group means; each error bar extends one SE above the mean.
ggplot(means, aes(x = Trial, y = mean)) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = mean, ymax = mean + se), width = 0.2) +
  labs(x = "Trial", y = "Proportion Feeding") +
  theme_classic()