knitr::opts_chunk$set(echo = TRUE)
#install.packages("dplyr")
#install.packages("ggplot2")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Read the three snail data sets from CSV files (paths are relative to the
# working directory). Each becomes a data frame used by the analyses below.
invasiveData <- read.csv(file = "snail_invasive_data.csv")
snailRegression <- read.csv(file = "snails regression for R.csv")
marshSnails <- read.csv(file = "Marsh_Snail_Data_5.csv")
# Scatter plot of EGGS against DISTANCE from the snailRegression table.
ggplot(data = snailRegression, mapping = aes(x = DISTANCE, y = EGGS)) +
  geom_point() +
  labs(x = "Distance from High Tide (m)", y = "Number of Eggs") +
  theme_classic()
# A): Null hypothesis: the distance from the high-tide mark has no effect on the number of eggs in a capsule.
# Simple linear regression of egg count on distance; the summary reports the
# coefficients, R-squared values, and overall F-test.
modl <- lm(formula = EGGS ~ DISTANCE, data = snailRegression)
summary(modl)
##
## Call:
## lm(formula = EGGS ~ DISTANCE, data = snailRegression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2442 -1.3498 -0.0587 1.2352 6.4465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.1204 1.1016 19.172 < 2e-16 ***
## DISTANCE -3.2989 0.4076 -8.093 2.16e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.477 on 46 degrees of freedom
## Multiple R-squared: 0.5874, Adjusted R-squared: 0.5785
## F-statistic: 65.5 on 1 and 46 DF, p-value: 2.156e-10
# B):
## EQUATION: y = -3.299x + 21.12
## R^2 VALUE: 0.587 (multiple R-squared; the adjusted R-squared is 0.579)
## F-STATISTIC: 65.5 on 1 and 46 degrees of freedom
## p-value: 2.156 x 10^(-10), which is a pretty small number
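#C): We should reject the null hypothesis: the p-value is far below 0.05, so the number of eggs declines significantly with distance from the high-tide mark.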
# Plot the same EGGS-vs-DISTANCE scatter, now with a fitted linear-model line
# (geom_smooth with method = "lm") drawn over the points.
ggplot(data = snailRegression, mapping = aes(x = DISTANCE, y = EGGS)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Distance from high-tide mark (m)", y = "Number of Eggs") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
#5:
## In the document.
#PART II
#The current thought is that body size has a correlation with latitude. (All of the latitudes given are above the equator, so higher value = closer to the North Pole)
# Linear regression of snail height on latitude, pooling all rows of
# marshSnails (the Snail column is not used in this model).
marshModl <- lm(formula = Height ~ Latitude, data = marshSnails)
summary(marshModl)
##
## Call:
## lm(formula = Height ~ Latitude, data = marshSnails)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3020 -1.5387 -0.2064 1.6549 6.6886
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -9.6213 4.3186 -2.228 0.028770 *
## Latitude 0.5164 0.1326 3.894 0.000207 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.298 on 78 degrees of freedom
## Multiple R-squared: 0.1628, Adjusted R-squared: 0.152
## F-statistic: 15.16 on 1 and 78 DF, p-value: 0.0002068
# Scatter plot of snail height against latitude for the marsh snail data.
ggplot(data = marshSnails, mapping = aes(x = Latitude, y = Height)) +
  geom_point() +
  labs(x = "Latitude", y = "Snail height (mm)") +
  theme_classic()
# Same height-vs-latitude scatter, with a fitted linear-model line added.
ggplot(data = marshSnails, mapping = aes(x = Latitude, y = Height)) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Latitude", y = "Snail height (mm)") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
##
#PART III
#I already read in the invasive snail data during the first chunk.
# Eggs vs distance, coloured by the SNAIL column; because colour is mapped in
# the top-level aes(), geom_smooth fits a separate line for each SNAIL group.
ggplot(
  data = invasiveData,
  mapping = aes(x = DISTANCE, y = EGGS, color = SNAIL)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Distance from High Tide (m)", y = "Number of Eggs") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
# Model eggs on distance, snail group, and their interaction
# (DISTANCE * SNAIL expands to DISTANCE + SNAIL + DISTANCE:SNAIL).
invasiveModl <- lm(formula = EGGS ~ DISTANCE * SNAIL, data = invasiveData)
summary(invasiveModl)
##
## Call:
## lm(formula = EGGS ~ DISTANCE * SNAIL, data = invasiveData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.6400 -1.8837 -0.2113 1.9823 6.4465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 30.6239 1.3324 22.983 < 2e-16 ***
## DISTANCE -2.9866 0.4930 -6.058 2.98e-08 ***
## SNAILNative -9.5035 1.8843 -5.043 2.29e-06 ***
## DISTANCE:SNAILNative -0.3123 0.6972 -0.448 0.655
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.996 on 92 degrees of freedom
## Multiple R-squared: 0.7988, Adjusted R-squared: 0.7922
## F-statistic: 121.7 on 3 and 92 DF, p-value: < 2.2e-16
# Analysis-of-variance table for the interaction model's terms.
anova(object = invasiveModl)
## Analysis of Variance Table
##
## Response: EGGS
## Df Sum Sq Mean Sq F value Pr(>F)
## DISTANCE 1 729.67 729.67 81.2683 2.7e-14 ***
## SNAIL 1 2547.11 2547.11 283.6874 < 2e-16 ***
## DISTANCE:SNAIL 1 1.80 1.80 0.2007 0.6552
## Residuals 92 826.03 8.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
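## DISTANCE:SNAIL has a high Pr(>F) (0.655), indicating that there is no statistically significant difference between the two snails' slopes, i.e. in how egg number changes with distance. This makes sense; in the graph above, the lines look nearly parallel. (The SNAIL term itself is significant, so the lines do differ in height: at distance 0 the native snail is estimated to have about 9.5 fewer eggs.)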
#PART IV
#Hypothesis: body size (height) increases as latitude increases.
# The null hypotheses are: 1.) latitude has no effect on height, and 2.) species has no impact on the regression line of height-latitude.
# Model height on latitude, snail species, and their interaction, so that each
# species can have its own intercept and slope.
marshModl2 <- lm(formula = Height ~ Latitude * Snail, data = marshSnails)
summary(marshModl2)
##
## Call:
## lm(formula = Height ~ Latitude * Snail, data = marshSnails)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.4725 -1.1743 -0.2343 1.3067 4.6076
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -27.4977 5.0937 -5.398 7.39e-07 ***
## Latitude 1.0866 0.1564 6.947 1.10e-09 ***
## SnailMelampus 35.7529 7.2036 4.963 4.14e-06 ***
## Latitude:SnailMelampus -1.1405 0.2212 -5.156 1.94e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.916 on 76 degrees of freedom
## Multiple R-squared: 0.4326, Adjusted R-squared: 0.4102
## F-statistic: 19.31 on 3 and 76 DF, p-value: 2.093e-09
# Analysis-of-variance table for the height ~ latitude * species model.
anova(object = marshModl2)
## Analysis of Variance Table
##
## Response: Height
## Df Sum Sq Mean Sq F value Pr(>F)
## Latitude 1 80.065 80.065 21.7986 1.278e-05 ***
## Snail 1 35.074 35.074 9.5492 0.002795 **
## Latitude:Snail 1 97.650 97.650 26.5864 1.941e-06 ***
## Residuals 76 279.143 3.673
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
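#It looks like we can reject both of the null hypotheses: the Latitude term and the Latitude:Snail interaction are both significant. However, the interaction almost exactly cancels the latitude slope for Melampus (1.0866 - 1.1405 = about -0.05), so latitude may have no effect on the size of Melampus. We'll see in the plot below.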
# Height vs latitude, coloured by the Snail column, with a separate fitted
# linear-model line for each group.
ggplot(
  data = marshSnails,
  mapping = aes(x = Latitude, y = Height, color = Snail)
) +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(x = "Latitude", y = "Snail Height (mm)") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'