Part 1
# libraries
library(ggplot2)
library(readr)
# load the snail data set from its CSV file into a data frame
snail_regression <- read.csv(file = "snails_regression_for_R.csv")
# echo the full table (columns: DISTANCE, ZONE, EGGS, ZONE_NUM)
print(x = snail_regression)
## DISTANCE ZONE EGGS ZONE_NUM
## 1 4.0 lowtide 9 1
## 2 4.0 lowtide 8 1
## 3 3.9 lowtide 12 1
## 4 3.9 lowtide 9 1
## 5 3.8 lowtide 11 1
## 6 3.8 lowtide 11 1
## 7 3.7 lowtide 10 1
## 8 3.6 lowtide 5 1
## 9 3.5 lowtide 7 1
## 10 3.5 lowtide 9 1
## 11 3.4 lowtide 10 1
## 12 3.4 lowtide 9 1
## 13 3.3 lowtide 6 1
## 14 3.2 lowtide 12 1
## 15 3.2 lowtide 11 1
## 16 3.1 lowtide 8 1
## 17 3.0 midtide 11 2
## 18 2.9 midtide 8 2
## 19 2.9 midtide 18 2
## 20 2.8 midtide 10 2
## 21 2.7 midtide 9 2
## 22 2.7 midtide 13 2
## 23 2.6 midtide 15 2
## 24 2.5 midtide 12 2
## 25 2.5 midtide 9 2
## 26 2.5 midtide 15 2
## 27 2.4 midtide 11 2
## 28 2.3 midtide 13 2
## 29 2.3 midtide 10 2
## 30 2.2 midtide 13 2
## 31 2.1 midtide 14 2
## 32 2.1 midtide 17 2
## 33 2.0 hightide 15 3
## 34 2.0 hightide 14 3
## 35 1.9 hightide 14 3
## 36 1.9 hightide 20 3
## 37 1.8 hightide 17 3
## 38 1.7 hightide 16 3
## 39 1.7 hightide 19 3
## 40 1.6 hightide 15 3
## 41 1.5 hightide 15 3
## 42 1.5 hightide 17 3
## 43 1.4 hightide 20 3
## 44 1.3 hightide 18 3
## 45 1.3 hightide 18 3
## 46 1.2 hightide 14 3
## 47 1.1 hightide 15 3
## 48 1.0 hightide 17 3
# scatterplot of EGGS against DISTANCE with a linear-model fit overlaid
p1 <- ggplot(data = snail_regression, mapping = aes(x = DISTANCE, y = EGGS)) +
  # data points: filled circles (shape 21) with a light blue interior
  geom_point(shape = 21, size = 3, fill = "lightblue") +
  # linear model regression line (black) with a red band around it
  geom_smooth(method = lm, color = "black", fill = "red") +
  # title and axis labels
  labs(
    title = "Snail Scatter Plot",
    x = "Distance from High Tide (m)",
    y = "Number of Eggs Produced"
  ) +
  # classic base theme, followed by text-size tweaks for title and axes
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )
# display the finished plot
p1
## `geom_smooth()` using formula = 'y ~ x'

# fit EGGS as a linear function of DISTANCE, then report the fit summary
modl <- lm(formula = EGGS ~ DISTANCE, data = snail_regression)
summary(modl)
##
## Call:
## lm(formula = EGGS ~ DISTANCE, data = snail_regression)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2442 -1.3498 -0.0587 1.2352 6.4465
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 21.1204 1.1016 19.172 < 2e-16 ***
## DISTANCE -3.2989 0.4076 -8.093 2.16e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.477 on 46 degrees of freedom
## Multiple R-squared: 0.5874, Adjusted R-squared: 0.5785
## F-statistic: 65.5 on 1 and 46 DF, p-value: 2.156e-10
Part 2
# libraries
library(ggplot2)
library(readr)
# load the marsh snail data set from its CSV file into a data frame
marsh_snail2 <- read.csv(file = "marsh-snail-data-two.csv")
# echo the full table (columns: State, Latitude, Height)
print(x = marsh_snail2)
## State Latitude Height
## 1 Florida 29.97636 5.843091
## 2 Florida 30.43704 7.417985
## 3 Florida 30.32216 5.416990
## 4 Florida 29.96138 9.533414
## 5 Florida 30.65199 5.922261
## 6 Florida 30.65167 7.034838
## 7 Florida 29.92675 7.496119
## 8 Florida 30.55372 5.324035
## 9 Florida 30.22841 3.730801
## 10 Florida 30.30138 9.441726
## 11 Georgia 30.71591 10.440276
## 12 Georgia 30.92205 4.048434
## 13 Georgia 31.78489 10.421373
## 14 Georgia 31.86330 11.223543
## 15 Georgia 31.38938 6.888101
## 16 Georgia 31.54030 2.113660
## 17 Georgia 31.83065 8.238671
## 18 Georgia 31.08275 5.403010
## 19 Georgia 31.59380 8.650702
## 20 Georgia 30.90311 7.010132
## 21 SouthCarolina 33.43508 7.568949
## 22 SouthCarolina 33.64718 5.503450
## 23 SouthCarolina 33.17228 6.713329
## 24 SouthCarolina 32.51065 6.791677
## 25 SouthCarolina 33.59657 8.415815
## 26 SouthCarolina 32.83199 7.438263
## 27 SouthCarolina 32.74251 4.204901
## 28 SouthCarolina 32.87554 6.562090
## 29 SouthCarolina 32.43516 11.647446
## 30 SouthCarolina 32.15816 8.830496
## 31 NorthCarolina 36.49926 9.902741
## 32 NorthCarolina 34.92810 14.266457
## 33 NorthCarolina 34.88198 8.966930
## 34 NorthCarolina 35.37086 13.092618
## 35 NorthCarolina 35.10911 8.205240
## 36 NorthCarolina 34.38543 5.702840
## 37 NorthCarolina 35.00921 9.562529
## 38 NorthCarolina 34.10425 11.247638
## 39 NorthCarolina 34.16465 11.709933
## 40 NorthCarolina 35.04468 13.584341
# scatterplot of Height against Latitude with a linear-model fit overlaid
p2 <- ggplot(data = marsh_snail2, mapping = aes(x = Latitude, y = Height)) +
  # data points: filled circles (shape 21) with a light blue interior
  geom_point(shape = 21, size = 3, fill = "lightblue") +
  # linear model regression line (black) with a red band around it
  geom_smooth(method = lm, color = "black", fill = "red") +
  # title and axis labels
  labs(
    title = "Body Size of Marsh Periwinkles by Location",
    x = "U.S. State (Latitude)",
    y = "Body Height"
  ) +
  # classic base theme, followed by text-size tweaks for title and axes
  theme_classic() +
  theme(
    plot.title = element_text(face = "bold", size = 16),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14)
  )
# display the finished plot
p2
## `geom_smooth()` using formula = 'y ~ x'

# fit Height as a linear function of Latitude, then report the fit summary
modl2 <- lm(formula = Height ~ Latitude, data = marsh_snail2)
summary(modl2)
##
## Call:
## lm(formula = Height ~ Latitude, data = marsh_snail2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5.3039 -1.4490 -0.4249 2.0594 4.3711
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -15.6503 6.9834 -2.241 0.03094 *
## Latitude 0.7314 0.2153 3.397 0.00161 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.47 on 38 degrees of freedom
## Multiple R-squared: 0.233, Adjusted R-squared: 0.2128
## F-statistic: 11.54 on 1 and 38 DF, p-value: 0.001608