library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ───────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the Maine temperature data and preview its first rows.
Maine_temp <- read.csv(file = "Maine_Temp.csv")
head(Maine_temp)

# Longitude/latitude scatter of the observations, coloured by TMIN on a
# blue-to-red gradient.
ggplot(Maine_temp, aes(x = LONGITUDE, y = LATITUDE)) +
  geom_point(aes(color = TMIN), size = 3) +
  scale_color_gradient(low = "#1300C2", high = "#EC1C1C") +
  labs(
    title = "Low Temperatures in Maine on January 1st, 2020",
    x = "Longitude",
    y = "Latitude"
  )
# Same longitude/latitude scatter, with point size additionally mapped to
# ELEVATION (colour still encodes TMIN).
ggplot(Maine_temp, aes(x = LONGITUDE, y = LATITUDE)) +
  geom_point(aes(color = TMIN, size = ELEVATION)) +
  scale_color_gradient(low = "#1300C2", high = "#EC1C1C") +
  labs(
    title = "Low Temperatures in Maine on January 1st, 2020",
    x = "Longitude",
    y = "Latitude"
  )
## Warning: Removed 1 rows containing missing values (geom_point).
# Scatter plot of minimum temperature against latitude.
ggplot(Maine_temp, aes(x = LATITUDE, y = TMIN)) +
  geom_point(size = 3) +
  labs(
    title = "Latitude and Minimum Temperature for Maine on January 1st, 2020",
    x = "Latitude",
    y = "Minimum Temperature (Degrees Fahrenheit)"
  )
## Warning: Removed 79 rows containing missing values (geom_point).
# Scatter plot of minimum temperature against longitude.
ggplot(Maine_temp, aes(x = LONGITUDE, y = TMIN)) +
  geom_point(size = 3) +
  labs(
    title = "Longitude and Minimum Temperature for Maine on January 1st, 2020",
    x = "Longitude",
    y = "Minimum Temperature (Degrees Fahrenheit)"
  )
## Warning: Removed 79 rows containing missing values (geom_point).
# Scatter plot of minimum temperature against elevation.
# The point size is a fixed setting, so it is passed to geom_point() directly.
# Placed inside aes() it is treated as a mapped variable, which adds a
# meaningless size legend and does not actually set the point size to 3.
ggplot(Maine_temp, aes(x = ELEVATION, y = TMIN)) +
  geom_point(size = 3) +
  labs(
    title = "Elevation and Minimum Temperature for Maine on January 1st, 2020",
    x = "Elevation (feet above sea level)",
    y = "Minimum Temperature (Degrees Fahrenheit)"
  )
## Warning: Removed 79 rows containing missing values (geom_point).
# Load the Bangor temperature data and preview its first rows.
Bangor_temp <- read.csv(file = "Bangor_Temp.csv")
head(Bangor_temp)

# Normal QQ plot of TMIN: sample quantiles as points plus the reference line.
ggplot(Bangor_temp, aes(sample = TMIN)) +
  geom_qq(size = 4) +
  geom_qq_line(size = 2) +
  labs(
    title = "Normal QQ Plot for Low Temperatures at Bangor Airport",
    subtitle = "January 1st, 2000-2020",
    x = "Theoretical Quantiles",
    y = "Sample Quantiles"
  )
# Sample mean and standard deviation of the Bangor TMIN values.
mean(Bangor_temp[["TMIN"]])
## [1] 12.14286
sd(Bangor_temp[["TMIN"]])
## [1] 13.14643
# Distribution shape. hist() builds its default title and x-axis label from
# the argument expression, so the `$` form is kept as written.
hist(Bangor_temp$TMIN) # left-skewed
boxplot(x = Bangor_temp$TMIN)
# Remove entries with missing data in any variable used by the model.
# lm() drops every row that is missing the response or a predictor, so
# filtering on TMIN alone would leave the fitted values shorter than the data
# frame (and make the column assignments below fail) whenever LATITUDE or
# ELEVATION is missing.
Maine_temp <- Maine_temp[
  complete.cases(Maine_temp[, c("TMIN", "LATITUDE", "ELEVATION")]),
]
# Fit a linear model of TMIN on LATITUDE and ELEVATION
model <- lm(TMIN ~ LATITUDE + ELEVATION, data = Maine_temp)
# Add fitted values and residuals to the Maine_temp data frame
Maine_temp$fitted_values <- fitted(model)
Maine_temp$fitted_residuals <- residuals(model)
# Range of the observed response, for reference when reading the fit
min(Maine_temp$TMIN)
## [1] 12
max(Maine_temp$TMIN)
## [1] 32
# Make a diagnostic plot: residuals against fitted values, with a zero line
ggplot(Maine_temp, aes(x = fitted_values, y = fitted_residuals)) +
  geom_point() +
  geom_hline(yintercept = 0) +
  labs(
    title = "Plot of Fitted Residuals against Fitted Values",
    x = "Fitted Values",
    y = "Fitted Residuals"
  ) # can see a bit of a trough but nothing crazy
# The normality assumption of the linear model concerns the errors, so test
# the residuals rather than relying only on the raw response.
shapiro.test(Maine_temp$fitted_residuals)
# Normality of the raw response, kept from the original analysis
shapiro.test(Maine_temp$TMIN) # normal
##
## Shapiro-Wilk normality test
##
## data: Maine_temp$TMIN
## W = 0.96955, p-value = 0.1525
# Print the summary of the TMIN ~ LATITUDE + ELEVATION model: residual
# quantiles, the coefficient table, and the overall fit statistics.
summary(model)
##
## Call:
## lm(formula = TMIN ~ LATITUDE + ELEVATION, data = Maine_temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.1735 -1.2853 0.4133 1.3201 4.9524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 92.783984 16.671779 5.565 8.01e-07 ***
## LATITUDE -1.502828 0.375776 -3.999 0.000191 ***
## ELEVATION -0.010926 0.003284 -3.327 0.001570 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.581 on 55 degrees of freedom
## Multiple R-squared: 0.471, Adjusted R-squared: 0.4518
## F-statistic: 24.48 on 2 and 55 DF, p-value: 2.485e-08
# Both slopes are negative: TMIN decreases as latitude increases and as
# elevation increases, with a much smaller per-unit change for elevation.
# Print 80% confidence intervals for the model coefficients
confint(object = model, level = 0.8)
## 10 % 90 %
## (Intercept) 71.15844809 114.409519577
## LATITUDE -1.99025909 -1.015395944
## ELEVATION -0.01518546 -0.006665677
# Refit with elevation on the log scale. This overwrites `model`, so the
# summary below describes the log-elevation fit.
model <- lm(
  formula = TMIN ~ LATITUDE + I(log(ELEVATION)),
  data = Maine_temp
)
summary(object = model)
##
## Call:
## lm(formula = TMIN ~ LATITUDE + I(log(ELEVATION)), data = Maine_temp)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6137 -1.3048 0.4358 1.6841 4.0126
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82.5471 15.3275 5.386 1.55e-06 ***
## LATITUDE -1.1402 0.3578 -3.187 0.00237 **
## I(log(ELEVATION)) -1.6756 0.3371 -4.970 6.89e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.35 on 55 degrees of freedom
## Multiple R-squared: 0.5615, Adjusted R-squared: 0.5455
## F-statistic: 35.21 on 2 and 55 DF, p-value: 1.427e-10
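# Interpretation note. When only the dependent/response variable is
# log-transformed: exponentiate the coefficient, subtract one from this
# number, and multiply by 100. This gives the percent increase (or decrease)
# in the response for every one-unit increase in the independent variable.
# Example: the coefficient is 0.198. (exp(0.198) - 1) * 100 = 21.9. For every
# one-unit increase in the independent variable, the dependent variable
# increases by about 22%.
# In the model above it is the predictor ELEVATION that is log-transformed,
# not the response, so the reading is different: a 1% increase in ELEVATION
# is associated with a change in TMIN of about coefficient / 100, here
# -1.6756 / 100, i.e. roughly 0.017 degrees lower.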