# Four paired observations: a year and the GPA value recorded for it.
year <- c(1992, 1996, 2002, 2007)
gpa <- c(2.85, 2.90, 2.97, 3.01)

# Bundle both vectors into one data frame with columns `year` and `gpa`.
gpa_data <- data.frame(year = year, gpa = gpa)
# install.packages("ggplot2")
library(ggplot2)
# Scatterplot of gpa against year; stored in a variable, then displayed.
gpa_scatter <- ggplot(data = gpa_data, mapping = aes(x = year, y = gpa)) +
  geom_point()
gpa_scatter
# Based on the scatterplot, a linear increase in GPA over time does seem reasonable.
# Manual computation of the Pearson correlation between year and gpa.

# Helper columns: the cross product and the square of each variable.
gpa_data$y_by_g <- year * gpa
gpa_data$year2 <- year^2
gpa_data$gpa2 <- gpa^2

# Sample means and standard deviations of both variables.
y_bar_gpa <- mean(gpa)
sd_gpa <- sd(gpa)
x_bar_year <- mean(year)
sd_year <- sd(year)

# Sample size is taken from the data rather than hard-coded as 4, so the
# calculation stays correct if observations are added or removed.
n_gpa <- length(gpa)

# r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
r_gpa <- (n_gpa * sum(gpa_data$y_by_g) - sum(year) * sum(gpa)) /
  sqrt(
    (n_gpa * sum(year^2) - sum(year)^2) *
      (n_gpa * sum(gpa^2) - sum(gpa)^2)
  )
r_gpa
## [1] 0.9959486
# Slope and intercept computed by hand from the correlation:
#   b1 = r * (s_y / s_x),  b0 = y_bar - b1 * x_bar
b1_gpa <- r_gpa * (sd_gpa / sd_year)
b0_gpa <- y_bar_gpa - b1_gpa * x_bar_year

# Fit the simple linear regression of gpa on year with lm() and print its
# summary.
gpa_data_fit <- lm(formula = gpa ~ year, data = gpa_data)
summary(gpa_data_fit)
##
## Call:
## lm(formula = gpa ~ year, data = gpa_data)
##
## Residuals:
## 1 2 3 4
## -0.004455 0.002486 0.007897 -0.005927
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.859e+01 1.374e+00 -13.53 0.00542 **
## year 1.076e-02 6.873e-04 15.66 0.00405 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.007859 on 2 degrees of freedom
## Multiple R-squared: 0.9919, Adjusted R-squared: 0.9879
## F-statistic: 245.3 on 1 and 2 DF, p-value: 0.004051
# Fitted values and residuals from the hand-computed coefficients.
gpa_data$pred <- b0_gpa + b1_gpa * year
gpa_data$residuals <- gpa - gpa_data$pred
gpa_data$residuals2 <- gpa_data$residuals^2
res_sum_y <- sum(gpa_data$residuals2)

# Residual standard error: sqrt(SSE / (n - 2)). The degrees of freedom are
# derived from the number of observations instead of being hard-coded as 2.
df_resid_gpa <- nrow(gpa_data) - 2
std_error_y <- sqrt(res_sum_y / df_resid_gpa)

# Sum of squared deviations of year from its mean.
gpa_data$squares <- (year - x_bar_year)^2
x_sum_squares <- sum(gpa_data$squares)

# Standard error of the slope b1.
SE_b1 <- std_error_y / sqrt(x_sum_squares)
# Paired observations of area and ibi (49 values each).
area <- c(
  21, 34, 6, 47, 10, 49, 23, 32, 12, 16,
  29, 49, 28, 8, 57, 9, 31, 10, 21, 26,
  31, 52, 21, 8, 18, 5, 18, 26, 27, 26,
  32, 2, 59, 58, 19, 14, 16, 9, 23, 28,
  34, 70, 69, 54, 39, 9, 21, 54, 26
)
ibi <- c(
  47, 76, 33, 78, 62, 78, 33, 64, 83, 67,
  61, 85, 46, 53, 55, 71, 59, 41, 82, 56,
  39, 89, 32, 43, 29, 55, 81, 82, 82, 85,
  59, 74, 80, 88, 29, 58, 71, 60, 86, 91,
  72, 89, 80, 84, 54, 71, 75, 84, 79
)

# Bundle both vectors into one data frame with columns `area` and `ibi`.
water <- data.frame(area = area, ibi = ibi)
# Distribution of area: histogram plus a normal Q-Q plot with a dashed
# reference line.
hist(area, breaks = 25)
qqnorm(area)
qqline(area, col = 2, lwd = 2, lty = 2)

# The same two plots for ibi.
hist(ibi, breaks = 25)
qqnorm(ibi)
qqline(ibi, col = 2, lwd = 2, lty = 2)

# Scatterplot of ibi against area.
ggplot(data = water, mapping = aes(x = area, y = ibi)) +
  geom_point()
# Mean and standard deviation of area and of ibi.
mean_area_x <- mean(area)
sd_area_x <- sd(area)
mean_ibi_y <- mean(ibi)
sd_ibi_y <- sd(ibi)

# Side-by-side boxplots. par() returns the previous settings; they are
# restored afterwards so that later base-graphics plots are not forced into
# the 1x2 layout.
old_par <- par(mfrow = c(1, 2))
boxplot(area, main = "Area")
boxplot(ibi, main = "IBI")
par(old_par)

# Report mean and standard deviation of each variable, one per line.
cat("Area: ", c(mean_area_x, sd_area_x))
## Area: 28.28571 17.71417
# cat() emits an actual line break here; print("\n") would instead display
# the escaped string [1] "\n".
cat("\n")
cat("IBI: ", c(mean_ibi_y, sd_ibi_y))
## IBI: 65.93878 18.27955
# Scatterplot of ibi against area.
ggplot(water, aes(x = area, y = ibi)) + geom_point()

# Manual computation of the Pearson correlation between area and ibi.

# Helper columns: the cross product and the square of each variable.
water$x_by_y <- area * ibi
water$ibi2 <- ibi^2
water$area2 <- area^2

# Sample size is taken from the data rather than hard-coded as 49, so the
# calculation stays correct if observations are added or removed.
n_water <- length(area)

# r = (n*Sxy - Sx*Sy) / sqrt((n*Sxx - Sx^2) * (n*Syy - Sy^2))
r_water <- (n_water * sum(water$x_by_y) - sum(area) * sum(ibi)) /
  sqrt(
    (n_water * sum(area^2) - sum(area)^2) *
      (n_water * sum(ibi^2) - sum(ibi)^2)
  )
r_water
## [1] 0.4459226
# Slope and intercept computed by hand from the correlation:
#   b1 = r * (s_y / s_x),  b0 = y_bar - b1 * x_bar
b1_water <- r_water * (sd_ibi_y / sd_area_x)
b0_water <- mean_ibi_y - b1_water * mean_area_x
print(c(b1_water, b0_water))
## [1] 0.4601552 52.9229579

# Fit the simple linear regression of ibi on area with lm() and print its
# summary.
water_fit <- lm(formula = ibi ~ area, data = water)
summary(water_fit)
##
## Call:
## lm(formula = ibi ~ area, data = water)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.666 -8.887 3.432 12.414 25.193
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.9230 4.4835 11.804 1.17e-15 ***
## area 0.4602 0.1347 3.415 0.00132 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.53 on 47 degrees of freedom
## Multiple R-squared: 0.1988, Adjusted R-squared: 0.1818
## F-statistic: 11.67 on 1 and 47 DF, p-value: 0.001322
# Residuals of the fitted ibi ~ area model.
res <- resid(water_fit)

# Residuals against area, with a horizontal reference line at zero.
plot(area, res)
abline(a = 0, b = 0)

# Histogram and normal Q-Q plot of the residuals, with a dashed reference
# line on the Q-Q plot.
hist(res, breaks = 15)
qqnorm(res)
qqline(res, col = 2, lwd = 2, lty = 2)
# Standard errors at a new area value x* for the regression of ibi on area.
x_star_area <- 40

# Sum of squared deviations of area from its mean (Sxx).
water$x_min_xbar <- area - mean_area_x
water$x_min_xbar2 <- water$x_min_xbar^2
sum_squares_x <- sum(water$x_min_xbar2)

# Residual standard error of the fitted model: s = sqrt(SSE / (n - 2)).
# Both standard errors below scale with s, not with the marginal standard
# deviation of ibi, and the mean-response SE must include the factor too.
s_water <- sqrt(sum(resid(water_fit)^2) / (n_water - 2))

# Shared term: 1/n + (x* - x_bar)^2 / Sxx.
se_term_water <- (1 / n_water) +
  (x_star_area - mean_area_x)^2 / sum_squares_x

# Standard error of the estimated mean response at x*.
SE_mu_water <- s_water * sqrt(se_term_water)

# Standard error for predicting a single new observation at x*.
SE_yhat_water <- s_water * sqrt(1 + se_term_water)