# 10.30 ----

# Raw observations: four years and the GPA value recorded for each.
year <- c(1992, 1996, 2002, 2007)
gpa <- c(2.85, 2.90, 2.97, 3.01)

# Bundle both vectors into one data frame with columns `year` and `gpa`.
gpa_data <- data.frame(year = year, gpa = gpa)



# install.packages("ggplot2")
library(ggplot2)

# Scatterplot of gpa against year; the plot object is stored, then displayed.
gpa_scatter <- ggplot(data = gpa_data, mapping = aes(x = year, y = gpa)) +
  geom_point()
gpa_scatter

# A linear increase does seem reasonable.

# Helper columns used for the hand computation of the correlation.
gpa_data$y_by_g <- year * gpa
gpa_data$year2 <- year^2
gpa_data$gpa2 <- gpa^2

# Sample means and standard deviations of the response (gpa) and the
# explanatory variable (year).
y_bar_gpa <- mean(gpa)
sd_gpa <- sd(gpa)
x_bar_year <- mean(year)
sd_year <- sd(year)

# Sample size is read from the data so it cannot drift out of sync with it.
n_gpa <- length(gpa)

# Pearson correlation via the computational formula
#   r = (n*Sum(xy) - Sum(x)*Sum(y)) /
#       sqrt((n*Sum(x^2) - Sum(x)^2) * (n*Sum(y^2) - Sum(y)^2))
r_gpa <- (n_gpa * sum(gpa_data$y_by_g) - sum(year) * sum(gpa)) /
  sqrt(
    (n_gpa * sum(year^2) - sum(year)^2) *
      (n_gpa * sum(gpa^2) - sum(gpa)^2)
  )

r_gpa
## [1] 0.9959486

# Least-squares slope and intercept from r, the two standard deviations and
# the two means: b1 = r * s_y / s_x, b0 = y_bar - b1 * x_bar.
b1_gpa <- r_gpa * (sd_gpa / sd_year)
b0_gpa <- y_bar_gpa - b1_gpa * x_bar_year
# Fit gpa on year with lm() and print the model summary; the output recorded
# from that run is kept below.
gpa_data_fit <- lm(formula = gpa ~ year, data = gpa_data)
summary(gpa_data_fit)
## 
## Call:
## lm(formula = gpa ~ year, data = gpa_data)
## 
## Residuals:
##         1         2         3         4 
## -0.004455  0.002486  0.007897 -0.005927 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -1.859e+01  1.374e+00  -13.53  0.00542 **
## year         1.076e-02  6.873e-04   15.66  0.00405 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.007859 on 2 degrees of freedom
## Multiple R-squared:  0.9919, Adjusted R-squared:  0.9879 
## F-statistic: 245.3 on 1 and 2 DF,  p-value: 0.004051
# Fitted values and residuals from the hand-computed line b0 + b1 * year.
gpa_data$pred <- b0_gpa + b1_gpa * year
gpa_data$residuals <- gpa - gpa_data$pred
gpa_data$residuals2 <- gpa_data$residuals^2

# Sum of squared residuals.
res_sum_y <- sum(gpa_data$residuals2)

# Regression standard error s = sqrt(SSE / (n - 2)). The degrees of freedom
# are derived from the number of observations instead of a literal 2.
std_error_y <- sqrt(res_sum_y / (length(gpa) - 2))

# Same for x: squared deviations of year from its mean, and their sum.
gpa_data$squares <- (year - x_bar_year)^2
x_sum_squares <- sum(gpa_data$squares)

# Standard error of the slope: SE_b1 = s / sqrt(Sum((x - x_bar)^2)).
SE_b1 <- std_error_y / sqrt(x_sum_squares)

# 10.48 ----

# (a)

# Raw data: 49 paired observations of area and ibi.
area <- c(
  21, 34, 6, 47, 10, 49, 23, 32, 12, 16,
  29, 49, 28, 8, 57, 9, 31, 10, 21, 26,
  31, 52, 21, 8, 18, 5, 18, 26, 27, 26,
  32, 2, 59, 58, 19, 14, 16, 9, 23, 28,
  34, 70, 69, 54, 39, 9, 21, 54, 26
)

ibi <- c(
  47, 76, 33, 78, 62, 78, 33, 64, 83, 67,
  61, 85, 46, 53, 55, 71, 59, 41, 82, 56,
  39, 89, 32, 43, 29, 55, 81, 82, 82, 85,
  59, 74, 80, 88, 29, 58, 71, 60, 86, 91,
  72, 89, 80, 84, 54, 71, 75, 84, 79
)

# Data frame with one row per observation and columns `area` and `ibi`.
water <- data.frame(area = area, ibi = ibi)
# Distribution checks for area: histogram and normal quantile plot with a
# dashed reference line.
hist(area, breaks = 25)
qqnorm(area)
qqline(area, col = 2, lwd = 2, lty = 2)

# The same two checks for ibi.
hist(ibi, breaks = 25)
qqnorm(ibi)
qqline(ibi, col = 2, lwd = 2, lty = 2)

# Scatterplot of ibi against area.
ggplot(water, aes(x = area, y = ibi)) + geom_point()

# Means and standard deviations of both variables (reused further down).
mean_area_x <- mean(area)
sd_area_x <- sd(area)

mean_ibi_y <- mean(ibi)
sd_ibi_y <- sd(ibi)

# Side-by-side boxplots. The previous graphics layout is saved and restored
# so later plots are not squeezed into the 1 x 2 grid.
old_par <- par(mfrow = c(1, 2))
boxplot(area, main = "Area")
boxplot(ibi, main = "IBI")
par(old_par)

# cat() does not append a newline, and print("\n") shows the escaped string
# instead of breaking the line, so each summary line ends with "\n".
cat("Area: ", c(mean_area_x, sd_area_x), "\n")
## Area:  28.28571 17.71417
cat("IBI: ", c(mean_ibi_y, sd_ibi_y), "\n")
## IBI:  65.93878 18.27955

# (b)

# Scatterplot of ibi (vertical axis) against area (horizontal axis).
ggplot(data = water, mapping = aes(x = area, y = ibi)) +
  geom_point()

# (c)

# Helper columns used for the hand computation of the correlation.
water$x_by_y <- area * ibi
water$ibi2 <- ibi^2
water$area2 <- area^2

# Sample size is read from the data so it cannot drift out of sync with it.
n_water <- nrow(water)

# Pearson correlation via the computational formula
#   r = (n*Sum(xy) - Sum(x)*Sum(y)) /
#       sqrt((n*Sum(x^2) - Sum(x)^2) * (n*Sum(y^2) - Sum(y)^2))
r_water <- (n_water * sum(water$x_by_y) - sum(area) * sum(ibi)) /
  sqrt(
    (n_water * sum(area^2) - sum(area)^2) *
      (n_water * sum(ibi^2) - sum(ibi)^2)
  )

r_water
## [1] 0.4459226

# Least-squares slope and intercept: b1 = r * s_y / s_x, b0 = y_bar - b1 * x_bar.
b1_water <- r_water * (sd_ibi_y / sd_area_x)
b0_water <- mean_ibi_y - b1_water * mean_area_x

print(c(b1_water, b0_water))
## [1]  0.4601552 52.9229579

# (e)

# Fit ibi on area with lm() and print the model summary; the output recorded
# from that run is kept below.
water_fit <- lm(formula = ibi ~ area, data = water)
summary(water_fit)
## 
## Call:
## lm(formula = ibi ~ area, data = water)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -32.666  -8.887   3.432  12.414  25.193 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  52.9230     4.4835  11.804 1.17e-15 ***
## area          0.4602     0.1347   3.415  0.00132 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.53 on 47 degrees of freedom
## Multiple R-squared:  0.1988, Adjusted R-squared:  0.1818 
## F-statistic: 11.67 on 1 and 47 DF,  p-value: 0.001322

# (f)

# Residuals of the fitted model.
res <- resid(water_fit)

# Residuals against the explanatory variable, with a horizontal line at zero.
plot(area, res)
abline(h = 0)

# Histogram of the residuals.
hist(res, breaks = 15)

# Normal quantile plot of the residuals with a dashed reference line.
qqnorm(res)
qqline(res, col = 2, lwd = 2, lty = 2)

# 10.52 ----

# (a)

# Value of area at which the mean response / a new observation is assessed.
x_star <- 40

# Deviations of area from its mean, their squares, and the sum of squares.
water$x_min_xbar <- area - mean_area_x
water$x_min_xbar2 <- water$x_min_xbar^2
sum_squares_x <- sum(water$x_min_xbar2)

# Regression standard error s (the "Residual standard error" of the fit).
# Both standard errors below scale with s, not with the plain standard
# deviation of ibi, which ignores the fitted line.
s_water <- summary(water_fit)$sigma

# Shared term 1/n + (x* - x_bar)^2 / Sum((x - x_bar)^2).
se_term_water <- 1 / n_water + (x_star - mean_area_x)^2 / sum_squares_x

# Standard error of the estimated mean response at x*:
#   SE_mu = s * sqrt(1/n + (x* - x_bar)^2 / Sxx)
SE_mu_water <- s_water * sqrt(se_term_water)

# Standard error for predicting a single new observation at x*:
#   SE_yhat = s * sqrt(1 + 1/n + (x* - x_bar)^2 / Sxx)
SE_yhat_water <- s_water * sqrt(1 + se_term_water)