# Read the alumni-giving data set from disk and preview its first five rows.
# NOTE(review): the path is machine-specific; confirm it before re-running.
alumni <- read.csv(file = "C:/Users/Gokul/Downloads/alumni (1).csv")
head(x = alumni, n = 5)
## school percent_of_classes_under_20
## 1 Boston College 39
## 2 Brandeis University 68
## 3 Brown University 60
## 4 California Institute of Technology 65
## 5 Carnegie Mellon University 67
## student_faculty_ratio alumni_giving_rate private
## 1 13 25 1
## 2 8 33 1
## 3 8 40 1
## 4 3 46 1
## 5 10 28 1
# Show the structure of the data frame: dimensions, column types, sample values.
str(object = alumni)
## 'data.frame': 48 obs. of 5 variables:
## $ school : chr "Boston College" "Brandeis University " "Brown University" "California Institute of Technology" ...
## $ percent_of_classes_under_20: int 39 68 60 65 67 52 45 69 72 61 ...
## $ student_faculty_ratio : int 13 8 8 3 10 8 12 7 13 10 ...
## $ alumni_giving_rate : int 25 33 40 46 28 31 27 31 35 53 ...
## $ private : int 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary (five-number summary and mean for numeric columns).
summary(object = alumni)
## school percent_of_classes_under_20 student_faculty_ratio
## Length:48 Min. :29.00 Min. : 3.00
## Class :character 1st Qu.:44.75 1st Qu.: 8.00
## Mode :character Median :59.50 Median :10.50
## Mean :55.73 Mean :11.54
## 3rd Qu.:66.25 3rd Qu.:13.50
## Max. :77.00 Max. :23.00
## alumni_giving_rate private
## Min. : 7.00 Min. :0.0000
## 1st Qu.:18.75 1st Qu.:0.0000
## Median :29.00 Median :1.0000
## Mean :29.27 Mean :0.6875
## 3rd Qu.:38.50 3rd Qu.:1.0000
## Max. :67.00 Max. :1.0000
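Model: Y = beta_0 + beta_1 * x1 + beta_2 * x2 + epsilon, where Y is alumni_giving_rate, x1 is percent_of_classes_under_20, x2 is student_faculty_ratio, and epsilon is the random error term.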
# Fit a multiple linear regression of alumni giving rate on the two
# predictors, then print the standard fit summary.
model <- lm(
  formula = alumni_giving_rate ~
    percent_of_classes_under_20 + student_faculty_ratio,
  data = alumni
)
summary(object = model)
##
## Call:
## lm(formula = alumni_giving_rate ~ percent_of_classes_under_20 +
## student_faculty_ratio, data = alumni)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.00 -6.57 -1.95 4.42 24.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.6556 13.5076 2.936 0.005225 **
## percent_of_classes_under_20 0.1662 0.1626 1.022 0.312128
## student_faculty_ratio -1.7021 0.4421 -3.850 0.000371 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.098 on 45 degrees of freedom
## Multiple R-squared: 0.5613, Adjusted R-squared: 0.5418
## F-statistic: 28.79 on 2 and 45 DF, p-value: 8.869e-09
# Single new observation at which to evaluate the fitted model.
new_obs <- data.frame(
  percent_of_classes_under_20 = 50,
  student_faculty_ratio = 10
)
# 95% confidence interval for the mean response at new_obs.
predict(
  object = model,
  newdata = new_obs,
  interval = "confidence",
  level = 0.95
)
## fit lwr upr
## 1 30.94291 26.88777 34.99806
# 95% prediction interval for an individual response at new_obs.
predict(
  object = model,
  newdata = new_obs,
  interval = "prediction",
  level = 0.95
)
## fit lwr upr
## 1 30.94291 12.17513 49.71069
# Residual degrees of freedom of the fit (48 observations - 3 coefficients).
df_resid <- model$df.residual
df_resid
## [1] 45
# Two-sided critical value at alpha = 0.05: the 97.5th percentile of the
# t distribution with df_resid degrees of freedom.
t_crit <- qt(p = 0.975, df = df_resid)
t_crit
## [1] 2.014103
# Coefficient matrix: estimate, standard error, t value and p-value per term.
coef_table <- summary(object = model)[["coefficients"]]
coef_table
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.6555835 13.5075774 2.935803 0.0052247868
## percent_of_classes_under_20 0.1661686 0.1625520 1.022249 0.3121275033
## student_faculty_ratio -1.7021103 0.4421271 -3.849821 0.0003709425
# Two-sided 95% confidence interval for each regression coefficient.
confint(object = model, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 12.449926 66.8612409
## percent_of_classes_under_20 -0.161228 0.4935653
## student_faculty_ratio -2.592600 -0.8116205
For X1 (percent_of_classes_under_20): H0: beta_1 = 0 vs. Ha: beta_1 != 0
For X2 (student_faculty_ratio): H0: beta_2 = 0 vs. Ha: beta_2 != 0
# Re-print the coefficient matrix used for the t-tests below.
coef(summary(model))
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.6555835 13.5075774 2.935803 0.0052247868
## percent_of_classes_under_20 0.1661686 0.1625520 1.022249 0.3121275033
## student_faculty_ratio -1.7021103 0.4421271 -3.849821 0.0003709425
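percent_of_classes_under_20: p = 0.312 > 0.05, so we fail to reject the null hypothesis; the coefficient is not statistically significant at the 5% level.
Holding the student–faculty ratio constant, there is insufficient evidence that the percentage of small classes (under 20) is associated with the alumni giving rate.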
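student_faculty_ratio: p = 0.0004 < 0.05, so we reject the null hypothesis; the coefficient is statistically significant at the 5% level.
Controlling for percent-under-20, each one-unit increase in the student–faculty ratio is associated with a significant decrease of about 1.70 in the alumni giving rate (95% CI: 0.81 to 2.59).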