load libraries:
library(stats)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(ggthemes)
library(rgl)
## Warning: package 'rgl' was built under R version 3.6.3
library(predict3d)
## Warning: package 'predict3d' was built under R version 3.6.3
Import Data:
# Read the alumni data set from the user's home directory.
# NOTE(review): the "ï.." prefix on the first column name in the preview
# looks like a mis-decoded UTF-8 byte-order mark -- confirm, and consider
# fileEncoding = "UTF-8-BOM" (the later references to that column would then
# need the clean name).
Alumni <- read.csv(file = "~/Copy of Alumni2.csv", header = TRUE)
Look at the data to see what needs cleaning:
# Preview the first six rows of the raw data.
head(x = Alumni, n = 6L)
## ï..School State Graduation.Rate
## 1 Boston College MA 85
## 2 Brandeis University MA 79
## 3 Brown University RI 93
## 4 California Institute of Technology CA 85
## 5 Carnegie Mellon University PA 75
## 6 Case Western Reserve University OH 72
## X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1 39 13 25
## 2 68 8 33
## 3 60 8 40
## 4 65 3 46
## 5 67 10 28
## 6 52 8 31
Clean the data:
I want to make the school name and the state character vectors, and the graduation rate, percent of classes under 20, student faculty ratio, and alumni giving rate doubles.
# Coerce the two identifier columns to character ...
Alumni[c("ï..School", "State")] <- lapply(
  Alumni[c("ï..School", "State")],
  as.character
)
# ... and the four rate / ratio columns to double.
Alumni[c(
  "Graduation.Rate",
  "X..of.Classes.Under.20",
  "Alumni.Giving.Rate",
  "Student.Faculty.Ratio"
)] <- lapply(
  Alumni[c(
    "Graduation.Rate",
    "X..of.Classes.Under.20",
    "Alumni.Giving.Rate",
    "Student.Faculty.Ratio"
  )],
  as.double
)
now check to make sure everything cleaned up nicely
# Preview the first six rows again after the type conversions.
head(x = Alumni, n = 6L)
## ï..School State Graduation.Rate
## 1 Boston College MA 85
## 2 Brandeis University MA 79
## 3 Brown University RI 93
## 4 California Institute of Technology CA 85
## 5 Carnegie Mellon University PA 75
## 6 Case Western Reserve University OH 72
## X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1 39 13 25
## 2 68 8 33
## 3 60 8 40
## 4 65 3 46
## 5 67 10 28
## 6 52 8 31
make a linear model of the alumni giving rate with respect to the student faculty ratio.
# Simple linear regression of the alumni giving rate on the student faculty
# ratio. The data frame is passed through `data =` (as givingRateModel2 does)
# instead of writing Alumni$... inside the formula, so the model terms carry
# plain column names and predict() can be given `newdata`.
givingRateModel1 <- lm(
  Alumni.Giving.Rate ~ Student.Faculty.Ratio,
  data = Alumni
)
the model is stored in the variable “givingRateModel1”.
Print the summary of the model:
summary(givingRateModel1)
##
## Call:
## lm(formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio, data = Alumni)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.328 -5.692 -1.471 4.058 24.272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.0138 3.4215 15.495 < 2e-16 ***
## Student.Faculty.Ratio -2.0572 0.2737 -7.516 1.54e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.103 on 46 degrees of freedom
## Multiple R-squared: 0.5512, Adjusted R-squared: 0.5414
## F-statistic: 56.49 on 1 and 46 DF, p-value: 1.544e-09
Find the two-sided critical value of the t distribution at the 5% level:
# Lower-tail critical value for a two-sided t test at the 5% level.
# The degrees of freedom are the model's residual degrees of freedom
# (46, as reported by summary(givingRateModel1)), not 47.
criticalValue <- qt(p = 0.05 / 2, df = df.residual(givingRateModel1))
criticalValue
## [1] -2.012896
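Decision Rule: reject the null hypothesis that the slope is zero when the absolute value of the t statistic exceeds the absolute value of the critical value (about 2.01). Here |t| = 7.516, so the test statistic falls in the rejection region and the student faculty ratio is significant at the 5% level.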
plot the relationship.
# Base-graphics scatterplot of giving rate against student faculty ratio.
plot(
  Alumni.Giving.Rate ~ Student.Faculty.Ratio,
  data = Alumni,
  main = "Giving Rate With Respect to the Student Faculty Ratio",
  xlab = "Student Faculty Ratio",
  ylab = "Alumni Giving Rate"
)
# Add the fitted line from givingRateModel1 in red.
abline(reg = givingRateModel1, col = "red")
Now plot the same relationship a different way, using ggplot2:
# Same scatterplot with ggplot2, with a linear fit drawn by geom_smooth().
ggplot(
  data = Alumni,
  mapping = aes(x = Student.Faculty.Ratio, y = Alumni.Giving.Rate)
) +
  geom_point() +
  geom_smooth(formula = y ~ x, method = "lm") +
  labs(
    title = "Giving Rate With Respect to the Student Faculty Ratio",
    x = "Student Faculty Ratio",
    y = "Alumni Giving Rate",
    caption = "M.G. Barclay"
  ) +
  theme_bw()
Multiple regression: make a linear model of the alumni giving rate with respect to both the student faculty ratio and the graduation rate.
# Regress the giving rate on the student faculty ratio and the graduation
# rate together.
givingRateModel2 <- lm(
  formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate,
  data = Alumni
)
Print the summary of the model:
# Coefficient table and fit statistics for the two-predictor model.
summary(object = givingRateModel2)
##
## Call:
## lm(formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate,
## data = Alumni)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9304 -6.1594 -0.5521 3.5910 20.5412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.1063 15.5501 -1.229 0.226
## Student.Faculty.Ratio -1.2460 0.2843 -4.382 6.95e-05 ***
## Graduation.Rate 0.7557 0.1602 4.717 2.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.528 on 45 degrees of freedom
## Multiple R-squared: 0.6996, Adjusted R-squared: 0.6863
## F-statistic: 52.41 on 2 and 45 DF, p-value: 1.765e-12
Both predictors are significant at the 5% level (each has p < 0.001).
plot it
#open3d()
# 3D scatter of the three variables, drawn as red spheres (type = "s").
# The Alumni$... expressions are kept as written because plot3d() derives its
# default axis labels from them.
plot3d(
  x = Alumni$Student.Faculty.Ratio,
  y = Alumni$Alumni.Giving.Rate,
  z = Alumni$Graduation.Rate,
  type = "s",
  col = "red"
)
show the linear model
# Draw the 3D prediction plot for givingRateModel2 without a subtitle.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
predict3d(givingRateModel2, plane.color = "darkblue", show.subtitle = FALSE)