load libraries:
library(stats)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3
library(ggthemes)
library(rgl)
## Warning: package 'rgl' was built under R version 3.6.3
library(predict3d)
## Warning: package 'predict3d' was built under R version 3.6.3
Import Data:
Alumni <- read.csv("~/Copy of Alumni2.csv")
Look at the data to see what needs cleaning:
head(Alumni)
## ï..School State Graduation.Rate
## 1 Boston College MA 85
## 2 Brandeis University MA 79
## 3 Brown University RI 93
## 4 California Institute of Technology CA 85
## 5 Carnegie Mellon University PA 75
## 6 Case Western Reserve University OH 72
## X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1 39 13 25
## 2 68 8 33
## 3 60 8 40
## 4 65 3 46
## 5 67 10 28
## 6 52 8 31
Clean the data:
I want to make the state a character, graduation rate a double, percent of class under 20 a double, and alumni giving rate a double.
Alumni$ï..School <- as.character(Alumni$ï..School)
Alumni$State <- as.character(Alumni$State)
Alumni$Graduation.Rate <- as.double(Alumni$Graduation.Rate)
Alumni$X..of.Classes.Under.20 <- as.double(Alumni$X..of.Classes.Under.20)
Alumni$Alumni.Giving.Rate <- as.double(Alumni$Alumni.Giving.Rate)
Alumni$Student.Faculty.Ratio <- as.double(Alumni$Student.Faculty.Ratio)
now check to make sure everything cleaned up nicely
head(Alumni)
## ï..School State Graduation.Rate
## 1 Boston College MA 85
## 2 Brandeis University MA 79
## 3 Brown University RI 93
## 4 California Institute of Technology CA 85
## 5 Carnegie Mellon University PA 75
## 6 Case Western Reserve University OH 72
## X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1 39 13 25
## 2 68 8 33
## 3 60 8 40
## 4 65 3 46
## 5 67 10 28
## 6 52 8 31
make a linear model of the alumni giving rate with respect to the student faculty ratio.
#The variable givingRateModel1 gets the linear model of alumni giving rate as a function of the student faculty ratio.
givingRateModel1 <- lm(Alumni$Alumni.Giving.Rate ~ Alumni$Student.Faculty.Ratio)
the model is stored in the variable “givingRateModel1”.
Print the summary of the model:
summary(givingRateModel1)
##
## Call:
## lm(formula = Alumni$Alumni.Giving.Rate ~ Alumni$Student.Faculty.Ratio)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.328 -5.692 -1.471 4.058 24.272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 53.0138 3.4215 15.495 < 2e-16 ***
## Alumni$Student.Faculty.Ratio -2.0572 0.2737 -7.516 1.54e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.103 on 46 degrees of freedom
## Multiple R-squared: 0.5512, Adjusted R-squared: 0.5414
## F-statistic: 56.49 on 1 and 46 DF, p-value: 1.544e-09
find the critical value:
criticalValue <- qt(df = 47, p = .05/2)
criticalValue
## [1] -2.011741
Decision Rule: the test statistic is outside the critical value. The student faculty ratio does appear significant at a 5% level.
plot the relationship.
plot(Alumni$Alumni.Giving.Rate ~ Alumni$Student.Faculty.Ratio, xlab = "Student Faculty Ratio", ylab = "Alumni Giving Rate", main = "Giving Rate With Respect to the Student Faculty Ratio")
abline(givingRateModel1, col = "red")
plot(givingRateModel1, which = 1)
now model it a different way
ggplot(Alumni, aes(x = Student.Faculty.Ratio, y = Alumni.Giving.Rate)) +
geom_point() +
geom_smooth(method = "lm", formula = y ~ x) +
theme_bw() +
labs(x = "Student Faculty Ratio", y = "Alumni Giving Rate", title = "Giving Rate With Respect to the Student Faculty Ratio", caption = "M.G. Barclay")
show residuals:
Plot from
https://drsimonj.svbtle.com/visualising-residuals
Alumni$predicted <- predict(givingRateModel1) # Save the predicted values
Alumni$residuals <- residuals(givingRateModel1)
ggplot(Alumni, aes(x = Student.Faculty.Ratio, y = Alumni.Giving.Rate)) +
geom_smooth(method = "lm", se = FALSE, color = "lightgrey", formula = y ~ x) +
geom_segment(aes(xend = Student.Faculty.Ratio, yend = predicted), alpha = .2) +
# > Color AND size adjustments made here...
geom_point(aes(color = abs(residuals), size = abs(residuals))) + # size also mapped
scale_color_continuous(low = "black", high = "red") +
guides(color = FALSE, size = FALSE) + # Size legend also removed
# <
geom_point(aes(y = predicted), shape = 1) +
theme_bw()
Multiple Regression. make a linear model of the alumni giving rate with respect to the student faculty ratio, and the graduation rate.
givingRateModel2 <- lm(Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate, data = Alumni)
Print the summary of the model:
summary(givingRateModel2)
##
## Call:
## lm(formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate,
## data = Alumni)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.9304 -6.1594 -0.5521 3.5910 20.5412
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -19.1063 15.5501 -1.229 0.226
## Student.Faculty.Ratio -1.2460 0.2843 -4.382 6.95e-05 ***
## Graduation.Rate 0.7557 0.1602 4.717 2.35e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.528 on 45 degrees of freedom
## Multiple R-squared: 0.6996, Adjusted R-squared: 0.6863
## F-statistic: 52.41 on 2 and 45 DF, p-value: 1.765e-12
both variables appear significant.
plot it
#open3d()
#plot3d(x = Alumni$Student.Faculty.Ratio, y = Alumni$Alumni.Giving.Rate, z = Alumni$Graduation.Rate, col = "red", type = "s")
#this2 <- spin3d(axis = c(0,0,1))
#play3d(this2)
show the linear model
#myPrediction <- predict3d(givingRateModel2, plane.color = "black", show.subtitle = F, show.error = T, color = "red")
#this3 <- spin3d(axis = c(0,0,1))
#play3d(this3)