Multiple Regression

load libraries:

library(stats)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.6.3

library(ggthemes)
library(rgl)

## Warning: package 'rgl' was built under R version 3.6.3

library(predict3d)

## Warning: package 'predict3d' was built under R version 3.6.3

Import Data:

# Read the alumni giving data set from the CSV file in the home directory.
Alumni <- read.csv(file = "~/Copy of Alumni2.csv")

Look at the data to see what needs cleaning:

# Preview the first six rows of the raw import.
head(x = Alumni, n = 6L)

##                            ï..School State Graduation.Rate
## 1                     Boston College    MA              85
## 2               Brandeis University     MA              79
## 3                   Brown University    RI              93
## 4 California Institute of Technology    CA              85
## 5         Carnegie Mellon University    PA              75
## 6    Case Western Reserve University    OH              72
##   X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1                     39                    13                 25
## 2                     68                     8                 33
## 3                     60                     8                 40
## 4                     65                     3                 46
## 5                     67                    10                 28
## 6                     52                     8                 31

Clean the data:

I want to store the school name and the state as character vectors, and the graduation rate, percent of classes under 20, alumni giving rate, and student faculty ratio as doubles.

# Store the two text columns (school name and state) as character vectors.
Alumni[c("ï..School", "State")] <- lapply(
  Alumni[c("ï..School", "State")],
  as.character
)

# Store the four numeric measures as doubles.
Alumni[c(
  "Graduation.Rate",
  "X..of.Classes.Under.20",
  "Alumni.Giving.Rate",
  "Student.Faculty.Ratio"
)] <- lapply(
  Alumni[c(
    "Graduation.Rate",
    "X..of.Classes.Under.20",
    "Alumni.Giving.Rate",
    "Student.Faculty.Ratio"
  )],
  as.double
)

Now check that the columns were converted as intended:

# Preview the first six rows again after the type conversions.
head(x = Alumni, n = 6L)

##                            ï..School State Graduation.Rate
## 1                     Boston College    MA              85
## 2               Brandeis University     MA              79
## 3                   Brown University    RI              93
## 4 California Institute of Technology    CA              85
## 5         Carnegie Mellon University    PA              75
## 6    Case Western Reserve University    OH              72
##   X..of.Classes.Under.20 Student.Faculty.Ratio Alumni.Giving.Rate
## 1                     39                    13                 25
## 2                     68                     8                 33
## 3                     60                     8                 40
## 4                     65                     3                 46
## 5                     67                    10                 28
## 6                     52                     8                 31

Make a linear model of the alumni giving rate with respect to the student faculty ratio.

# Simple linear regression stored in givingRateModel1: alumni giving rate is
# the response and the student faculty ratio is the only predictor.
# NOTE(review): the formula references the Alumni$ columns directly instead of
# using data = Alumni, so predict() with newdata will not use the new values.
givingRateModel1 <- lm(
  formula = Alumni$Alumni.Giving.Rate ~ Alumni$Student.Faculty.Ratio
)

the model is stored in the variable “givingRateModel1”.

Print the summary of the model:

# Coefficient table, residual summary, and fit statistics for the model.
summary(object = givingRateModel1)

## 
## Call:
## lm(formula = Alumni$Alumni.Giving.Rate ~ Alumni$Student.Faculty.Ratio)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.328  -5.692  -1.471   4.058  24.272 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                   53.0138     3.4215  15.495  < 2e-16 ***
## Alumni$Student.Faculty.Ratio  -2.0572     0.2737  -7.516 1.54e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.103 on 46 degrees of freedom
## Multiple R-squared:  0.5512, Adjusted R-squared:  0.5414 
## F-statistic: 56.49 on 1 and 46 DF,  p-value: 1.544e-09

find the critical value:

# Lower-tail critical t value for a two-sided test at the 5% level.
# The degrees of freedom are the model's residual degrees of freedom
# (46 for this fit, as reported by summary()), not 47.
criticalValue <- qt(p = 0.05 / 2, df = df.residual(givingRateModel1))

criticalValue

## [1] -2.012896

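Decision rule: the test statistic for the student faculty ratio (t = -7.516) is larger in absolute value than the critical value (about 2.01), so we reject the null hypothesis that its slope is zero. The student faculty ratio is significant at the 5% level.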

plot the relationship.

# Base-graphics scatter plot of giving rate against student faculty ratio.
plot(
  Alumni.Giving.Rate ~ Student.Faculty.Ratio,
  data = Alumni,
  xlab = "Student Faculty Ratio",
  ylab = "Alumni Giving Rate",
  main = "Giving Rate With Respect to the Student Faculty Ratio"
)

# Overlay the fitted line from givingRateModel1 in red.
abline(givingRateModel1, col = "red")

# First lm diagnostic plot (residuals against fitted values).
plot(givingRateModel1, which = 1)

Now plot the same relationship a different way, using ggplot2:

# ggplot2 version of the scatter plot with a linear fit drawn through it.
ggplot(
  data = Alumni,
  mapping = aes(x = Student.Faculty.Ratio, y = Alumni.Giving.Rate)
) +
  geom_point() +
  geom_smooth(formula = y ~ x, method = "lm") +
  labs(
    title = "Giving Rate With Respect to the Student Faculty Ratio",
    x = "Student Faculty Ratio",
    y = "Alumni Giving Rate",
    caption = "M.G. Barclay"
  ) +
  theme_bw()

show residuals:

Plot from
https://drsimonj.svbtle.com/visualising-residuals

# Add the fitted values and the residuals of givingRateModel1 to the data
# frame as new columns.
Alumni[["predicted"]] <- predict(object = givingRateModel1)
Alumni[["residuals"]] <- residuals(object = givingRateModel1)



# Residual plot: each observation is joined to its fitted value by a faint
# vertical segment, and observed points are coloured and sized by the absolute
# residual.
ggplot(Alumni, aes(x = Student.Faculty.Ratio, y = Alumni.Giving.Rate)) +
  # Fitted regression line, drawn without a confidence band.
  geom_smooth(method = "lm", se = FALSE, color = "lightgrey", formula = y ~ x) +
  # Vertical segment from each observed value to its predicted value.
  geom_segment(aes(xend = Student.Faculty.Ratio, yend = predicted), alpha = .2) +
  # Observed points: colour and size are both mapped to |residual|.
  geom_point(aes(color = abs(residuals), size = abs(residuals))) +
  scale_color_continuous(low = "black", high = "red") +
  # Hide both legends. "none" replaces the deprecated FALSE form.
  guides(color = "none", size = "none") +
  # Predicted values shown as hollow circles.
  geom_point(aes(y = predicted), shape = 1) +
  theme_bw()

Multiple regression: make a linear model of the alumni giving rate with respect to the student faculty ratio and the graduation rate.

# Multiple regression stored in givingRateModel2: alumni giving rate as a
# function of the student faculty ratio and the graduation rate.
givingRateModel2 <- lm(
  formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate,
  data = Alumni
)

Print the summary of the model:

# Coefficient table, residual summary, and fit statistics for the
# two-predictor model.
summary(object = givingRateModel2)

## 
## Call:
## lm(formula = Alumni.Giving.Rate ~ Student.Faculty.Ratio + Graduation.Rate, 
##     data = Alumni)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.9304  -6.1594  -0.5521   3.5910  20.5412 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -19.1063    15.5501  -1.229    0.226    
## Student.Faculty.Ratio  -1.2460     0.2843  -4.382 6.95e-05 ***
## Graduation.Rate         0.7557     0.1602   4.717 2.35e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.528 on 45 degrees of freedom
## Multiple R-squared:  0.6996, Adjusted R-squared:  0.6863 
## F-statistic: 52.41 on 2 and 45 DF,  p-value: 1.765e-12

Both variables are significant at the 5% level (p = 6.95e-05 for the student faculty ratio and p = 2.35e-05 for the graduation rate).

plot it

#open3d()
#plot3d(x = Alumni$Student.Faculty.Ratio, y = Alumni$Alumni.Giving.Rate, z = Alumni$Graduation.Rate, col = "red", type = "s")

#this2 <- spin3d(axis = c(0,0,1))

#play3d(this2)

show the linear model

#myPrediction <- predict3d(givingRateModel2, plane.color = "black", show.subtitle = F, show.error = T, color = "red")

#this3 <- spin3d(axis = c(0,0,1))

#play3d(this3)

Multiple Regression

M.G. Barclay

4/19/2020