> load("D:/Owncloud/Courses/AGSM-Stats/Core-ECCC-v3/Slides/AlumniGiving/AlumniGiving.RData")
> with(GivingRates, Hist(Giving, scale="frequency", breaks="Sturges",
+ col="darkgray"))

> scatterplotMatrix(~Big.Classes+Freshman.Retention+Giving+Graduation.Rate+SFR+Small.Classes,
+ reg.line=lm, smooth=FALSE, spread=FALSE, span=0.5, ellipse=FALSE,
+ levels=c(.5, .9), id.n=0, diagonal = 'density', data=GivingRates,
+ main="Alumni Giving Data")

> stripchart(Giving ~ Special, vertical=TRUE, method="stack", ylab="Giving",
+ data=GivingRates)

> with(GivingRates, plotMeans(Giving, Special, error.bars="se"))

> with(GivingRates, plotMeans(Giving, Special, error.bars="se"))

> with(GivingRates, (t.test(Giving, alternative='two.sided', mu=0.0,
+ conf.level=.95)))
One Sample t-test
data: Giving
t = 19.617, df = 124, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 0
95 percent confidence interval:
0.1273847 0.1559753
sample estimates:
mean of x
0.14168
> with(GivingRates, (t.test(Big.Classes, Small.Classes,
+ alternative='two.sided', conf.level=.95, paired=TRUE)))
Paired t-test
data: Big.Classes and Small.Classes
t = -16.787, df = 124, p-value < 2.2e-16
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.3009218 -0.2374462
sample estimates:
mean of the differences
-0.269184
> with(GivingRates, (t.test(Big.Classes, Small.Classes,
+ alternative='two.sided', conf.level=.95, paired=TRUE)))$p.value<0.05
[1] TRUE
> densityPlot(Giving~Special, data=GivingRates, bw="SJ", adjust=1,
+ kernel="gaussian")

> t.test(Giving~Special, alternative='two.sided', conf.level=.95,
+ var.equal=FALSE, data=GivingRates)
Welch Two Sample t-test
data: Giving by Special
t = -7.1385, df = 2.2106, p-value = 0.01446
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
-0.33632801 -0.09738783
sample estimates:
mean in group No mean in group Yes
0.1364754 0.3533333
> First.Cut <- lm(Giving ~ Big.Classes + Freshman.Retention +
+ Graduation.Rate + SFR + Small.Classes + Special, data=GivingRates)
> summary(First.Cut)
Call:
lm(formula = Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate +
SFR + Small.Classes + Special, data = GivingRates)
Residuals:
Min 1Q Median 3Q Max
-0.124888 -0.030048 -0.005409 0.027063 0.145876
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.188483 0.096503 -1.953 0.05317 .
Big.Classes -0.023674 0.101584 -0.233 0.81613
Freshman.Retention 0.250587 0.148554 1.687 0.09428 .
Graduation.Rate 0.108767 0.081848 1.329 0.18645
SFR -0.001085 0.001519 -0.715 0.47620
Small.Classes 0.166839 0.054459 3.064 0.00271 **
SpecialYes 0.184869 0.028313 6.529 1.74e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.04809 on 118 degrees of freedom
Multiple R-squared: 0.6626, Adjusted R-squared: 0.6454
F-statistic: 38.61 on 6 and 118 DF, p-value: < 2.2e-16
> (0.6626/6)/((1-0.6626)/118)
[1] 38.62221
> GivingRates<- within(GivingRates, {
+ residuals.First.Cut <- residuals(First.Cut)
+ })
> qq.plot(GivingRates$residuals.First.Cut, dist= "norm", labels=FALSE)
Warning: 'qq.plot' is deprecated.
Use 'qqPlot' instead.
See help("Deprecated") and help("car-deprecated").

> shapiro.test(GivingRates$residuals.First.Cut)
Shapiro-Wilk normality test
data: GivingRates$residuals.First.Cut
W = 0.98682, p-value = 0.2704
> densityPlot( ~ residuals.First.Cut, data=GivingRates, bw="SJ", adjust=1,
+ kernel="gaussian")

> library(zoo, pos=15)
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
> library(lmtest, pos=15)
> bptest(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR +
+ Small.Classes + Special, varformula = ~ fitted.values(First.Cut),
+ studentize=FALSE, data=GivingRates)
Breusch-Pagan test
data: Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR + Small.Classes + Special
BP = 5.4658, df = 1, p-value = 0.01939
> resettest(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR
+ + Small.Classes + Special, power=2:3, type="regressor", data=GivingRates)
RESET test
data: Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR + Small.Classes + Special
RESET = 1.2653, df1 = 10, df2 = 108, p-value = 0.2592
> resettest(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR
+ + Small.Classes + Special, power=2:3, type="fitted", data=GivingRates)
RESET test
data: Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate + SFR + Small.Classes + Special
RESET = 1.5551, df1 = 2, df2 = 116, p-value = 0.2155
> outlierTest(First.Cut)
No Studentized residuals with Bonferonni p < 0.05
Largest |rstudent|:
rstudent unadjusted p-value Bonferonni p
111 3.235246 0.0015805 0.19756
> vif(First.Cut)
Big.Classes Freshman.Retention Graduation.Rate
2.043765 8.281685 10.349414
SFR Small.Classes Special
2.547100 2.948151 1.015124
> library(grid, pos=17)
> library(lattice, pos=17)
> library(survival, pos=17)
> library(Formula, pos=17)
> library(ggplot2, pos=17)
> library(Hmisc, pos=17)
Attaching package: 'Hmisc'
The following object is masked _by_ 'package:RcmdrMisc':
Dotplot
The following objects are masked from 'package:base':
format.pval, round.POSIXt, trunc.POSIXt, units
> rcorr.adjust(GivingRates[,c("Big.Classes","Freshman.Retention","Giving",
+ "Graduation.Rate","residuals.First.Cut","SFR","Small.Classes")],
+ type="pearson", use="complete")
Pearson correlations:
Big.Classes Freshman.Retention Giving Graduation.Rate
Big.Classes 1.0000 0.0413 -0.1930 0.0083
Freshman.Retention 0.0413 1.0000 0.6513 0.9338
Giving -0.1930 0.6513 1.0000 0.6832
Graduation.Rate 0.0083 0.9338 0.6832 1.0000
residuals.First.Cut 0.0000 0.0000 0.5809 0.0000
SFR 0.4318 -0.5259 -0.5577 -0.6032
Small.Classes -0.5989 0.3828 0.5498 0.4848
residuals.First.Cut SFR Small.Classes
Big.Classes 0.0000 0.4318 -0.5989
Freshman.Retention 0.0000 -0.5259 0.3828
Giving 0.5809 -0.5577 0.5498
Graduation.Rate 0.0000 -0.6032 0.4848
residuals.First.Cut 1.0000 0.0000 0.0000
SFR 0.0000 1.0000 -0.7011
Small.Classes 0.0000 -0.7011 1.0000
Number of observations: 125
Pairwise two-sided p-values:
Big.Classes Freshman.Retention Giving Graduation.Rate
Big.Classes 0.6478 0.0310 0.9272
Freshman.Retention 0.6478 <.0001 <.0001
Giving 0.0310 <.0001 <.0001
Graduation.Rate 0.9272 <.0001 <.0001
residuals.First.Cut 1.0000 1.0000 <.0001 1.0000
SFR <.0001 <.0001 <.0001 <.0001
Small.Classes <.0001 <.0001 <.0001 <.0001
residuals.First.Cut SFR Small.Classes
Big.Classes 1.0000 <.0001 <.0001
Freshman.Retention 1.0000 <.0001 <.0001
Giving <.0001 <.0001 <.0001
Graduation.Rate 1.0000 <.0001 <.0001
residuals.First.Cut 1.0000 1.0000
SFR 1.0000 <.0001
Small.Classes 1.0000 <.0001
Adjusted p-values (Holm's method)
Big.Classes Freshman.Retention Giving Graduation.Rate
Big.Classes 1.0000 0.2484 1.0000
Freshman.Retention 1.0000 <.0001 <.0001
Giving 0.2484 <.0001 <.0001
Graduation.Rate 1.0000 <.0001 <.0001
residuals.First.Cut 1.0000 1.0000 <.0001 1.0000
SFR <.0001 <.0001 <.0001 <.0001
Small.Classes <.0001 <.0001 <.0001 <.0001
residuals.First.Cut SFR Small.Classes
Big.Classes 1.0000 <.0001 <.0001
Freshman.Retention 1.0000 <.0001 <.0001
Giving <.0001 <.0001 <.0001
Graduation.Rate 1.0000 <.0001 <.0001
residuals.First.Cut 1.0000 1.0000
SFR 1.0000 <.0001
Small.Classes 1.0000 <.0001
> library(leaps, pos=23)
> plot(regsubsets(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate
+ + SFR + Small.Classes + Special, data=GivingRates, nbest=1, nvmax=7),
+ scale='bic')

> plot(regsubsets(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate
+ + SFR + Small.Classes + Special, data=GivingRates, nbest=1, nvmax=7),
+ scale='Cp')

> plot(regsubsets(Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate
+ + SFR + Small.Classes + Special, data=GivingRates, nbest=1, nvmax=7),
+ scale='r2')

> library(abind, pos=25)
> library(e1071, pos=26)
Attaching package: 'e1071'
The following object is masked _by_ 'package:Hmisc':
impute
> numSummary(GivingRates[,"residuals.First.Cut"], statistics=c("mean", "sd",
+ "IQR", "quantiles"), quantiles=c(0,.25,.5,.75,1))
mean sd IQR 0% 25% 50%
-4.336809e-19 0.04690786 0.05711071 -0.1248877 -0.03004799 -0.005409283
75% 100% n
0.02706272 0.1458763 125
> .NewData <- data.frame(Big.Classes=0.23, Freshman.Retention=0.77,
+ Graduation.Rate=0.67, SFR=17, Small.Classes=0.34, Special=structure(1L,
+ .Label = c("No", "Yes"), class = "factor"), row.names="1")
> .NewData # Newdata
Big.Classes Freshman.Retention Graduation.Rate SFR Small.Classes Special
1 0.23 0.77 0.67 17 0.34 No
> predict(First.Cut, newdata=.NewData, interval="prediction", level=.95,
+ se.fit=FALSE)
fit lwr upr
1 0.1101698 0.0101231 0.2102166
> .NewData # Newdata
Big.Classes Freshman.Retention Graduation.Rate SFR Small.Classes Special
1 0.23 0.77 0.67 17 0.34 No
> predict(First.Cut, newdata=.NewData, interval="prediction", level=.95,
+ se.fit=FALSE)
fit lwr upr
1 0.1101698 0.0101231 0.2102166
> oldpar <- par(oma=c(0,0,3,0), mfrow=c(2,2))
> plot(First.Cut)




> par(oldpar)
> vif(First.Cut)
Big.Classes Freshman.Retention Graduation.Rate
2.043765 8.281685 10.349414
SFR Small.Classes Special
2.547100 2.948151 1.015124
> scatterplotMatrix(~Big.Classes+Freshman.Retention+Graduation.Rate+residuals.First.Cut+SFR+Small.Classes,
+ reg.line=FALSE, smooth=FALSE, spread=FALSE, span=0.5, ellipse=FALSE,
+ levels=c(.5, .9), id.n=0, diagonal = 'density', data=GivingRates)

> library(effects, pos=27)
Attaching package: 'effects'
The following object is masked _by_ 'package:car':
Prestige
> plot(allEffects(First.Cut, partial.residuals=TRUE), span=1)
Warning in Analyze.model(focal.predictors, mod, xlevels, default.levels, : there are no numeric focal predictors
partial residuals suppressed

> summary(First.Cut)
Call:
lm(formula = Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate +
SFR + Small.Classes + Special, data = GivingRates)
Residuals:
Min 1Q Median 3Q Max
-0.124888 -0.030048 -0.005409 0.027063 0.145876
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.188483 0.096503 -1.953 0.05317 .
Big.Classes -0.023674 0.101584 -0.233 0.81613
Freshman.Retention 0.250587 0.148554 1.687 0.09428 .
Graduation.Rate 0.108767 0.081848 1.329 0.18645
SFR -0.001085 0.001519 -0.715 0.47620
Small.Classes 0.166839 0.054459 3.064 0.00271 **
SpecialYes 0.184869 0.028313 6.529 1.74e-09 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.04809 on 118 degrees of freedom
Multiple R-squared: 0.6626, Adjusted R-squared: 0.6454
F-statistic: 38.61 on 6 and 118 DF, p-value: < 2.2e-16
> library(MASS, pos=28)
> stepwise(First.Cut, direction='backward', criterion='BIC')
Direction: backward
Criterion: BIC
Start: AIC=-732.1
Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate +
SFR + Small.Classes + Special
Df Sum of Sq RSS AIC
- Big.Classes 1 0.000126 0.27297 -736.87
- SFR 1 0.001181 0.27402 -736.39
- Graduation.Rate 1 0.004083 0.27693 -735.07
- Freshman.Retention 1 0.006579 0.27942 -733.95
<none> 0.27284 -732.10
- Small.Classes 1 0.021702 0.29454 -727.36
- Special 1 0.098578 0.37142 -698.37
Step: AIC=-736.87
Giving ~ Freshman.Retention + Graduation.Rate + SFR + Small.Classes +
Special
Df Sum of Sq RSS AIC
- SFR 1 0.001466 0.27444 -741.03
- Graduation.Rate 1 0.004019 0.27699 -739.87
- Freshman.Retention 1 0.006792 0.27976 -738.63
<none> 0.27297 -736.87
- Small.Classes 1 0.033667 0.30664 -727.16
- Special 1 0.098714 0.37168 -703.11
Step: AIC=-741.03
Giving ~ Freshman.Retention + Graduation.Rate + Small.Classes +
Special
Df Sum of Sq RSS AIC
- Graduation.Rate 1 0.005052 0.27949 -743.58
- Freshman.Retention 1 0.006793 0.28123 -742.80
<none> 0.27444 -741.03
- Small.Classes 1 0.063197 0.33763 -719.95
- Special 1 0.098852 0.37329 -707.40
Step: AIC=-743.58
Giving ~ Freshman.Retention + Small.Classes + Special
Df Sum of Sq RSS AIC
<none> 0.27949 -743.58
- Small.Classes 1 0.091383 0.37087 -713.04
- Special 1 0.100559 0.38005 -709.99
- Freshman.Retention 1 0.151936 0.43142 -694.14
Call:
lm(formula = Giving ~ Freshman.Retention + Small.Classes + Special,
data = GivingRates)
Coefficients:
(Intercept) Freshman.Retention Small.Classes
-0.3336 0.4560 0.2159
SpecialYes
0.1866
> stepwise(First.Cut, direction='backward', criterion='AIC')
Direction: backward
Criterion: AIC
Start: AIC=-751.9
Giving ~ Big.Classes + Freshman.Retention + Graduation.Rate +
SFR + Small.Classes + Special
Df Sum of Sq RSS AIC
- Big.Classes 1 0.000126 0.27297 -753.84
- SFR 1 0.001181 0.27402 -753.36
- Graduation.Rate 1 0.004083 0.27693 -752.04
<none> 0.27284 -751.90
- Freshman.Retention 1 0.006579 0.27942 -750.92
- Small.Classes 1 0.021702 0.29454 -744.33
- Special 1 0.098578 0.37142 -715.34
Step: AIC=-753.84
Giving ~ Freshman.Retention + Graduation.Rate + SFR + Small.Classes +
Special
Df Sum of Sq RSS AIC
- SFR 1 0.001466 0.27444 -755.17
- Graduation.Rate 1 0.004019 0.27699 -754.01
<none> 0.27297 -753.84
- Freshman.Retention 1 0.006792 0.27976 -752.77
- Small.Classes 1 0.033667 0.30664 -741.30
- Special 1 0.098714 0.37168 -717.25
Step: AIC=-755.17
Giving ~ Freshman.Retention + Graduation.Rate + Small.Classes +
Special
Df Sum of Sq RSS AIC
<none> 0.27444 -755.17
- Graduation.Rate 1 0.005052 0.27949 -754.89
- Freshman.Retention 1 0.006793 0.28123 -754.11
- Small.Classes 1 0.063197 0.33763 -731.26
- Special 1 0.098852 0.37329 -718.72
Call:
lm(formula = Giving ~ Freshman.Retention + Graduation.Rate +
Small.Classes + Special, data = GivingRates)
Coefficients:
(Intercept) Freshman.Retention Graduation.Rate
-0.2281 0.2536 0.1139
Small.Classes SpecialYes
0.1947 0.1851