Use the Brokerage Satisfaction excel file to answer the following questions in R. Create an R Markdown file to answer the questions, and then “knit” your file to create an HTML document. Your HTML document should contain both textual explanations of your answers, as well as all R code needed to support your work.
Submit both your HTML document and original R Markdown file (i.e., your Rmd file).
getwd()
## [1] "C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 621/Homework/2"
setwd("C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 621/Homework/2/")
brokerage <- read.csv("C:/Users/raze1/OneDrive/Desktop/UIndy/MSDA 621/Homework/2/Brokerage Satisfaction.csv")
summary(brokerage)
## Brokerage Price Speed Overall
## Length:14 Min. :1.000 Min. :2.500 Min. :2.000
## Class :character 1st Qu.:2.425 1st Qu.:3.025 1st Qu.:2.700
## Mode :character Median :2.750 Median :3.200 Median :3.000
## Mean :2.707 Mean :3.257 Mean :3.029
## 3rd Qu.:3.075 3rd Qu.:3.650 3rd Qu.:3.350
## Max. :3.700 Max. :4.000 Max. :4.000
head(brokerage)
## Brokerage Price Speed Overall
## 1 Scottrade, Inc. 3.2 3.1 3.2
## 2 Charles Schwab 3.3 3.1 3.2
## 3 Fidelity Brokerage Services 3.1 3.3 4.0
## 4 TD Ameritrade 2.8 3.5 3.7
## 5 E*Trade Financial 2.9 3.2 3.0
## 6 (Not listed) 2.4 3.2 2.7
colnames(brokerage)
## [1] "Brokerage" "Price" "Speed" "Overall"
brokerage$Brokerage <- NULL
colnames(brokerage)
## [1] "Price" "Speed" "Overall"
brokerage_model <- lm(Overall~., data = brokerage)
summary(brokerage_model)
##
## Call:
## lm(formula = Overall ~ ., data = brokerage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.58886 -0.13863 -0.09120 0.05781 0.64613
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.6633 0.8248 -0.804 0.438318
## Price 0.7746 0.1521 5.093 0.000348 ***
## Speed 0.4897 0.2016 2.429 0.033469 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3435 on 11 degrees of freedom
## Multiple R-squared: 0.7256, Adjusted R-squared: 0.6757
## F-statistic: 14.54 on 2 and 11 DF, p-value: 0.0008157
brokerage_model$residuals
## 1 2 3 4 5 6
## -0.133395541 -0.210856526 0.646131773 0.480581057 -0.149979422 -0.062674498
## 7 8 9 10 11 12
## -0.588858465 0.392491325 0.076204545 -0.140379336 -0.112435149 0.002632365
## 13 14
## -0.129506736 -0.069955392
MSRes=summary(brokerage_model)$sigma^2
MSRes
## [1] 0.1179775
standard_residuals=brokerage_model$residuals/MSRes
standard_residuals
## 1 2 3 4 5 6
## -1.13068607 -1.78726016 5.47673622 4.07349676 -1.27125421 -0.53124100
## 7 8 9 10 11 12
## -4.99127673 3.32683138 0.64592427 -1.18988204 -0.95302178 0.02231243
## 13 14
## -1.09772381 -0.59295526
qqnorm(brokerage_model$residuals)
qqline(brokerage_model$residuals)
pairs(brokerage)
influence.measures(brokerage_model)
## Influence measures of
## lm(formula = Overall ~ ., data = brokerage) :
##
## dfb.1_ dfb.Pric dfb.Sped dffit cov.r cook.d hat inf
## 1 0.00962 -0.08764 0.02699 -0.14868 1.446 7.98e-03 0.1223
## 2 0.03439 -0.17443 0.04016 -0.26296 1.373 2.43e-02 0.1425
## 3 -0.24242 0.44295 0.12010 0.80373 0.391 1.52e-01 0.1035
## 4 -0.19395 0.09850 0.24332 0.50684 0.763 7.57e-02 0.0950
## 5 -0.00322 -0.03696 0.01017 -0.12836 1.365 5.93e-03 0.0791
## 6 -0.02703 0.02696 0.00988 -0.05831 1.452 1.24e-03 0.0923
## 7 0.58333 -0.09317 -0.76407 -0.99786 0.499 2.47e-01 0.1727
## 8 -0.15454 -0.14032 0.33365 0.53071 0.996 8.89e-02 0.1482
## 9 0.10729 -0.02697 -0.10760 0.13088 1.693 6.24e-03 0.2273
## 10 -0.21391 0.11364 0.17843 -0.24206 1.621 2.11e-02 0.2264
## 11 0.32169 -0.28061 -0.24955 -0.38499 2.297 5.34e-02 0.4508 *
## 12 0.00480 -0.00154 -0.00468 0.00551 1.869 1.11e-05 0.2881 *
## 13 -0.03299 -0.04447 0.05420 -0.13178 1.425 6.28e-03 0.1059
## 14 -0.08532 0.53853 -0.25935 -0.66594 5.014 1.60e-01 0.7462 *
cooks.distance(brokerage_model)
## 1 2 3 4 5 6
## 0.0079790547 0.0243407712 0.1518660850 0.0756708677 0.0059271795 0.0012425789
## 7 8 9 10 11 12
## 0.2471814307 0.0888808107 0.0062442356 0.0210623352 0.0533835032 0.0000111262
## 13 14
## 0.0062752299 0.1601935254
plot(brokerage_model)
new_observations = data.frame(Speed = c(2, 3, 3, 2), Price = c(4, 5, 4, 3))
new_observations
## Speed Price
## 1 2 4
## 2 3 5
## 3 3 4
## 4 2 3
brokerage_predict<-predict(brokerage_model, new_observations, type = "response")
brokerage_predict
## 1 2 3 4
## 3.414448 4.678726 3.904117 2.639838
brokerage_predict_prediction<-predict(brokerage_model, new_observations, interval = "prediction", level = .95, type = "response")
brokerage_predict_prediction
## fit lwr upr
## 1 3.414448 2.391110 4.437787
## 2 4.678726 3.587160 5.770292
## 3 3.904117 3.009861 4.798372
## 4 2.639838 1.681356 3.598321
brokerage_predict_confidence<-predict(brokerage_model, new_observations, interval = "confidence", level = .95, type = "response")
brokerage_predict_confidence
## fit lwr upr
## 1 3.414448 2.724739 4.104158
## 2 4.678726 3.891330 5.466123
## 3 3.904117 3.426445 4.381789
## 4 2.639838 2.050631 3.229046
hatvalues(brokerage_model)
## 1 2 3 4 5 6 7
## 0.12226809 0.14248379 0.10348052 0.09498002 0.07909281 0.09225511 0.17268548
## 8 9 10 11 12 13 14
## 0.14817354 0.22725402 0.22639250 0.45080022 0.28805237 0.10586879 0.74621276
brokerage_predictions = data.frame(Price = c(4, 5, 4, 3), Speed = c(2, 3, 3, 2), Overall = c(3.4, 4.7, 3.9, 2.6))
brokerage_predictions
## Price Speed Overall
## 1 4 2 3.4
## 2 5 3 4.7
## 3 4 3 3.9
## 4 3 2 2.6
new_brokerage<-rbind(brokerage, brokerage_predictions)
new_brokerage
## Price Speed Overall
## 1 3.2 3.1 3.2
## 2 3.3 3.1 3.2
## 3 3.1 3.3 4.0
## 4 2.8 3.5 3.7
## 5 2.9 3.2 3.0
## 6 2.4 3.2 2.7
## 7 2.7 3.8 2.7
## 8 2.4 3.7 3.4
## 9 2.6 2.6 2.7
## 10 2.3 2.7 2.3
## 11 3.7 3.9 4.0
## 12 2.5 2.5 2.5
## 13 3.0 3.0 3.0
## 14 1.0 4.0 2.0
## 15 4.0 2.0 3.4
## 16 5.0 3.0 4.7
## 17 4.0 3.0 3.9
## 18 3.0 2.0 2.6
total_brokerage_model <- lm(Overall~., data = new_brokerage)
summary(total_brokerage_model)
##
## Call:
## lm(formula = Overall ~ ., data = new_brokerage)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.59403 -0.12955 -0.04203 0.01517 0.64520
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.71563 0.54417 -1.315 0.20823
## Price 0.77901 0.08776 8.877 2.34e-07 ***
## Speed 0.50167 0.12895 3.890 0.00145 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2943 on 15 degrees of freedom
## Multiple R-squared: 0.8427, Adjusted R-squared: 0.8217
## F-statistic: 40.19 on 2 and 15 DF, p-value: 9.442e-07
hatvalues(total_brokerage_model)
## 1 2 3 4 5 6 7
## 0.05951763 0.06415055 0.06687217 0.08502226 0.05788606 0.08410355 0.14373397
## 8 9 10 11 12 13 14
## 0.12984340 0.13058768 0.14891091 0.27156387 0.16699041 0.05703613 0.42439441
## 15 16 17 18
## 0.28619899 0.40057379 0.13988658 0.28272766