pacman::p_load(pacman,GGally,ggthemes,httr,ggplot2,ggvis,rio,shiny,rmarkdown,tidyr,stringr,lubridate,plotly,dplyr)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(datasets)
## Print the full mtcars data frame, the data set used throughout this script.
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
##The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973-74 models).##
##type of data: cross-sectional (one row per car model, not a time series); every column is stored as a number - QUANTITATIVE DATA##
## Show the first six rows of mtcars as a quick look at the data.
head(mtcars)
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
## Put the four scatter plots on one page (2 x 2 grid) for easy comparison.
## A 2 x 1 layout only fits two plots per page, which splits the four panels
## over two pages.
## Miles per gallon is the response in every panel, so it goes on the y axis
## and the predictor on the x axis, matching the "as a function of" titles.
par(mfrow = c(2, 2))
plot(mtcars$wt, mtcars$mpg,
     col = "red", pch = 19,
     xlab = "Car Weight",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of Weight")
plot(mtcars$cyl, mtcars$mpg,
     col = "green", pch = 19,
     xlab = "Cylinders",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of cylinders")

plot(mtcars$disp, mtcars$mpg,
     col = "blue", pch = 19,
     xlab = "displacement",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of displacement")
plot(mtcars$hp, mtcars$mpg,
     col = "pink", pch = 19,
     xlab = "horse power",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of horse power")

## Switch to a layout of two rows and one column for the plots that follow.
par(mfrow = c(2, 1))
## Bar chart of cylinder counts ----
## Frequency table: number of cars for each value of cyl.
cylinders <- table(mtcars[["cyl"]])
## One bar per cylinder count, bar height = number of cars.
barplot(
  height = cylinders,
  main = "Bar chart of cylinders",
  xlab = "number of cylinders",
  col = "purple"
)
## Per-column summary of mtcars: minimum, quartiles, median, mean and maximum.
summary(mtcars)
## mpg cyl disp hp
## Min. :10.40 Min. :4.000 Min. : 71.1 Min. : 52.0
## 1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8 1st Qu.: 96.5
## Median :19.20 Median :6.000 Median :196.3 Median :123.0
## Mean :20.09 Mean :6.188 Mean :230.7 Mean :146.7
## 3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0 3rd Qu.:180.0
## Max. :33.90 Max. :8.000 Max. :472.0 Max. :335.0
## drat wt qsec vs
## Min. :2.760 Min. :1.513 Min. :14.50 Min. :0.0000
## 1st Qu.:3.080 1st Qu.:2.581 1st Qu.:16.89 1st Qu.:0.0000
## Median :3.695 Median :3.325 Median :17.71 Median :0.0000
## Mean :3.597 Mean :3.217 Mean :17.85 Mean :0.4375
## 3rd Qu.:3.920 3rd Qu.:3.610 3rd Qu.:18.90 3rd Qu.:1.0000
## Max. :4.930 Max. :5.424 Max. :22.90 Max. :1.0000
## am gear carb
## Min. :0.0000 Min. :3.000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
## Median :0.0000 Median :4.000 Median :2.000
## Mean :0.4062 Mean :3.688 Mean :2.812
## 3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :1.0000 Max. :5.000 Max. :8.000
# Histogram of miles per gallon.
# The title, the axis label and the overlaid normal curve all describe mpg, so
# the histogram is built from mtcars$mpg (it previously used mtcars$wt).
# freq = FALSE puts the bars on the density scale; on the count scale the
# dnorm() curve would lie flat along the x axis and be invisible.
hist(mtcars$mpg,
     freq = FALSE,
     col = "blue",
     main = "Histogram of MPG",
     xlab = "Miles per Gallon")
## Overlay a normal density using the sample mean and standard deviation of mpg.
## TRUE is spelled out because T is an ordinary variable that can be reassigned.
curve(dnorm(x, mean = mean(mtcars$mpg), sd = sd(mtcars$mpg)),
      col = "thistle4",
      lwd = 2,
      add = TRUE)

## Extended descriptive statistics for every column (describe() comes from the
## psych package loaded above): n, mean, sd, median, trimmed mean, mad, range,
## skew, kurtosis and standard error.
describe(mtcars)
## vars n mean sd median trimmed mad min max range skew
## mpg 1 32 20.09 6.03 19.20 19.70 5.41 10.40 33.90 23.50 0.61
## cyl 2 32 6.19 1.79 6.00 6.23 2.97 4.00 8.00 4.00 -0.17
## disp 3 32 230.72 123.94 196.30 222.52 140.48 71.10 472.00 400.90 0.38
## hp 4 32 146.69 68.56 123.00 141.19 77.10 52.00 335.00 283.00 0.73
## drat 5 32 3.60 0.53 3.70 3.58 0.70 2.76 4.93 2.17 0.27
## wt 6 32 3.22 0.98 3.33 3.15 0.77 1.51 5.42 3.91 0.42
## qsec 7 32 17.85 1.79 17.71 17.83 1.42 14.50 22.90 8.40 0.37
## vs 8 32 0.44 0.50 0.00 0.42 0.00 0.00 1.00 1.00 0.24
## am 9 32 0.41 0.50 0.00 0.38 0.00 0.00 1.00 1.00 0.36
## gear 10 32 3.69 0.74 4.00 3.62 1.48 3.00 5.00 2.00 0.53
## carb 11 32 2.81 1.62 2.00 2.65 1.48 1.00 8.00 7.00 1.05
## kurtosis se
## mpg -0.37 1.07
## cyl -1.76 0.32
## disp -1.21 21.91
## hp -0.14 12.12
## drat -0.71 0.09
## wt -0.02 0.17
## qsec 0.34 0.32
## vs -2.00 0.09
## am -1.92 0.09
## gear -1.07 0.13
## carb 1.26 0.29
## Regression analysis / modelling ----
## Go back to a single plot per page.
par(mfrow = c(1, 1))
## MODEL 1: mpg regressed on all ten other columns of mtcars.
lr1 <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = mtcars
)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr1
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear + carb, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 12.30337 -0.11144 0.01334 -0.02148 0.78711 -3.71530
## qsec vs am gear carb
## 0.82104 0.31776 2.52023 0.65541 -0.19942
## Full summary of model 1: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr1)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4506 -1.6044 -0.1196 1.2193 4.6271
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.30337 18.71788 0.657 0.5181
## cyl -0.11144 1.04502 -0.107 0.9161
## disp 0.01334 0.01786 0.747 0.4635
## hp -0.02148 0.02177 -0.987 0.3350
## drat 0.78711 1.63537 0.481 0.6353
## wt -3.71530 1.89441 -1.961 0.0633 .
## qsec 0.82104 0.73084 1.123 0.2739
## vs 0.31776 2.10451 0.151 0.8814
## am 2.52023 2.05665 1.225 0.2340
## gear 0.65541 1.49326 0.439 0.6652
## carb -0.19942 0.82875 -0.241 0.8122
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared: 0.869, Adjusted R-squared: 0.8066
## F-statistic: 13.93 on 10 and 21 DF, p-value: 3.793e-07
## Further analysis: analysis-of-variance table for model 1 (one row per term
## plus the residuals).
anova(lr1)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 116.4245 5.034e-10 ***
## disp 1 37.59 37.59 5.3526 0.030911 *
## hp 1 9.37 9.37 1.3342 0.261031
## drat 1 16.47 16.47 2.3446 0.140644
## wt 1 77.48 77.48 11.0309 0.003244 **
## qsec 1 3.95 3.95 0.5623 0.461656
## vs 1 0.13 0.13 0.0185 0.893173
## am 1 14.47 14.47 2.0608 0.165858
## gear 1 0.97 0.97 0.1384 0.713653
## carb 1 0.41 0.41 0.0579 0.812179
## Residuals 21 147.49 7.02
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 1 coefficients.
confint(lr1)
## 2.5 % 97.5 %
## (Intercept) -26.62259745 51.22934576
## cyl -2.28468553 2.06180457
## disp -0.02380146 0.05047194
## hp -0.06675236 0.02378812
## drat -2.61383350 4.18805545
## wt -7.65495413 0.22434628
## qsec -0.69883421 2.34091571
## vs -4.05880242 4.69432805
## am -1.75681208 6.79726585
## gear -2.44999107 3.76081711
## carb -1.92290442 1.52406591
## Residual of every car under model 1.
resid(lr1)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.599505761 -1.111886079 -3.450644085 0.162595453
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 1.006565971 -2.283039036 -0.086256253 1.903988115
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -1.619089898 0.500970058 -1.391654392 2.227837890
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.700426404 -0.542224699 -1.634013415 -0.536437711
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.206370638 4.627094192 0.503261089 4.387630904
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -2.143103442 -1.443053221 -2.532181498 -0.006021976
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.508321011 -0.993468693 -0.152953961 2.763727417
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -3.070040803 0.006171846 1.058881618 -2.968267683
## Histogram of the model 1 residuals, to see how they spread around zero.
hist(
  x = residuals(lr1),
  main = "HIST of Model Residuals:Model 1",
  xlab = "Residuals",
  col = "yellow"
)

## MODEL 2: same as model 1 with carb removed.
lr2 <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear,
  data = mtcars
)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr2
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 12.83084 -0.16881 0.01623 -0.02424 0.70590 -4.03214
## qsec vs am gear
## 0.86829 0.36470 2.55093 0.50294
## Full summary of model 2: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr2)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am + gear, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.3038 -1.6964 -0.1796 1.1802 4.7245
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.83084 18.18671 0.706 0.48790
## cyl -0.16881 0.99544 -0.170 0.86689
## disp 0.01623 0.01290 1.259 0.22137
## hp -0.02424 0.01811 -1.339 0.19428
## drat 0.70590 1.56553 0.451 0.65647
## wt -4.03214 1.33252 -3.026 0.00621 **
## qsec 0.86829 0.68874 1.261 0.22063
## vs 0.36470 2.05009 0.178 0.86043
## am 2.55093 2.00826 1.270 0.21728
## gear 0.50294 1.32287 0.380 0.70745
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.593 on 22 degrees of freedom
## Multiple R-squared: 0.8687, Adjusted R-squared: 0.8149
## F-statistic: 16.17 on 9 and 22 DF, p-value: 9.244e-08
## Further analysis: analysis-of-variance table for model 2 (one row per term
## plus the residuals).
anova(lr2)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 121.6332 1.975e-10 ***
## disp 1 37.59 37.59 5.5920 0.027282 *
## hp 1 9.37 9.37 1.3939 0.250354
## drat 1 16.47 16.47 2.4495 0.131834
## wt 1 77.48 77.48 11.5244 0.002604 **
## qsec 1 3.95 3.95 0.5875 0.451553
## vs 1 0.13 0.13 0.0193 0.890766
## am 1 14.47 14.47 2.1530 0.156442
## gear 1 0.97 0.97 0.1445 0.707454
## Residuals 22 147.90 6.72
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 2 coefficients.
confint(lr2)
## 2.5 % 97.5 %
## (Intercept) -24.88610284 50.54777383
## cyl -2.23322212 1.89559685
## disp -0.01051582 0.04298297
## hp -0.06178812 0.01330703
## drat -2.54081522 3.95261689
## wt -6.79561305 -1.26867120
## qsec -0.56007378 2.29664412
## vs -3.88692251 4.61633113
## am -1.61394222 6.71579920
## gear -2.24053853 3.24641088
## Residual of every car under model 2.
resid(lr2)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.79231944 -1.25036289 -3.30375312 0.09642396
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 0.97763351 -2.25257013 -0.21912718 1.88147040
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -1.66448328 0.48025051 -1.44072059 2.43247655
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.78789119 -0.45781577 -1.79338883 -0.56507514
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.28962224 4.72445634 0.29984145 4.37967035
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -2.17440603 -1.41845456 -2.48258053 0.03818114
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.48467255 -0.95400885 0.03307755 2.77483581
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -2.83156999 -0.14008902 0.90722862 -2.84700682
## Histogram of the model 2 residuals, to see how they spread around zero.
hist(
  x = residuals(lr2),
  main = "HIST of Model Residuals:Model 2",
  xlab = "Residuals",
  col = "thistle4"
)

## MODEL 3: same as model 2 with gear removed.
lr3 <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am,
  data = mtcars
)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr3
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 15.57313 -0.27859 0.01471 -0.02144 0.81506 -3.94374
## qsec vs am
## 0.80976 0.36836 2.79375
## Full summary of model 3: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr3)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs +
## am, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.4735 -1.4664 -0.3822 1.2006 4.6215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.57313 16.38173 0.951 0.35167
## cyl -0.27859 0.93475 -0.298 0.76835
## disp 0.01471 0.01203 1.223 0.23379
## hp -0.02144 0.01623 -1.321 0.19947
## drat 0.81506 1.51009 0.540 0.59456
## wt -3.94374 1.28744 -3.063 0.00551 **
## qsec 0.80976 0.65871 1.229 0.23139
## vs 0.36836 2.01158 0.183 0.85631
## am 2.79375 1.86824 1.495 0.14841
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.544 on 23 degrees of freedom
## Multiple R-squared: 0.8678, Adjusted R-squared: 0.8218
## F-statistic: 18.87 on 8 and 23 DF, p-value: 2.168e-08
## Further analysis: analysis-of-variance table for model 3 (one row per term
## plus the residuals).
anova(lr3)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 126.3320 8.066e-11 ***
## disp 1 37.59 37.59 5.8081 0.024351 *
## hp 1 9.37 9.37 1.4478 0.241128
## drat 1 16.47 16.47 2.5441 0.124356
## wt 1 77.48 77.48 11.9696 0.002127 **
## qsec 1 3.95 3.95 0.6101 0.442699
## vs 1 0.13 0.13 0.0200 0.888635
## am 1 14.47 14.47 2.2362 0.148409
## Residuals 23 148.87 6.47
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 3 coefficients.
confint(lr3)
## 2.5 % 97.5 %
## (Intercept) -18.31506979 49.46133115
## cyl -2.21227359 1.65508655
## disp -0.01017556 0.03959580
## hp -0.05501890 0.01213407
## drat -2.30879492 3.93891217
## wt -6.60701236 -1.28046632
## qsec -0.55289175 2.17240553
## vs -3.79291448 4.52963179
## am -1.07100601 6.65850569
## Residual of every car under model 3.
resid(lr3)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.86500258 -1.31281291 -3.47348933 0.12059433
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 1.02936387 -2.20575928 -0.45037118 2.12216447
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -1.37695408 0.65495553 -1.23089860 2.31722089
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.71439814 -0.51231765 -1.73478253 -0.58511927
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.15560325 4.62146878 0.13478699 4.31005474
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -2.51152091 -1.33403600 -2.42938049 -0.31409539
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.57093491 -1.06647376 0.23507859 3.01241089
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -2.51995473 0.04655028 0.98872281 -3.11133978
## Histogram of the model 3 residuals, to see how they spread around zero.
hist(
  x = residuals(lr3),
  main = "HIST of Model Residuals:Model 3",
  xlab = "Residuals",
  col = "red"
)

## MODEL 4: same as model 3 with the transmission indicator (am) removed.
lr4 <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs,
  data = mtcars
)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr4
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 25.88354 -0.85665 0.01314 -0.01733 1.31266 -4.22434
## qsec vs
## 0.44873 -0.27817
## Full summary of model 4: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr4)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9989 -1.6052 -0.4549 1.1675 5.5320
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.88354 15.23801 1.699 0.10232
## cyl -0.85665 0.87272 -0.982 0.33609
## disp 0.01314 0.01229 1.069 0.29555
## hp -0.01733 0.01640 -1.057 0.30124
## drat 1.31266 1.51043 0.869 0.39342
## wt -4.22434 1.30608 -3.234 0.00353 **
## qsec 0.44873 0.62846 0.714 0.48210
## vs -0.27817 2.01453 -0.138 0.89133
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.609 on 24 degrees of freedom
## Multiple R-squared: 0.8549, Adjusted R-squared: 0.8126
## F-statistic: 20.21 on 7 and 24 DF, p-value: 1.275e-08
## Further analysis: analysis-of-variance table for model 4 (one row per term
## plus the residuals).
anova(lr4)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 120.1437 7.954e-11 ***
## disp 1 37.59 37.59 5.5235 0.027319 *
## hp 1 9.37 9.37 1.3768 0.252155
## drat 1 16.47 16.47 2.4195 0.132922
## wt 1 77.48 77.48 11.3832 0.002514 **
## qsec 1 3.95 3.95 0.5803 0.453632
## vs 1 0.13 0.13 0.0191 0.891328
## Residuals 24 163.35 6.81
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 4 coefficients.
confint(lr4)
## 2.5 % 97.5 %
## (Intercept) -5.56616992 57.33325342
## cyl -2.65785675 0.94455057
## disp -0.01222182 0.03850376
## hp -0.05118536 0.01652396
## drat -1.80471383 4.43002483
## wt -6.91996557 -1.52872145
## qsec -0.84834461 1.74581163
## vs -4.43595469 3.87961670
## Residual of every car under model 4.
resid(lr4)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.37753175 -0.55161492 -2.79040773 0.26545834
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 0.73123847 -1.58254139 -1.45570195 1.02524936
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -1.69751766 -0.16189617 -1.83113627 2.22019163
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.59416813 -0.47410810 -1.01656736 0.02937847
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.39601778 5.53195106 0.16804293 5.19587916
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -3.99888945 -1.43291509 -2.61290179 -1.67334496
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.49488272 -0.43566416 -0.72951304 2.78902622
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.92236656 0.07805726 1.70263107 -2.47755427
## Histogram of the model 4 residuals, to see how they spread around zero.
hist(
  x = residuals(lr4),
  main = "HIST of Model Residuals:Model 4",
  xlab = "Residuals",
  col = "blue"
)

## MODEL 5: same as model 4 with vs removed.
lr5 <- lm(
  formula = mpg ~ cyl + disp + hp + drat + wt + qsec,
  data = mtcars
)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr5
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec, data = mtcars)
##
## Coefficients:
## (Intercept) cyl disp hp drat wt
## 26.30736 -0.81856 0.01320 -0.01793 1.32041 -4.19083
## qsec
## 0.40146
## Full summary of model 5: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr5)
##
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.9682 -1.5795 -0.4353 1.1662 5.5272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 26.30736 14.62994 1.798 0.08424 .
## cyl -0.81856 0.81156 -1.009 0.32282
## disp 0.01320 0.01204 1.097 0.28307
## hp -0.01793 0.01551 -1.156 0.25846
## drat 1.32041 1.47948 0.892 0.38065
## wt -4.19083 1.25791 -3.332 0.00269 **
## qsec 0.40146 0.51658 0.777 0.44436
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.557 on 25 degrees of freedom
## Multiple R-squared: 0.8548, Adjusted R-squared: 0.82
## F-statistic: 24.53 on 6 and 25 DF, p-value: 2.45e-09
## Further analysis: analysis-of-variance table for model 5 (one row per term
## plus the residuals).
anova(lr5)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 125.0503 3.209e-11 ***
## disp 1 37.59 37.59 5.7491 0.024287 *
## hp 1 9.37 9.37 1.4331 0.242493
## drat 1 16.47 16.47 2.5183 0.125100
## wt 1 77.48 77.48 11.8481 0.002041 **
## qsec 1 3.95 3.95 0.6040 0.444365
## Residuals 25 163.48 6.54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 5 coefficients.
confint(lr5)
## 2.5 % 97.5 %
## (Intercept) -3.82356207 56.43828006
## cyl -2.49000541 0.85288494
## disp -0.01158520 0.03799499
## hp -0.04986374 0.01400388
## drat -1.72663198 4.36744344
## wt -6.78154092 -1.60012383
## qsec -0.66246389 1.46538622
## Residual of every car under model 5.
resid(lr5)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.31414056 -0.47029655 -2.82378627 0.17170341
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 0.74941510 -1.64604065 -1.45618159 1.00861445
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -1.55735289 -0.31002275 -1.95089945 2.24421854
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.63904330 -0.41199955 -1.00114204 0.03762625
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.39648853 5.52715918 0.12316110 5.22244752
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -3.96815985 -1.43378362 -2.59272238 -1.70659058
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 2.49889097 -0.45854001 -0.57546067 2.71528485
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.96865360 0.13311966 1.69867910 -2.52007897
## Histogram of the model 5 residuals, to see how they spread around zero.
hist(
  x = residuals(lr5),
  main = "HIST of Model Residuals:Model 5",
  xlab = "Residuals",
  col = "brown"
)

## Use a single plot per page.
par(mfrow = c(1, 1))
#### PRINCIPAL COMPONENT ANALYSIS ####
## PCA on all eleven mtcars columns. Each column is centred and scaled to unit
## variance first, so columns measured in large units do not dominate.
## TRUE is spelled out because T is an ordinary variable that can be reassigned.
pc <- prcomp(mtcars, center = TRUE, scale. = TRUE)
## Standard deviation, proportion of variance and cumulative proportion for
## each principal component.
summary(pc)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.5707 1.6280 0.79196 0.51923 0.47271 0.46000 0.3678
## Proportion of Variance 0.6008 0.2409 0.05702 0.02451 0.02031 0.01924 0.0123
## Cumulative Proportion 0.6008 0.8417 0.89873 0.92324 0.94356 0.96279 0.9751
## PC8 PC9 PC10 PC11
## Standard deviation 0.35057 0.2776 0.22811 0.1485
## Proportion of Variance 0.01117 0.0070 0.00473 0.0020
## Cumulative Proportion 0.98626 0.9933 0.99800 1.0000
## Plot the PCA result with green bars.
plot(pc, col= "green")

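## The PCA summary shows that the first two principal components (not two individual variables) account for most of the variance (cumulative proportion 0.8417).
## As a simple approximation, fit a model with only two predictors.##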
## MODEL 6: two-predictor model, mpg regressed on cylinder count and weight.
lr6 <- lm(formula = mpg ~ cyl + wt, data = mtcars)
## Auto-printing the fit shows only the call and the estimated coefficients.
lr6
##
## Call:
## lm(formula = mpg ~ cyl + wt, data = mtcars)
##
## Coefficients:
## (Intercept) cyl wt
## 39.686 -1.508 -3.191
## Full summary of model 6: residual quantiles, coefficient estimates with
## standard errors, t values and p-values, plus R-squared and the overall F-test.
summary(lr6)
##
## Call:
## lm(formula = mpg ~ cyl + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.2893 -1.5512 -0.4684 1.5743 6.1004
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.6863 1.7150 23.141 < 2e-16 ***
## cyl -1.5078 0.4147 -3.636 0.001064 **
## wt -3.1910 0.7569 -4.216 0.000222 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.568 on 29 degrees of freedom
## Multiple R-squared: 0.8302, Adjusted R-squared: 0.8185
## F-statistic: 70.91 on 2 and 29 DF, p-value: 6.809e-12
## Analysis-of-variance table for model 6 (one row per term plus the residuals).
anova(lr6)
## Analysis of Variance Table
##
## Response: mpg
## Df Sum Sq Mean Sq F value Pr(>F)
## cyl 1 817.71 817.71 124.044 5.424e-12 ***
## wt 1 117.16 117.16 17.773 0.000222 ***
## Residuals 29 191.17 6.59
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Confidence intervals (2.5 % and 97.5 % bounds) for the model 6 coefficients.
confint(lr6)
## 2.5 % 97.5 %
## (Intercept) 36.178725 43.1937976
## cyl -2.355928 -0.6596622
## wt -4.739020 -1.6429245
## Residual of every car under model 6.
resid(lr6)
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -1.27914467 -0.46544677 -3.45202624 1.01948376
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 2.05304242 -1.49872807 -1.93213120 0.92411952
## Merc 230 Merc 280 Merc 280C Merc 450SE
## -0.80351937 -0.46254751 -1.86254751 1.76335487
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 1.57842434 -0.36202705 -0.47129800 0.08393115
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 4.13184435 5.76505710 1.89833840 6.10035227
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## -4.28933528 -0.89167980 -1.46291244 -2.07056872
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 3.84538614 -0.18055052 -0.82640123 1.57285924
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.70852005 -2.10049885 -1.23213120 -3.38417906
## Histogram of the model 6 residuals, to see how they spread around zero.
hist(
  x = residuals(lr6),
  main = "HIST of MODEL 6:Residuals",
  xlab = "Residuals",
  col = "purple"
)

###MODEL BASED ON PC RESULTS###
# The model with the highest adjusted R^2 is the one we would normally go with, since it explains the most variation in the data.
# The model motivated by the PCA has only 2 predictors, compared with 6 to 10 in the other 5 models, and its adjusted R^2 is almost 82%, which is almost as good as the best of the previous 5 models.
# The residuals of the two-predictor model are spread around 0, which suggests it is a reasonable estimator for the data in question.
# Thus we can conclude that miles per gallon is a function of the car's mass and its number of cylinders:
# the heavier the car and the more cylinders it has, the lower its mileage.
## Biplot of the PCA. It shows only two principal components, which should be
## sufficient here because the first two explain most of the variance.
biplot(pc)

## Score of every car on each principal component, rounded to two decimals.
round(predict(pc), digits = 2)
## PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10
## Mazda RX4 -0.65 1.71 -0.59 0.11 -0.95 -0.02 -0.43 -0.01 0.15 -0.07
## Mazda RX4 Wag -0.62 1.53 -0.38 0.20 -1.02 -0.24 -0.42 -0.08 0.07 -0.13
## Datsun 710 -2.74 -0.14 -0.24 -0.25 0.40 -0.35 -0.61 0.59 -0.13 0.05
## Hornet 4 Drive -0.31 -2.33 -0.13 -0.50 0.55 0.02 -0.04 -0.05 0.22 -0.06
## Hornet Sportabout 1.94 -0.74 -1.12 0.07 0.21 0.15 0.38 -0.16 -0.02 -0.06
## Valiant -0.06 -2.74 0.16 -0.98 0.21 -0.24 -0.29 0.26 -0.03 -0.20
## Duster 360 2.96 0.33 -0.36 -0.05 0.34 0.71 -0.14 -0.17 -0.18 0.36
## Merc 240D -2.02 -1.44 0.93 -0.14 -0.32 0.00 0.64 0.16 0.38 0.29
## Merc 230 -2.25 -1.95 1.77 0.29 -0.33 -0.33 0.62 -0.11 -0.86 -0.12
## Merc 280 -0.52 -0.16 1.47 0.07 -0.07 0.82 0.16 0.10 0.54 -0.22
## Merc 280C -0.50 -0.32 1.66 0.09 -0.15 0.73 0.09 0.20 0.31 -0.34
## Merc 450SE 2.21 -0.67 -0.37 -0.13 -0.38 0.13 -0.02 -0.19 -0.06 -0.07
## Merc 450SL 2.02 -0.67 -0.48 -0.21 -0.36 0.24 0.05 -0.33 -0.21 -0.11
## Merc 450SLC 2.11 -0.79 -0.29 -0.18 -0.43 0.18 -0.07 -0.12 -0.39 -0.21
## Cadillac Fleetwood 3.84 -0.81 0.64 0.29 -0.05 -0.88 -0.17 0.14 0.19 0.06
## Lincoln Continental 3.89 -0.72 0.71 0.41 0.00 -0.86 -0.19 0.13 0.20 0.12
## Chrysler Imperial 3.54 -0.41 0.54 0.67 0.21 -0.65 0.03 -0.39 0.27 0.28
## Fiat 128 -3.80 -0.29 -0.42 0.06 0.22 -0.47 -0.04 -0.63 0.11 -0.03
## Honda Civic -4.19 0.68 -0.20 1.17 0.10 0.52 -0.25 -0.40 0.24 -0.15
## Toyota Corolla -4.17 -0.27 -0.46 0.18 0.22 -0.32 0.07 -0.85 -0.11 -0.13
## Toyota Corona -1.87 -2.09 0.15 0.05 0.04 0.72 -0.28 0.21 -0.45 0.51
## Dodge Challenger 2.15 -1.00 -1.15 -0.58 -0.23 0.11 0.09 0.32 0.10 -0.14
## AMC Javelin 1.83 -0.89 -0.95 0.01 -0.25 0.29 0.08 0.32 -0.12 -0.30
## Camaro Z28 2.84 0.67 -0.16 0.81 0.39 0.95 -0.21 0.04 -0.05 0.33
## Pontiac Firebird 2.21 -0.86 -1.03 0.15 0.30 -0.20 0.47 -0.23 0.21 0.02
## Fiat X1-9 -3.52 -0.12 -0.45 -0.01 0.21 -0.14 -0.36 0.09 -0.02 -0.08
## Porsche 914-2 -2.61 2.01 -0.82 0.57 -0.60 -0.34 0.82 0.63 -0.13 0.35
## Lotus Europa -3.33 1.36 -0.45 -1.15 0.69 0.02 0.51 0.00 0.30 0.24
## Ford Pantera L 1.35 3.44 -0.13 0.59 1.10 -0.17 0.41 0.61 -0.23 -0.50
## Ferrari Dino 0.00 3.17 0.40 -0.94 -0.85 -0.01 0.03 0.01 0.10 0.14
## Maserati Bora 2.63 4.31 1.33 -0.88 0.46 -0.02 -0.19 -0.56 -0.34 0.05
## Volvo 142E -2.38 0.23 0.41 0.22 0.32 -0.33 -0.78 0.48 -0.04 0.12
## PC11
## Mazda RX4 0.18
## Mazda RX4 Wag 0.09
## Datsun 710 -0.09
## Hornet 4 Drive 0.15
## Hornet Sportabout 0.15
## Valiant 0.02
## Duster 360 0.17
## Merc 240D -0.02
## Merc 230 0.16
## Merc 280 -0.12
## Merc 280C -0.03
## Merc 450SE -0.40
## Merc 450SL -0.20
## Merc 450SLC -0.14
## Cadillac Fleetwood 0.26
## Lincoln Continental 0.04
## Chrysler Imperial -0.22
## Fiat 128 -0.21
## Honda Civic 0.25
## Toyota Corolla -0.03
## Toyota Corona 0.06
## Dodge Challenger 0.05
## AMC Javelin 0.05
## Camaro Z28 -0.10
## Pontiac Firebird 0.12
## Fiat X1-9 -0.01
## Porsche 914-2 -0.11
## Lotus Europa 0.03
## Ford Pantera L -0.04
## Ferrari Dino 0.04
## Maserati Bora 0.06
## Volvo 142E -0.15