pacman::p_load(pacman,GGally,ggthemes,httr,ggplot2,ggvis,rio,shiny,rmarkdown,tidyr,stringr,lubridate,plotly,dplyr)
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(datasets)
mtcars   ### print the full data set used throughout this analysis
##                      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4           21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag       21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710          22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive      21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## Valiant             18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## Merc 240D           24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## Merc 230            22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## Merc 280            19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
## Merc 280C           17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
## Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## Fiat 128            32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1
## Honda Civic         30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2
## Toyota Corolla      33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1
## Toyota Corona       21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
## Fiat X1-9           27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1
## Porsche 914-2       26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2
## Lotus Europa        30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2
## Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
## Ferrari Dino        19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6
## Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
## Volvo 142E          21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2
##The data was extracted from the 1974 Motor Trend US magazine, and comprises fuel consumption and 10 aspects of automobile design and performance for 32 automobiles (1973-74 models).##

##type of data: cross-sectional (one row per car model, not a time series); all 11 variables are stored as numbers - QUANTITATIVE DATA##
head(mtcars) ## show the first six rows of the data set
##                    mpg cyl disp  hp drat    wt  qsec vs am gear carb
## Mazda RX4         21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## Mazda RX4 Wag     21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## Datsun 710        22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## Hornet 4 Drive    21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## Valiant           18.1   6  225 105 2.76 3.460 20.22  1  0    3    1
## Put the 4 scatter plots on the same page (2 rows x 2 columns) for easy
## comparison. The original 2 x 1 layout only held two plots per page.
par(mfrow = c(2, 2))

# Each title reads "Miles Per Gallon as a function of <predictor>", so mpg is
# the response on the y-axis and the predictor goes on the x-axis.
plot(mpg ~ wt, data = mtcars,
     col = "red", pch = 19,
     xlab = "Car Weight",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of Weight")
plot(mpg ~ cyl, data = mtcars,
     col = "green", pch = 19,
     xlab = "Cylinders",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of cylinders")

plot(mpg ~ disp, data = mtcars,
     col = "blue", pch = 19,
     xlab = "displacement",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of displacement")
plot(mpg ~ hp, data = mtcars,
     col = "pink", pch = 19,
     xlab = "horse power",
     ylab = "Miles per Gallon",
     main = "Miles Per Gallon as a function of horse power")

# Switch to two stacked panels per page.
par(mfrow = c(2, 1))

# Bar chart: count the cars at each cylinder value, then draw one bar per count.
cylinders <- table(mtcars[["cyl"]])
barplot(
  cylinders,
  main = "Bar chart of cylinders",
  xlab = "number of cylinders",
  col = "purple"
)
summary(mtcars) ## min, quartiles, median, mean and max of every column ##
##       mpg             cyl             disp             hp       
##  Min.   :10.40   Min.   :4.000   Min.   : 71.1   Min.   : 52.0  
##  1st Qu.:15.43   1st Qu.:4.000   1st Qu.:120.8   1st Qu.: 96.5  
##  Median :19.20   Median :6.000   Median :196.3   Median :123.0  
##  Mean   :20.09   Mean   :6.188   Mean   :230.7   Mean   :146.7  
##  3rd Qu.:22.80   3rd Qu.:8.000   3rd Qu.:326.0   3rd Qu.:180.0  
##  Max.   :33.90   Max.   :8.000   Max.   :472.0   Max.   :335.0  
##       drat             wt             qsec             vs        
##  Min.   :2.760   Min.   :1.513   Min.   :14.50   Min.   :0.0000  
##  1st Qu.:3.080   1st Qu.:2.581   1st Qu.:16.89   1st Qu.:0.0000  
##  Median :3.695   Median :3.325   Median :17.71   Median :0.0000  
##  Mean   :3.597   Mean   :3.217   Mean   :17.85   Mean   :0.4375  
##  3rd Qu.:3.920   3rd Qu.:3.610   3rd Qu.:18.90   3rd Qu.:1.0000  
##  Max.   :4.930   Max.   :5.424   Max.   :22.90   Max.   :1.0000  
##        am              gear            carb      
##  Min.   :0.0000   Min.   :3.000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:3.000   1st Qu.:2.000  
##  Median :0.0000   Median :4.000   Median :2.000  
##  Mean   :0.4062   Mean   :3.688   Mean   :2.812  
##  3rd Qu.:1.0000   3rd Qu.:4.000   3rd Qu.:4.000  
##  Max.   :1.0000   Max.   :5.000   Max.   :8.000
# Histogram of miles per gallon.
# The original plotted mtcars$wt under an MPG title and axis label; the data
# now matches the labels. freq = FALSE puts the bars on the density scale so
# the normal density added below is drawn on the same scale as the bars.
hist(mtcars$mpg,
     freq = FALSE,
     col = "blue",
     main = "Histogram of MPG",
     xlab = "Miles per Gallon")

## Overlay a normal density using the sample mean and sd of mpg.
curve(dnorm(x, mean = mean(mtcars$mpg), sd = sd(mtcars$mpg)),
      col = "thistle4",
      lwd = 2,
      add = TRUE)

# Descriptive statistics per variable: n, mean, sd, median, trimmed mean, mad,
# min, max, range, skew, kurtosis and standard error.
describe(mtcars)
##      vars  n   mean     sd median trimmed    mad   min    max  range  skew
## mpg     1 32  20.09   6.03  19.20   19.70   5.41 10.40  33.90  23.50  0.61
## cyl     2 32   6.19   1.79   6.00    6.23   2.97  4.00   8.00   4.00 -0.17
## disp    3 32 230.72 123.94 196.30  222.52 140.48 71.10 472.00 400.90  0.38
## hp      4 32 146.69  68.56 123.00  141.19  77.10 52.00 335.00 283.00  0.73
## drat    5 32   3.60   0.53   3.70    3.58   0.70  2.76   4.93   2.17  0.27
## wt      6 32   3.22   0.98   3.33    3.15   0.77  1.51   5.42   3.91  0.42
## qsec    7 32  17.85   1.79  17.71   17.83   1.42 14.50  22.90   8.40  0.37
## vs      8 32   0.44   0.50   0.00    0.42   0.00  0.00   1.00   1.00  0.24
## am      9 32   0.41   0.50   0.00    0.38   0.00  0.00   1.00   1.00  0.36
## gear   10 32   3.69   0.74   4.00    3.62   1.48  3.00   5.00   2.00  0.53
## carb   11 32   2.81   1.62   2.00    2.65   1.48  1.00   8.00   7.00  1.05
##      kurtosis    se
## mpg     -0.37  1.07
## cyl     -1.76  0.32
## disp    -1.21 21.91
## hp      -0.14 12.12
## drat    -0.71  0.09
## wt      -0.02  0.17
## qsec     0.34  0.32
## vs      -2.00  0.09
## am      -1.92  0.09
## gear    -1.07  0.13
## carb     1.26  0.29
### Regression analysis / modelling ###
# One plot per page from here on.
par(mfrow = c(1, 1))

# Model 1: mpg regressed on all ten other variables.
lr1 <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear + carb,
  data = mtcars
)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr1
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear + carb, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl         disp           hp         drat           wt  
##    12.30337     -0.11144      0.01334     -0.02148      0.78711     -3.71530  
##        qsec           vs           am         gear         carb  
##     0.82104      0.31776      2.52023      0.65541     -0.19942
# Full summary of Model 1: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr1)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear + carb, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4506 -1.6044 -0.1196  1.2193  4.6271 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) 12.30337   18.71788   0.657   0.5181  
## cyl         -0.11144    1.04502  -0.107   0.9161  
## disp         0.01334    0.01786   0.747   0.4635  
## hp          -0.02148    0.02177  -0.987   0.3350  
## drat         0.78711    1.63537   0.481   0.6353  
## wt          -3.71530    1.89441  -1.961   0.0633 .
## qsec         0.82104    0.73084   1.123   0.2739  
## vs           0.31776    2.10451   0.151   0.8814  
## am           2.52023    2.05665   1.225   0.2340  
## gear         0.65541    1.49326   0.439   0.6652  
## carb        -0.19942    0.82875  -0.241   0.8122  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.65 on 21 degrees of freedom
## Multiple R-squared:  0.869,  Adjusted R-squared:  0.8066 
## F-statistic: 13.93 on 10 and 21 DF,  p-value: 3.793e-07
# Further analysis: ANOVA table for Model 1. Terms are tested sequentially,
# in the order they appear in the model formula.
anova(lr1)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## cyl        1 817.71  817.71 116.4245 5.034e-10 ***
## disp       1  37.59   37.59   5.3526  0.030911 *  
## hp         1   9.37    9.37   1.3342  0.261031    
## drat       1  16.47   16.47   2.3446  0.140644    
## wt         1  77.48   77.48  11.0309  0.003244 ** 
## qsec       1   3.95    3.95   0.5623  0.461656    
## vs         1   0.13    0.13   0.0185  0.893173    
## am         1  14.47   14.47   2.0608  0.165858    
## gear       1   0.97    0.97   0.1384  0.713653    
## carb       1   0.41    0.41   0.0579  0.812179    
## Residuals 21 147.49    7.02                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 1 coefficient.
confint(lr1)
##                    2.5 %      97.5 %
## (Intercept) -26.62259745 51.22934576
## cyl          -2.28468553  2.06180457
## disp         -0.02380146  0.05047194
## hp           -0.06675236  0.02378812
## drat         -2.61383350  4.18805545
## wt           -7.65495413  0.22434628
## qsec         -0.69883421  2.34091571
## vs           -4.05880242  4.69432805
## am           -1.75681208  6.79726585
## gear         -2.44999107  3.76081711
## carb         -1.92290442  1.52406591
# Model 1 residual for every car.
resid(lr1)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##        -1.599505761        -1.111886079        -3.450644085         0.162595453 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##         1.006565971        -2.283039036        -0.086256253         1.903988115 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##        -1.619089898         0.500970058        -1.391654392         2.227837890 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##         1.700426404        -0.542224699        -1.634013415        -0.536437711 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##         4.206370638         4.627094192         0.503261089         4.387630904 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##        -2.143103442        -1.443053221        -2.532181498        -0.006021976 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##         2.508321011        -0.993468693        -0.152953961         2.763727417 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##        -3.070040803         0.006171846         1.058881618        -2.968267683
# Histogram of the Model 1 residuals, to see how they spread around zero.
hist(
  resid(lr1),
  xlab = "Residuals",
  main = "HIST of Model Residuals:Model 1",
  col = "yellow"
)

# Model 2: mpg on nine predictors; `carb` is not included.
lr2 <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am + gear,
  data = mtcars
)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr2
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl         disp           hp         drat           wt  
##    12.83084     -0.16881      0.01623     -0.02424      0.70590     -4.03214  
##        qsec           vs           am         gear  
##     0.86829      0.36470      2.55093      0.50294
# Full summary of Model 2: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr2)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am + gear, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.3038 -1.6964 -0.1796  1.1802  4.7245 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 12.83084   18.18671   0.706  0.48790   
## cyl         -0.16881    0.99544  -0.170  0.86689   
## disp         0.01623    0.01290   1.259  0.22137   
## hp          -0.02424    0.01811  -1.339  0.19428   
## drat         0.70590    1.56553   0.451  0.65647   
## wt          -4.03214    1.33252  -3.026  0.00621 **
## qsec         0.86829    0.68874   1.261  0.22063   
## vs           0.36470    2.05009   0.178  0.86043   
## am           2.55093    2.00826   1.270  0.21728   
## gear         0.50294    1.32287   0.380  0.70745   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.593 on 22 degrees of freedom
## Multiple R-squared:  0.8687, Adjusted R-squared:  0.8149 
## F-statistic: 16.17 on 9 and 22 DF,  p-value: 9.244e-08
# Further analysis: ANOVA table for Model 2. Terms are tested sequentially,
# in the order they appear in the model formula.
anova(lr2)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## cyl        1 817.71  817.71 121.6332 1.975e-10 ***
## disp       1  37.59   37.59   5.5920  0.027282 *  
## hp         1   9.37    9.37   1.3939  0.250354    
## drat       1  16.47   16.47   2.4495  0.131834    
## wt         1  77.48   77.48  11.5244  0.002604 ** 
## qsec       1   3.95    3.95   0.5875  0.451553    
## vs         1   0.13    0.13   0.0193  0.890766    
## am         1  14.47   14.47   2.1530  0.156442    
## gear       1   0.97    0.97   0.1445  0.707454    
## Residuals 22 147.90    6.72                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 2 coefficient.
confint(lr2)
##                    2.5 %      97.5 %
## (Intercept) -24.88610284 50.54777383
## cyl          -2.23322212  1.89559685
## disp         -0.01051582  0.04298297
## hp           -0.06178812  0.01330703
## drat         -2.54081522  3.95261689
## wt           -6.79561305 -1.26867120
## qsec         -0.56007378  2.29664412
## vs           -3.88692251  4.61633113
## am           -1.61394222  6.71579920
## gear         -2.24053853  3.24641088
# Model 2 residual for every car.
resid(lr2)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##         -1.79231944         -1.25036289         -3.30375312          0.09642396 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##          0.97763351         -2.25257013         -0.21912718          1.88147040 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##         -1.66448328          0.48025051         -1.44072059          2.43247655 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##          1.78789119         -0.45781577         -1.79338883         -0.56507514 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##          4.28962224          4.72445634          0.29984145          4.37967035 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##         -2.17440603         -1.41845456         -2.48258053          0.03818114 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##          2.48467255         -0.95400885          0.03307755          2.77483581 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##         -2.83156999         -0.14008902          0.90722862         -2.84700682
# Histogram of the Model 2 residuals, to see how they spread around zero.
hist(
  resid(lr2),
  xlab = "Residuals",
  main = "HIST of Model Residuals:Model 2",
  col = "thistle4"
)

# Model 3: mpg on eight predictors; neither `carb` nor `gear` is included.
lr3 <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs + am,
  data = mtcars
)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr3
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl         disp           hp         drat           wt  
##    15.57313     -0.27859      0.01471     -0.02144      0.81506     -3.94374  
##        qsec           vs           am  
##     0.80976      0.36836      2.79375
# Full summary of Model 3: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr3)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs + 
##     am, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4735 -1.4664 -0.3822  1.2006  4.6215 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 15.57313   16.38173   0.951  0.35167   
## cyl         -0.27859    0.93475  -0.298  0.76835   
## disp         0.01471    0.01203   1.223  0.23379   
## hp          -0.02144    0.01623  -1.321  0.19947   
## drat         0.81506    1.51009   0.540  0.59456   
## wt          -3.94374    1.28744  -3.063  0.00551 **
## qsec         0.80976    0.65871   1.229  0.23139   
## vs           0.36836    2.01158   0.183  0.85631   
## am           2.79375    1.86824   1.495  0.14841   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.544 on 23 degrees of freedom
## Multiple R-squared:  0.8678, Adjusted R-squared:  0.8218 
## F-statistic: 18.87 on 8 and 23 DF,  p-value: 2.168e-08
# Further analysis: ANOVA table for Model 3. Terms are tested sequentially,
# in the order they appear in the model formula.
anova(lr3)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## cyl        1 817.71  817.71 126.3320 8.066e-11 ***
## disp       1  37.59   37.59   5.8081  0.024351 *  
## hp         1   9.37    9.37   1.4478  0.241128    
## drat       1  16.47   16.47   2.5441  0.124356    
## wt         1  77.48   77.48  11.9696  0.002127 ** 
## qsec       1   3.95    3.95   0.6101  0.442699    
## vs         1   0.13    0.13   0.0200  0.888635    
## am         1  14.47   14.47   2.2362  0.148409    
## Residuals 23 148.87    6.47                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 3 coefficient.
confint(lr3)
##                    2.5 %      97.5 %
## (Intercept) -18.31506979 49.46133115
## cyl          -2.21227359  1.65508655
## disp         -0.01017556  0.03959580
## hp           -0.05501890  0.01213407
## drat         -2.30879492  3.93891217
## wt           -6.60701236 -1.28046632
## qsec         -0.55289175  2.17240553
## vs           -3.79291448  4.52963179
## am           -1.07100601  6.65850569
# Model 3 residual for every car.
resid(lr3)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##         -1.86500258         -1.31281291         -3.47348933          0.12059433 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##          1.02936387         -2.20575928         -0.45037118          2.12216447 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##         -1.37695408          0.65495553         -1.23089860          2.31722089 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##          1.71439814         -0.51231765         -1.73478253         -0.58511927 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##          4.15560325          4.62146878          0.13478699          4.31005474 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##         -2.51152091         -1.33403600         -2.42938049         -0.31409539 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##          2.57093491         -1.06647376          0.23507859          3.01241089 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##         -2.51995473          0.04655028          0.98872281         -3.11133978
# Histogram of the Model 3 residuals, to see how they spread around zero.
hist(
  resid(lr3),
  xlab = "Residuals",
  main = "HIST of Model Residuals:Model 3",
  col = "red"
)

# Model 4: mpg on seven predictors; transmission (`am`) is not included.
lr4 <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec + vs,
  data = mtcars
)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr4
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl         disp           hp         drat           wt  
##    25.88354     -0.85665      0.01314     -0.01733      1.31266     -4.22434  
##        qsec           vs  
##     0.44873     -0.27817
# Full summary of Model 4: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr4)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec + vs, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9989 -1.6052 -0.4549  1.1675  5.5320 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 25.88354   15.23801   1.699  0.10232   
## cyl         -0.85665    0.87272  -0.982  0.33609   
## disp         0.01314    0.01229   1.069  0.29555   
## hp          -0.01733    0.01640  -1.057  0.30124   
## drat         1.31266    1.51043   0.869  0.39342   
## wt          -4.22434    1.30608  -3.234  0.00353 **
## qsec         0.44873    0.62846   0.714  0.48210   
## vs          -0.27817    2.01453  -0.138  0.89133   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.609 on 24 degrees of freedom
## Multiple R-squared:  0.8549, Adjusted R-squared:  0.8126 
## F-statistic: 20.21 on 7 and 24 DF,  p-value: 1.275e-08
# Further analysis: ANOVA table for Model 4. Terms are tested sequentially,
# in the order they appear in the model formula.
anova(lr4)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## cyl        1 817.71  817.71 120.1437 7.954e-11 ***
## disp       1  37.59   37.59   5.5235  0.027319 *  
## hp         1   9.37    9.37   1.3768  0.252155    
## drat       1  16.47   16.47   2.4195  0.132922    
## wt         1  77.48   77.48  11.3832  0.002514 ** 
## qsec       1   3.95    3.95   0.5803  0.453632    
## vs         1   0.13    0.13   0.0191  0.891328    
## Residuals 24 163.35    6.81                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 4 coefficient.
confint(lr4)
##                   2.5 %      97.5 %
## (Intercept) -5.56616992 57.33325342
## cyl         -2.65785675  0.94455057
## disp        -0.01222182  0.03850376
## hp          -0.05118536  0.01652396
## drat        -1.80471383  4.43002483
## wt          -6.91996557 -1.52872145
## qsec        -0.84834461  1.74581163
## vs          -4.43595469  3.87961670
# Model 4 residual for every car.
resid(lr4)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##         -1.37753175         -0.55161492         -2.79040773          0.26545834 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##          0.73123847         -1.58254139         -1.45570195          1.02524936 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##         -1.69751766         -0.16189617         -1.83113627          2.22019163 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##          1.59416813         -0.47410810         -1.01656736          0.02937847 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##          4.39601778          5.53195106          0.16804293          5.19587916 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##         -3.99888945         -1.43291509         -2.61290179         -1.67334496 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##          2.49488272         -0.43566416         -0.72951304          2.78902622 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##         -1.92236656          0.07805726          1.70263107         -2.47755427
# Histogram of the Model 4 residuals, to see how they spread around zero.
hist(
  resid(lr4),
  xlab = "Residuals",
  main = "HIST of Model Residuals:Model 4",
  col = "blue"
)

# Model 5: mpg on six predictors; `vs` is not included.
lr5 <- lm(
  mpg ~ cyl + disp + hp + drat + wt + qsec,
  data = mtcars
)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr5
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl         disp           hp         drat           wt  
##    26.30736     -0.81856      0.01320     -0.01793      1.32041     -4.19083  
##        qsec  
##     0.40146
# Full summary of Model 5: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr5)
## 
## Call:
## lm(formula = mpg ~ cyl + disp + hp + drat + wt + qsec, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.9682 -1.5795 -0.4353  1.1662  5.5272 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) 26.30736   14.62994   1.798  0.08424 . 
## cyl         -0.81856    0.81156  -1.009  0.32282   
## disp         0.01320    0.01204   1.097  0.28307   
## hp          -0.01793    0.01551  -1.156  0.25846   
## drat         1.32041    1.47948   0.892  0.38065   
## wt          -4.19083    1.25791  -3.332  0.00269 **
## qsec         0.40146    0.51658   0.777  0.44436   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.557 on 25 degrees of freedom
## Multiple R-squared:  0.8548, Adjusted R-squared:   0.82 
## F-statistic: 24.53 on 6 and 25 DF,  p-value: 2.45e-09
# Further analysis: ANOVA table for Model 5. Terms are tested sequentially,
# in the order they appear in the model formula.
anova(lr5)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq  F value    Pr(>F)    
## cyl        1 817.71  817.71 125.0503 3.209e-11 ***
## disp       1  37.59   37.59   5.7491  0.024287 *  
## hp         1   9.37    9.37   1.4331  0.242493    
## drat       1  16.47   16.47   2.5183  0.125100    
## wt         1  77.48   77.48  11.8481  0.002041 ** 
## qsec       1   3.95    3.95   0.6040  0.444365    
## Residuals 25 163.48    6.54                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 5 coefficient.
confint(lr5)
##                   2.5 %      97.5 %
## (Intercept) -3.82356207 56.43828006
## cyl         -2.49000541  0.85288494
## disp        -0.01158520  0.03799499
## hp          -0.04986374  0.01400388
## drat        -1.72663198  4.36744344
## wt          -6.78154092 -1.60012383
## qsec        -0.66246389  1.46538622
# Model 5 residual for every car.
resid(lr5)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##         -1.31414056         -0.47029655         -2.82378627          0.17170341 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##          0.74941510         -1.64604065         -1.45618159          1.00861445 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##         -1.55735289         -0.31002275         -1.95089945          2.24421854 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##          1.63904330         -0.41199955         -1.00114204          0.03762625 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##          4.39648853          5.52715918          0.12316110          5.22244752 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##         -3.96815985         -1.43378362         -2.59272238         -1.70659058 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##          2.49889097         -0.45854001         -0.57546067          2.71528485 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##         -1.96865360          0.13311966          1.69867910         -2.52007897
# Histogram of the Model 5 residuals, to see how they spread around zero.
hist(
  resid(lr5),
  xlab = "Residuals",
  main = "HIST of Model Residuals:Model 5",
  col = "brown"
)

# One plot per page for the PCA graphics.
par(mfrow = c(1, 1))

#### PRINCIPAL COMPONENT ANALYSIS ####
# PCA on every column of mtcars (mpg included). Each column is centred and
# scaled to unit variance first, so no variable dominates through its units.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
pc <- prcomp(mtcars, center = TRUE, scale. = TRUE)

# Standard deviation, proportion of variance and cumulative proportion for
# each principal component.
summary(pc)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.5707 1.6280 0.79196 0.51923 0.47271 0.46000 0.3678
## Proportion of Variance 0.6008 0.2409 0.05702 0.02451 0.02031 0.01924 0.0123
## Cumulative Proportion  0.6008 0.8417 0.89873 0.92324 0.94356 0.96279 0.9751
##                            PC8    PC9    PC10   PC11
## Standard deviation     0.35057 0.2776 0.22811 0.1485
## Proportion of Variance 0.01117 0.0070 0.00473 0.0020
## Cumulative Proportion  0.98626 0.9933 0.99800 1.0000
# Scree plot: one bar per principal component showing its variance.
plot(pc, col= "green")

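## The PCA summary shows that the first two principal components (each a linear combination of all the variables) account for about 84% of the total variance; PCA does not by itself single out two of the original variables##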
# Model 6: mpg explained by just two predictors, cylinder count and weight.
lr6 <- lm(mpg ~ cyl + wt, data = mtcars)

# Auto-print the fit: shows the call and the estimated coefficients only.
lr6
## 
## Call:
## lm(formula = mpg ~ cyl + wt, data = mtcars)
## 
## Coefficients:
## (Intercept)          cyl           wt  
##      39.686       -1.508       -3.191
# Full summary of Model 6: residual quartiles, coefficient estimates with
# standard errors, t-tests and p-values, plus R-squared and the overall F-test.
summary(lr6)
## 
## Call:
## lm(formula = mpg ~ cyl + wt, data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2893 -1.5512 -0.4684  1.5743  6.1004 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  39.6863     1.7150  23.141  < 2e-16 ***
## cyl          -1.5078     0.4147  -3.636 0.001064 ** 
## wt           -3.1910     0.7569  -4.216 0.000222 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.568 on 29 degrees of freedom
## Multiple R-squared:  0.8302, Adjusted R-squared:  0.8185 
## F-statistic: 70.91 on 2 and 29 DF,  p-value: 6.809e-12
# ANOVA table for Model 6. Terms are tested sequentially, in the order they
# appear in the model formula.
anova(lr6)
## Analysis of Variance Table
## 
## Response: mpg
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## cyl        1 817.71  817.71 124.044 5.424e-12 ***
## wt         1 117.16  117.16  17.773  0.000222 ***
## Residuals 29 191.17    6.59                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# 95% confidence interval (2.5% and 97.5% bounds) for each Model 6 coefficient.
confint(lr6)
##                 2.5 %     97.5 %
## (Intercept) 36.178725 43.1937976
## cyl         -2.355928 -0.6596622
## wt          -4.739020 -1.6429245
# Model 6 residual for every car.
resid(lr6)
##           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
##         -1.27914467         -0.46544677         -3.45202624          1.01948376 
##   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
##          2.05304242         -1.49872807         -1.93213120          0.92411952 
##            Merc 230            Merc 280           Merc 280C          Merc 450SE 
##         -0.80351937         -0.46254751         -1.86254751          1.76335487 
##          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
##          1.57842434         -0.36202705         -0.47129800          0.08393115 
##   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
##          4.13184435          5.76505710          1.89833840          6.10035227 
##       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
##         -4.28933528         -0.89167980         -1.46291244         -2.07056872 
##    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
##          3.84538614         -0.18055052         -0.82640123          1.57285924 
##      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
##         -1.70852005         -2.10049885         -1.23213120         -3.38417906
# Histogram of the Model 6 residuals, to see how they spread around zero.
hist(
  resid(lr6),
  xlab = "Residuals",
  main = "HIST of MODEL 6:Residuals",
  col = "purple"
)

###MODEL BASED ON PC RESULTS###
# We go with the model that has the highest adjusted R^2, since it explains the most variation in the data.
# The model suggested by the PC results uses only 2 variables, compared with the other 5 models, yet its adjusted R^2 is almost 82%, which is almost as good as the best of the previous 5 models.
# The residuals of the PC-based model are spread around 0, making it a good estimator for the data in question.
# Thus we can conclude that miles per gallon is a function of the car's mass and its number of cylinders:
# the bigger the car, the less mileage it gets, due to its size and number of cylinders.
# Biplot of the PCA fit. Only the first two components are drawn, which
# should be sufficient since most of our data is explained by just two.
biplot(x = pc)

### See how the individual cases (cars) score on the PCs ###

# Component scores for every car, rounded to two decimals for readability.
round(predict(pc), digits = 2)
##                       PC1   PC2   PC3   PC4   PC5   PC6   PC7   PC8   PC9  PC10
## Mazda RX4           -0.65  1.71 -0.59  0.11 -0.95 -0.02 -0.43 -0.01  0.15 -0.07
## Mazda RX4 Wag       -0.62  1.53 -0.38  0.20 -1.02 -0.24 -0.42 -0.08  0.07 -0.13
## Datsun 710          -2.74 -0.14 -0.24 -0.25  0.40 -0.35 -0.61  0.59 -0.13  0.05
## Hornet 4 Drive      -0.31 -2.33 -0.13 -0.50  0.55  0.02 -0.04 -0.05  0.22 -0.06
## Hornet Sportabout    1.94 -0.74 -1.12  0.07  0.21  0.15  0.38 -0.16 -0.02 -0.06
## Valiant             -0.06 -2.74  0.16 -0.98  0.21 -0.24 -0.29  0.26 -0.03 -0.20
## Duster 360           2.96  0.33 -0.36 -0.05  0.34  0.71 -0.14 -0.17 -0.18  0.36
## Merc 240D           -2.02 -1.44  0.93 -0.14 -0.32  0.00  0.64  0.16  0.38  0.29
## Merc 230            -2.25 -1.95  1.77  0.29 -0.33 -0.33  0.62 -0.11 -0.86 -0.12
## Merc 280            -0.52 -0.16  1.47  0.07 -0.07  0.82  0.16  0.10  0.54 -0.22
## Merc 280C           -0.50 -0.32  1.66  0.09 -0.15  0.73  0.09  0.20  0.31 -0.34
## Merc 450SE           2.21 -0.67 -0.37 -0.13 -0.38  0.13 -0.02 -0.19 -0.06 -0.07
## Merc 450SL           2.02 -0.67 -0.48 -0.21 -0.36  0.24  0.05 -0.33 -0.21 -0.11
## Merc 450SLC          2.11 -0.79 -0.29 -0.18 -0.43  0.18 -0.07 -0.12 -0.39 -0.21
## Cadillac Fleetwood   3.84 -0.81  0.64  0.29 -0.05 -0.88 -0.17  0.14  0.19  0.06
## Lincoln Continental  3.89 -0.72  0.71  0.41  0.00 -0.86 -0.19  0.13  0.20  0.12
## Chrysler Imperial    3.54 -0.41  0.54  0.67  0.21 -0.65  0.03 -0.39  0.27  0.28
## Fiat 128            -3.80 -0.29 -0.42  0.06  0.22 -0.47 -0.04 -0.63  0.11 -0.03
## Honda Civic         -4.19  0.68 -0.20  1.17  0.10  0.52 -0.25 -0.40  0.24 -0.15
## Toyota Corolla      -4.17 -0.27 -0.46  0.18  0.22 -0.32  0.07 -0.85 -0.11 -0.13
## Toyota Corona       -1.87 -2.09  0.15  0.05  0.04  0.72 -0.28  0.21 -0.45  0.51
## Dodge Challenger     2.15 -1.00 -1.15 -0.58 -0.23  0.11  0.09  0.32  0.10 -0.14
## AMC Javelin          1.83 -0.89 -0.95  0.01 -0.25  0.29  0.08  0.32 -0.12 -0.30
## Camaro Z28           2.84  0.67 -0.16  0.81  0.39  0.95 -0.21  0.04 -0.05  0.33
## Pontiac Firebird     2.21 -0.86 -1.03  0.15  0.30 -0.20  0.47 -0.23  0.21  0.02
## Fiat X1-9           -3.52 -0.12 -0.45 -0.01  0.21 -0.14 -0.36  0.09 -0.02 -0.08
## Porsche 914-2       -2.61  2.01 -0.82  0.57 -0.60 -0.34  0.82  0.63 -0.13  0.35
## Lotus Europa        -3.33  1.36 -0.45 -1.15  0.69  0.02  0.51  0.00  0.30  0.24
## Ford Pantera L       1.35  3.44 -0.13  0.59  1.10 -0.17  0.41  0.61 -0.23 -0.50
## Ferrari Dino         0.00  3.17  0.40 -0.94 -0.85 -0.01  0.03  0.01  0.10  0.14
## Maserati Bora        2.63  4.31  1.33 -0.88  0.46 -0.02 -0.19 -0.56 -0.34  0.05
## Volvo 142E          -2.38  0.23  0.41  0.22  0.32 -0.33 -0.78  0.48 -0.04  0.12
##                      PC11
## Mazda RX4            0.18
## Mazda RX4 Wag        0.09
## Datsun 710          -0.09
## Hornet 4 Drive       0.15
## Hornet Sportabout    0.15
## Valiant              0.02
## Duster 360           0.17
## Merc 240D           -0.02
## Merc 230             0.16
## Merc 280            -0.12
## Merc 280C           -0.03
## Merc 450SE          -0.40
## Merc 450SL          -0.20
## Merc 450SLC         -0.14
## Cadillac Fleetwood   0.26
## Lincoln Continental  0.04
## Chrysler Imperial   -0.22
## Fiat 128            -0.21
## Honda Civic          0.25
## Toyota Corolla      -0.03
## Toyota Corona        0.06
## Dodge Challenger     0.05
## AMC Javelin          0.05
## Camaro Z28          -0.10
## Pontiac Firebird     0.12
## Fiat X1-9           -0.01
## Porsche 914-2       -0.11
## Lotus Europa         0.03
## Ford Pantera L      -0.04
## Ferrari Dino         0.04
## Maserati Bora        0.06
## Volvo 142E          -0.15