1. Before PCA

Load the data

# Open the documentation for the built-in mtcars dataset, then preview it
help("mtcars")
head(mtcars, n = 6L)

Visualize the correlation between variables

Some questions to guide your analyses:

  • How many variables do you have?
  • Is there a dimensionality problem?
  • Are the variables related?
# visualize the correlation
#install.packages("corrplot")
library(corrplot)
## corrplot 0.84 loaded
# Pairwise correlations between every pair of mtcars columns
M <- cor(mtcars)
# Preview the first rows of the matrix, rounded to two decimals
head(round(M, digits = 2))
##        mpg   cyl  disp    hp  drat    wt  qsec    vs    am  gear  carb
## mpg   1.00 -0.85 -0.85 -0.78  0.68 -0.87  0.42  0.66  0.60  0.48 -0.55
## cyl  -0.85  1.00  0.90  0.83 -0.70  0.78 -0.59 -0.81 -0.52 -0.49  0.53
## disp -0.85  0.90  1.00  0.79 -0.71  0.89 -0.43 -0.71 -0.59 -0.56  0.39
## hp   -0.78  0.83  0.79  1.00 -0.45  0.66 -0.71 -0.72 -0.24 -0.13  0.75
## drat  0.68 -0.70 -0.71 -0.45  1.00 -0.71  0.09  0.44  0.71  0.70 -0.09
## wt   -0.87  0.78  0.89  0.66 -0.71  1.00 -0.17 -0.55 -0.69 -0.58  0.43
# Correlation matrix drawn as a colored grid
corrplot(M, method = "color")

Set up an objective: Can you predict how fast a car is?

# Simple regressions: qsec is the dependent variable used to demonstrate the
# objective. Start with mpg as the only predictor.
summary(lm("qsec ~ mpg", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.8161 -1.0287  0.0954  0.8623  4.7149 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 15.35477    1.02978  14.911 2.05e-15 ***
## mpg          0.12414    0.04916   2.525   0.0171 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.65 on 30 degrees of freedom
## Multiple R-squared:  0.1753, Adjusted R-squared:  0.1478 
## F-statistic: 6.377 on 1 and 30 DF,  p-value: 0.01708
# Add cyl as a second predictor
summary(lm("qsec ~ mpg+cyl", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5454 -0.9929  0.4851  0.7705  3.4270 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.00212    3.28710   7.606 2.19e-08 ***
## mpg         -0.09220    0.08311  -1.109   0.2764    
## cyl         -0.85673    0.28047  -3.055   0.0048 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.459 on 29 degrees of freedom
## Multiple R-squared:  0.376,  Adjusted R-squared:  0.333 
## F-statistic: 8.739 on 2 and 29 DF,  p-value: 0.001071
# Add disp as a third predictor
summary(lm("qsec ~ mpg+cyl+disp", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.6385 -1.0082  0.1268  0.8874  3.2562 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 24.553482   3.270432   7.508 3.54e-08 ***
## mpg         -0.053003   0.087735  -0.604  0.55062    
## cyl         -1.158244   0.364015  -3.182  0.00356 ** 
## disp         0.006617   0.005172   1.279  0.21123    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.444 on 28 degrees of freedom
## Multiple R-squared:  0.4105, Adjusted R-squared:  0.3474 
## F-statistic:   6.5 on 3 and 28 DF,  p-value: 0.001777
# Add hp as a fourth predictor
summary(lm("qsec ~ mpg+cyl+disp+hp", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.3832 -0.6694 -0.1863  0.6997  3.4004 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 25.652179   2.684761   9.555 3.74e-10 ***
## mpg         -0.105556   0.072893  -1.448 0.159106    
## cyl         -0.695276   0.320273  -2.171 0.038900 *  
## disp         0.008186   0.004241   1.930 0.064168 .  
## hp          -0.022288   0.005751  -3.875 0.000615 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.178 on 27 degrees of freedom
## Multiple R-squared:  0.6212, Adjusted R-squared:  0.5651 
## F-statistic: 11.07 on 4 and 27 DF,  p-value: 1.91e-05
# Add drat as a fifth predictor
summary(lm("qsec ~ mpg+cyl+disp+hp+drat", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.9439 -0.4343 -0.1150  0.5630  3.3673 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 30.083312   3.441864   8.740 3.23e-09 ***
## mpg         -0.070857   0.071790  -0.987  0.33274    
## cyl         -0.880359   0.320099  -2.750  0.01069 *  
## disp         0.006118   0.004184   1.462  0.15562    
## hp          -0.017102   0.006109  -2.800  0.00952 ** 
## drat        -1.186305   0.616077  -1.926  0.06516 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.123 on 26 degrees of freedom
## Multiple R-squared:  0.6685, Adjusted R-squared:  0.6047 
## F-statistic: 10.49 on 5 and 26 DF,  p-value: 1.362e-05
# Add wt as a sixth predictor
summary(lm("qsec ~ mpg+cyl+disp+hp+drat+wt", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat+wt", data = mtcars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.60648 -0.52041 -0.02188  0.48331  2.91008 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 22.048508   3.991422   5.524 9.67e-06 ***
## mpg          0.058756   0.075605   0.777  0.44436    
## cyl         -0.654615   0.288404  -2.270  0.03210 *  
## disp        -0.002837   0.004680  -0.606  0.54982    
## hp          -0.013752   0.005432  -2.532  0.01801 *  
## drat        -0.973094   0.541001  -1.799  0.08415 .  
## wt           1.504983   0.493764   3.048  0.00538 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9783 on 25 degrees of freedom
## Multiple R-squared:  0.7583, Adjusted R-squared:  0.7003 
## F-statistic: 13.07 on 6 and 25 DF,  p-value: 1.153e-06
# Add vs as a seventh predictor
summary(lm("qsec ~ mpg+cyl+disp+hp+drat+wt+vs", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat+wt+vs", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2465 -0.3602 -0.1187  0.2469  2.9281 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.045363   3.647146   4.948 4.74e-05 ***
## mpg          0.046355   0.064921   0.714  0.48210    
## cyl         -0.221732   0.282468  -0.785  0.44015    
## disp        -0.001670   0.004028  -0.415  0.68214    
## hp          -0.013322   0.004658  -2.860  0.00863 ** 
## drat        -0.644776   0.475146  -1.357  0.18741    
## wt           1.289547   0.428645   3.008  0.00608 ** 
## vs           1.722758   0.543967   3.167  0.00416 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8385 on 24 degrees of freedom
## Multiple R-squared:  0.8295, Adjusted R-squared:  0.7798 
## F-statistic: 16.68 on 7 and 24 DF,  p-value: 8.256e-08
# Add am as an eighth predictor
summary(lm("qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am", data = mtcars))
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am", data = mtcars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.23411 -0.32872  0.00477  0.26556  2.35665 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 18.379009   3.396731   5.411 1.69e-05 ***
## mpg          0.076138   0.061936   1.229   0.2314    
## cyl         -0.397493   0.274961  -1.446   0.1618    
## disp        -0.002548   0.003770  -0.676   0.5058    
## hp          -0.008650   0.004837  -1.788   0.0869 .  
## drat        -0.372726   0.459443  -0.811   0.4255    
## wt           1.109605   0.407305   2.724   0.0121 *  
## vs           1.164650   0.567495   2.052   0.0517 .  
## am          -1.188121   0.546546  -2.174   0.0403 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7801 on 23 degrees of freedom
## Multiple R-squared:  0.8586, Adjusted R-squared:  0.8094 
## F-statistic: 17.46 on 8 and 23 DF,  p-value: 4.566e-08
# Add gear as a ninth predictor
summary(
  lm("qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am+gear", data = mtcars)
)
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am+gear", data = mtcars)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9749 -0.3050 -0.0830  0.2615  2.4843 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 19.661451   3.557495   5.527 1.49e-05 ***
## mpg          0.077595   0.061550   1.261  0.22063    
## cyl         -0.469658   0.280432  -1.675  0.10814    
## disp        -0.003821   0.003908  -0.978  0.33889    
## hp          -0.005600   0.005500  -1.018  0.31968    
## drat        -0.261221   0.466851  -0.560  0.58145    
## wt           1.147468   0.406045   2.826  0.00984 ** 
## vs           1.100808   0.566617   1.943  0.06495 .  
## am          -0.925655   0.589838  -1.569  0.13084    
## gear        -0.439419   0.385541  -1.140  0.26665    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7751 on 22 degrees of freedom
## Multiple R-squared:  0.8665, Adjusted R-squared:  0.8119 
## F-statistic: 15.86 on 9 and 22 DF,  p-value: 1.099e-07
# Full model: every remaining mtcars column as a predictor of qsec
summary(
  lm("qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am+gear+carb", data = mtcars)
)
## 
## Call:
## lm(formula = "qsec ~ mpg+cyl+disp+hp+drat+wt+vs+am+gear+carb", 
##     data = mtcars)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.93377 -0.33421 -0.03696  0.31389  2.38743 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.776177   3.875998   4.586  0.00016 ***
## mpg          0.069048   0.061462   1.123  0.27394    
## cyl         -0.362678   0.292621  -1.239  0.22887    
## disp        -0.007501   0.004985  -1.505  0.14730    
## hp          -0.001563   0.006449  -0.242  0.81089    
## drat        -0.131064   0.476002  -0.275  0.78574    
## wt           1.496332   0.500469   2.990  0.00698 ** 
## vs           0.970035   0.572767   1.694  0.10512    
## am          -0.901186   0.585218  -1.540  0.13851    
## gear        -0.201285   0.432798  -0.465  0.64666    
## carb        -0.273598   0.233143  -1.174  0.25373    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7685 on 21 degrees of freedom
## Multiple R-squared:  0.8747, Adjusted R-squared:  0.815 
## F-statistic: 14.66 on 10 and 21 DF,  p-value: 2.438e-07

2. Perform PCA

# install.packages("factoextra")
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

Preliminaries

Remove the dependent variable!

# qsec is the dependent variable, so keep it out of the PCA input
drops <- "qsec"
data <- mtcars[, setdiff(names(mtcars), drops)]

Compute PCA

The main and built-in function to do PCA is prcomp. If you want to learn more, type: help(prcomp)

# Run PCA on the predictors. The argument is spelled `scale.` (with the
# trailing dot): the original `scale = TRUE` only worked through partial
# argument matching. Scaling standardizes each column to unit variance so
# variables measured on large scales do not dominate the components.
res.pca <- prcomp(data, scale. = TRUE)
# Standard deviation and (cumulative) proportion of variance per component
summary(res.pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6    PC7
## Standard deviation     2.5265 1.4507 0.71431 0.51819 0.47059 0.42359 0.3674
## Proportion of Variance 0.6383 0.2105 0.05102 0.02685 0.02215 0.01794 0.0135
## Cumulative Proportion  0.6383 0.8488 0.89982 0.92667 0.94882 0.96676 0.9803
##                            PC8     PC9    PC10
## Standard deviation     0.33929 0.23856 0.15935
## Proportion of Variance 0.01151 0.00569 0.00254
## Cumulative Proportion  0.99177 0.99746 1.00000

Scree plot

How many components should I use?

Use a scree plot to visualize the eigenvalues. It shows the percentage of variance explained by each principal component.

# Scree plot for res.pca, used to decide how many components to keep
fviz_eig(res.pca)

Data plot by PCA

Graph of individuals. Individuals with a similar profile are grouped together.

# Plot the individuals (cars) in the space of the principal components
fviz_pca_ind(
  res.pca,
  repel = TRUE, # keep text labels from overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.ind = "cos2" # color each point by its quality of representation
)

Variable plot by PCA

Graph of variables. Positively correlated variables point to the same side of the plot. Negatively correlated variables point to opposite sides of the plot.

# Plot the original variables in the space of the principal components
fviz_pca_var(
  res.pca,
  repel = TRUE, # keep text labels from overlapping
  gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
  col.var = "contrib" # color each variable by its contribution to the PCs
)

Biplot of individuals and variables

Visualize both the data points and the variables by PCA

# Biplot: individuals and variables drawn on the same PCA axes
fviz_pca_biplot(
  res.pca,
  col.ind = "#696969", # color used for the individuals
  col.var = "#2E9FDF", # color used for the variables
  repel = TRUE
)

Extracting the values

How do you get the actual results? The components? The variance explained?

# Pull the per-individual PCA results out of the fitted object
res.ind <- get_pca_ind(res.pca)
res.ind[["coord"]] # coordinates of each individual on every dimension
##                           Dim.1        Dim.2      Dim.3       Dim.4       Dim.5
## Mazda RX4           -0.88932503 -1.365132502 -0.7165487  0.04671982  0.86552907
## Mazda RX4 Wag       -0.79500151 -1.346100834 -0.6333614  0.10600580  0.84591971
## Datsun 710          -2.69205912  0.366502004 -0.1131377 -0.24013425 -0.48934831
## Hornet 4 Drive      -0.02805671  2.188755138  0.1719630 -0.45602689 -0.49977078
## Hornet Sportabout    1.92240577  0.859828167 -0.9403429  0.07094159 -0.15641251
## Valiant              0.32838706  2.366562980  0.3273906 -0.96404089 -0.29162066
## Duster 360           2.77769801 -0.193415173 -0.1381697  0.01890594 -0.06310678
## Merc 240D           -1.75392677  1.213754998  1.0251084 -0.11484231  0.32543053
## Merc 230            -1.66824215  0.920045509  1.1589415  0.22566967  0.16646668
## Merc 280            -0.46335344  0.022369158  1.6480994  0.18536325  0.41016049
## Merc 280C           -0.37753002  0.010889763  1.7010238  0.19393874  0.44445170
## Merc 450SE           2.23368912  0.573400285 -0.3282147 -0.14804885  0.38974352
## Merc 450SL           2.05275224  0.555404338 -0.4731539 -0.23260964  0.39384498
## Merc 450SLC          2.19998216  0.541916946 -0.3774561 -0.20812171  0.44143682
## Cadillac Fleetwood   3.95373834  0.240517052  0.3603170  0.22752849 -0.26445662
## Lincoln Continental  3.98770271  0.168893252  0.4522916  0.35122738 -0.29871529
## Chrysler Imperial    3.57019362  0.007438769  0.3474257  0.63184614 -0.41382047
## Fiat 128            -3.67464746  0.448720063 -0.4592981  0.01109641 -0.39121030
## Honda Civic         -4.21749294 -0.306129629 -0.1273632  1.20409564  0.11619807
## Toyota Corolla      -4.01112383  0.366880951 -0.5969032  0.13165380 -0.35430570
## Toyota Corona       -1.57266468  1.973518077  0.4386018  0.12542926  0.24063905
## Dodge Challenger     2.13158194  1.158538246 -0.8678132 -0.59360347  0.23751811
## AMC Javelin          1.84825333  0.950109422 -0.7817894 -0.00151445  0.33009556
## Camaro Z28           2.60476490 -0.447347975  0.1166588  0.91837052  0.01153168
## Pontiac Firebird     2.20367482  0.931206103 -0.8783982  0.12701030 -0.36106342
## Fiat X1-9           -3.45911838  0.387134033 -0.3532148 -0.01867936 -0.24709148
## Porsche 914-2       -2.87320660 -1.469674939 -0.9241287  0.49309618  0.43501263
## Lotus Europa        -3.54917651 -0.701190903 -0.1388198 -1.09901703 -0.64840939
## Ford Pantera L       0.85012758 -3.053206346 -0.3100419  0.61276640 -1.08936937
## Ferrari Dino        -0.40324800 -2.878891350  0.1603252 -0.97747899  0.77063499
## Maserati Bora        2.11458829 -4.342275621  0.8378585 -0.86436393 -0.46347964
## Volvo 142E          -2.35136674 -0.149019983  0.4121501  0.23681645 -0.39243286
##                           Dim.6        Dim.7        Dim.8        Dim.9
## Mazda RX4           -0.29123992  0.412191499  0.042123003 -0.136187219
## Mazda RX4 Wag       -0.45569630  0.406747419  0.110602298 -0.135424646
## Datsun 710          -0.33776704  0.538024050 -0.596181503  0.068693086
## Hornet 4 Drive       0.12422860  0.042461824  0.092296226 -0.164530041
## Hornet Sportabout    0.35704630 -0.346922689  0.098264233 -0.063709227
## Valiant             -0.18378292  0.263529711 -0.268906985 -0.177026513
## Duster 360           0.76522300  0.184051522  0.002236971  0.357025259
## Merc 240D           -0.36557793 -0.688790065  0.141053958  0.120426485
## Merc 230            -0.06821101 -0.577696934 -0.134842169  0.307244897
## Merc 280             0.30661465 -0.185097532  0.082526904 -0.431028769
## Merc 280C            0.28299028 -0.116598225 -0.108248117 -0.450036087
## Merc 450SE           0.08089811  0.040278973  0.157664645  0.006833937
## Merc 450SL           0.31536038  0.003502477  0.189000003  0.018036162
## Merc 450SLC          0.24767747  0.105183971 -0.083735215 -0.010325290
## Cadillac Fleetwood  -0.89143185  0.111859860  0.049719091 -0.017622440
## Lincoln Continental -0.89894330  0.136173595  0.075259821  0.053291039
## Chrysler Imperial   -0.57353686 -0.030732340  0.590069286  0.201217085
## Fiat 128            -0.15470300  0.093226484  0.639165383 -0.005578360
## Honda Civic          0.43583118  0.300820709  0.349780811 -0.255963807
## Toyota Corolla       0.15718664  0.036125687  0.721990030 -0.007384398
## Toyota Corona        0.54046334  0.284525827 -0.386464231  0.612541706
## Dodge Challenger     0.01693834 -0.126119554 -0.282843679 -0.202489220
## AMC Javelin          0.21732474 -0.096442124 -0.408388102 -0.250659873
## Camaro Z28           0.76306788  0.229217358 -0.138147440  0.279683988
## Pontiac Firebird     0.02538886 -0.449777090  0.311459127 -0.074286572
## Fiat X1-9           -0.07027262  0.347878043 -0.126763847 -0.075823423
## Porsche 914-2       -0.53405226 -0.898520209 -0.464651559  0.361579963
## Lotus Europa         0.20961194 -0.512044495  0.122836491  0.066947379
## Ford Pantera L       0.21996998 -0.447299912 -0.716660716 -0.400361865
## Ferrari Dino        -0.23806774 -0.044166385  0.065472635  0.088870333
## Maserati Bora        0.43559541  0.275262285  0.318548396  0.201777860
## Volvo 142E          -0.43813432  0.709146262 -0.444235749  0.114268571
##                           Dim.10
## Mazda RX4           -0.187999469
## Mazda RX4 Wag       -0.061209605
## Datsun 710           0.080227441
## Hornet 4 Drive      -0.175182955
## Hornet Sportabout   -0.136139445
## Valiant              0.020534587
## Duster 360          -0.227206670
## Merc 240D           -0.083838570
## Merc 230             0.079226798
## Merc 280             0.052498872
## Merc 280C            0.032395223
## Merc 450SE           0.409924947
## Merc 450SL           0.253795570
## Merc 450SLC          0.248500853
## Cadillac Fleetwood  -0.269053922
## Lincoln Continental -0.064445970
## Chrysler Imperial    0.160188571
## Fiat 128             0.223186546
## Honda Civic         -0.253056492
## Toyota Corolla       0.116008466
## Toyota Corona       -0.097466279
## Dodge Challenger    -0.069451487
## AMC Javelin          0.004358759
## Camaro Z28           0.005957542
## Pontiac Firebird    -0.153496927
## Fiat X1-9            0.016554829
## Porsche 914-2        0.055597021
## Lotus Europa        -0.146731481
## Ford Pantera L       0.130600079
## Ferrari Dino        -0.082957767
## Maserati Bora        0.006035215
## Volvo 142E           0.112645720
res.ind[["contrib"]] # contribution of each individual to every PC
##                            Dim.1        Dim.2       Dim.3        Dim.4
## Mazda RX4           0.3871838061 2.767188e+00  3.14461150 2.540276e-02
## Mazda RX4 Wag       0.3094083787 2.690570e+00  2.45685073 1.307789e-01
## Datsun 710          3.5478507632 1.994535e-01  0.07839541 6.710992e-01
## Hornet 4 Drive      0.0003853627 7.113501e+00  0.18111129 2.420246e+00
## Hornet Sportabout   1.8091987512 1.097773e+00  5.41561818 5.857070e-02
## Valiant             0.0527920132 8.316205e+00  0.65645965 1.081607e+01
## Duster 360          3.7771672300 5.554822e-02  0.11692331 4.159826e-03
## Merc 240D           1.5059795311 2.187515e+00  6.43598705 1.534908e-01
## Merc 230            1.3624304461 1.256920e+00  8.22618772 5.926863e-01
## Merc 280            0.1051044087 7.429992e-04 16.63574950 3.998764e-01
## Merc 280C           0.0697748884 1.760864e-04 17.72133162 4.377314e-01
## Merc 450SE          2.4425396020 4.882080e-01  0.65976849 2.550872e-01
## Merc 450SL          2.0628574986 4.580444e-01  1.37113562 6.297004e-01
## Merc 450SLC         2.3693786892 4.360683e-01  0.87258625 5.040961e-01
## Cadillac Fleetwood  7.6526527247 8.589755e-02  0.79514251 6.024904e-01
## Lincoln Continental 7.7846968513 4.235587e-02  1.25288920 1.435672e+00
## Chrysler Imperial   6.2399297185 8.216583e-05  0.73926377 4.646235e+00
## Fiat 128            6.6103966124 2.989785e-01  1.29200686 1.432990e-03
## Honda Civic         8.7077286287 1.391553e-01  0.09934894 1.687332e+01
## Toyota Corolla      7.8764096748 1.998662e-01  2.18214390 2.017183e-01
## Toyota Corona       1.2107888898 5.783241e+00  1.17819286 1.830949e-01
## Dodge Challenger    2.2243351349 1.993011e+00  4.61241155 4.100827e+00
## AMC Javelin         1.6723191044 1.340406e+00  3.74330391 2.669245e-05
## Camaro Z28          3.3214920816 2.971529e-01  0.08335102 9.815545e+00
## Pontiac Firebird    2.3773393441 1.287599e+00  4.72561603 1.877398e-01
## Fiat X1-9           5.8576984268 2.225419e-01  0.76410554 4.060716e-03
## Porsche 914-2       4.0413816884 3.207241e+00  5.23046758 2.829709e+00
## Lotus Europa        6.1666791673 7.300651e-01  0.11802619 1.405683e+01
## Ford Pantera L      0.3538053276 1.384208e+01  0.58873035 4.369869e+00
## Ferrari Dino        0.0796050477 1.230664e+01  0.15742692 1.111971e+01
## Maserati Bora       2.1890102981 2.799779e+01  4.29948977 8.695044e+00
## Volvo 142E          2.7066799096 3.297449e-02  1.04036678 6.526830e-01
##                            Dim.5       Dim.6        Dim.7        Dim.8
## Mazda RX4           10.571240500  1.47728489 3.934096e+00 4.816748e-02
## Mazda RX4 Wag       10.097664340  3.61670802 3.830862e+00 3.320807e-01
## Datsun 710           3.379082678  1.98699594 6.702708e+00 9.648777e+00
## Hornet 4 Drive       3.524555521  0.26878537 4.174882e-02 2.312508e-01
## Hornet Sportabout    0.345227686  2.22029926 2.786842e+00 2.621237e-01
## Valiant              1.200049643  0.58826466 1.608074e+00 1.962996e+00
## Duster 360           0.056197205 10.19855084 7.843786e-01 1.358426e-04
## Merc 240D            1.494442569  2.32767523 1.098552e+01 5.401145e-01
## Merc 230             0.391036479  0.08103477 7.727642e+00 4.935904e-01
## Merc 280             2.373942998  1.63737534 7.933195e-01 1.848870e-01
## Merc 280C            2.787480098  1.39477922 3.147971e-01 3.180944e-01
## Merc 450SE           2.143485035  0.11398276 3.756675e-02 6.748137e-01
## Merc 450SL           2.188836262  1.73211488 2.840518e-04 9.697033e-01
## Merc 450SLC          2.749791402  1.06840383 2.561805e-01 1.903407e-01
## Cadillac Fleetwood   0.986896286 13.84008437 2.897313e-01 6.710604e-02
## Lincoln Continental  1.259150237 14.07430750 4.293710e-01 1.537594e-01
## Chrysler Imperial    2.416498808  5.72908293 2.186947e-02 9.451947e+00
## Fiat 128             2.159649184  0.41683091 2.012452e-01 1.109026e+01
## Honda Civic          0.190528720  3.30825538 2.095378e+00 3.321293e+00
## Toyota Corolla       1.771409255  0.43032217 3.021893e-02 1.415069e+01
## Toyota Corona        0.817136916  5.08738868 1.874521e+00 4.054468e+00
## Dodge Challenger     0.796078839  0.00499694 3.683085e-01 2.171742e+00
## AMC Javelin          1.537595154  0.82258499 2.153676e-01 4.527531e+00
## Camaro Z28           0.001876496 10.14118684 1.216583e+00 5.180849e-01
## Pontiac Firebird     1.839626397  0.01122661 4.684266e+00 2.633403e+00
## Fiat X1-9            0.861545409  0.08600718 2.802212e+00 4.362205e-01
## Porsche 914-2        2.670338886  4.96740938 1.869402e+01 5.860978e+00
## Lotus Europa         5.932820872  0.76523440 6.071029e+00 4.096096e-01
## Ford Pantera L      16.746072290  0.84273161 4.632810e+00 1.394256e+01
## Ferrari Dino         8.380310104  0.98710546 4.516797e-02 1.163684e-01
## Maserati Bora        3.031265409  3.30467706 1.754447e+00 2.754648e+00
## Volvo 142E           2.173168321  3.34331257 1.164444e+01 5.357254e+00
##                            Dim.9       Dim.10
## Mazda RX4            1.018447128  4.349955043
## Mazda RX4 Wag        1.007073574  0.461116517
## Datsun 710           0.259114574  0.792168199
## Hornet 4 Drive       1.486470156  3.777071606
## Hornet Sportabout    0.222879668  2.281074525
## Valiant              1.720847614  0.051897144
## Duster 360           6.999446020  6.353509421
## Merc 240D            0.796360669  0.865085927
## Merc 230             5.183643669  0.772530675
## Merc 280            10.201835071  0.339212200
## Merc 280C           11.121425615  0.129161613
## Merc 450SE           0.002564530 20.681413681
## Merc 450SL           0.017862986  7.927561070
## Merc 450SLC          0.005854237  7.600239754
## Cadillac Fleetwood   0.017052885  8.909435287
## Lincoln Continental  0.155945973  0.511167289
## Chrysler Imperial    2.223286787  3.158163269
## Fiat 128             0.001708751  6.130664434
## Honda Civic          3.597683079  7.881456617
## Toyota Corolla       0.002994305  1.656345344
## Toyota Corona       20.603315352  1.169177081
## Dodge Challenger     2.251487788  0.593655675
## AMC Javelin          3.450129637  0.002338284
## Camaro Z28           4.295374067  0.004368233
## Pontiac Firebird     0.303030556  2.899819013
## Fiat X1-9            0.315698529  0.033730356
## Porsche 914-2        7.179174244  0.380429827
## Lotus Europa         0.246112089  2.649830671
## Ford Pantera L       8.801793812  2.099222150
## Ferrari Dino         0.433689986  0.847004325
## Maserati Bora        2.235696284  0.004482880
## Volvo 142E           0.717000367  1.561711889
res.ind[["cos2"]] # quality of representation of each individual per dimension
##                            Dim.1        Dim.2        Dim.3        Dim.4
## Mazda RX4           0.1869897289 4.406019e-01 0.1213914670 5.160586e-04
## Mazda RX4 Wag       0.1588247663 4.553418e-01 0.1008058000 2.823850e-03
## Datsun 710          0.8564813396 1.587455e-02 0.0015127382 6.814860e-03
## Hornet 4 Drive      0.0001468001 8.934024e-01 0.0055147103 3.878231e-02
## Hornet Sportabout   0.6565625298 1.313437e-01 0.1570935835 8.941036e-04
## Valiant             0.0153236622 7.958416e-01 0.0152308093 1.320631e-01
## Duster 360          0.8997809855 4.362625e-03 0.0022263441 4.168342e-05
## Merc 240D           0.4830120178 2.313114e-01 0.1649964588 2.070803e-03
## Merc 230            0.5052209356 1.536674e-01 0.2438294780 9.245056e-03
## Merc 280            0.0620932316 1.447167e-04 0.7855725439 9.937263e-03
## Merc 280C           0.0398096860 3.312246e-05 0.8081771172 1.050546e-02
## Merc 450SE          0.8601158022 5.667967e-02 0.0185706659 3.778519e-03
## Merc 450SL          0.8173732882 5.983646e-02 0.0434262392 1.049549e-02
## Merc 450SLC         0.8557859175 5.192689e-02 0.0251918241 7.658815e-03
## Cadillac Fleetwood  0.9291645398 3.438496e-03 0.0077169589 3.077148e-03
## Lincoln Continental 0.9252340781 1.659704e-03 0.0119026367 7.177659e-03
## Chrysler Imperial   0.8987813584 3.901869e-06 0.0085112701 2.815096e-02
## Fiat 128            0.9274375848 1.382943e-02 0.0144891394 8.457034e-06
## Honda Civic         0.8941516203 4.711001e-03 0.0008154365 7.288269e-02
## Toyota Corolla      0.9308853294 7.787793e-03 0.0206144556 1.002838e-03
## Toyota Corona       0.3279692915 5.164677e-01 0.0255094877 2.086213e-03
## Dodge Challenger    0.6319609007 1.866839e-01 0.1047462861 4.900936e-02
## AMC Javelin         0.6415015412 1.695204e-01 0.1147768326 4.307096e-07
## Camaro Z28          0.7913019890 2.333975e-02 0.0015872326 9.836511e-02
## Pontiac Firebird    0.6966823852 1.244032e-01 0.1106936422 2.314290e-03
## Fiat X1-9           0.9611162763 1.203837e-02 0.0100212724 2.802652e-05
## Porsche 914-2       0.6280702830 1.643301e-01 0.0649739822 1.849856e-02
## Lotus Europa        0.8351505090 3.259734e-02 0.0012776514 8.007896e-02
## Ford Pantera L      0.0571655153 7.373577e-01 0.0076033813 2.969996e-02
## Ferrari Dino        0.0160945332 8.203222e-01 0.0025441188 9.456900e-02
## Maserati Bora       0.1760529010 7.423801e-01 0.0276396727 2.941608e-02
## Volvo 142E          0.8072570001 3.242352e-03 0.0248017308 8.188320e-03
##                            Dim.5        Dim.6        Dim.7        Dim.8
## Mazda RX4           1.771169e-01 2.005389e-02 4.016935e-02 4.195033e-04
## Mazda RX4 Wag       1.798211e-01 5.218351e-02 4.157497e-02 3.074048e-03
## Datsun 710          2.829990e-02 1.348290e-02 3.420992e-02 4.200546e-02
## Hornet 4 Drive      4.657946e-02 2.878034e-03 3.362405e-04 1.588622e-03
## Hornet Sportabout   4.346393e-03 2.264827e-02 2.138215e-02 1.715445e-03
## Valiant             1.208446e-02 4.799553e-03 9.868467e-03 1.027530e-02
## Duster 360          4.644285e-04 6.828775e-02 3.950441e-03 5.835627e-07
## Merc 240D           1.662844e-02 2.098432e-02 7.449185e-02 3.123961e-03
## Merc 230            5.030577e-03 8.446410e-04 6.058474e-02 3.300765e-03
## Merc 280            4.865496e-02 2.718975e-02 9.908793e-03 1.969748e-03
## Merc 280C           5.517405e-02 2.236808e-02 3.797254e-03 3.272853e-03
## Merc 450SE          2.618601e-02 1.128205e-03 2.796844e-04 4.285289e-03
## Merc 450SL          3.008832e-02 1.929131e-02 2.379567e-06 6.929007e-03
## Merc 450SLC         3.445593e-02 1.084675e-02 1.956257e-03 1.239776e-03
## Cadillac Fleetwood  4.157054e-03 4.723382e-02 7.437472e-04 1.469343e-04
## Lincoln Continental 5.191836e-03 4.701871e-02 1.078927e-03 3.295584e-04
## Chrysler Imperial   1.207520e-02 2.319494e-02 6.659809e-05 2.455142e-02
## Fiat 128            1.051172e-02 1.643807e-03 5.969417e-04 2.805949e-02
## Honda Civic         6.787350e-04 9.548585e-03 4.549021e-03 6.150273e-03
## Toyota Corolla      7.263071e-03 1.429537e-03 7.550858e-05 3.015966e-02
## Toyota Corona       7.678796e-03 3.873409e-02 1.073506e-02 1.980521e-02
## Dodge Challenger    7.846565e-03 3.990505e-05 2.212336e-03 1.112703e-02
## AMC Javelin         2.046228e-02 8.869376e-03 1.746659e-03 3.131994e-02
## Camaro Z28          1.550923e-05 6.790966e-02 6.127738e-03 2.225824e-03
## Pontiac Firebird    1.870281e-02 9.247539e-05 2.902248e-02 1.391688e-02
## Fiat X1-9           4.904115e-03 3.966590e-04 9.720735e-03 1.290731e-03
## Porsche 914-2       1.439721e-02 2.169912e-02 6.142289e-02 1.642591e-02
## Lotus Europa        2.787457e-02 2.913007e-03 1.738301e-02 1.000379e-03
## Ford Pantera L      9.386771e-02 3.827302e-03 1.582572e-02 4.062498e-02
## Ferrari Dino        5.878024e-02 5.609641e-03 1.930713e-04 4.242810e-04
## Maserati Bora       8.457711e-03 7.470645e-03 2.983218e-03 3.995235e-03
## Volvo 142E          2.248546e-02 2.802758e-02 7.342479e-02 2.881364e-02
##                            Dim.9       Dim.10
## Mazda RX4           4.384998e-03 8.356222e-03
## Mazda RX4 Wag       4.608692e-03 9.415025e-04
## Datsun 710          5.576668e-04 7.606670e-04
## Hornet 4 Drive      5.048277e-03 5.723167e-03
## Hornet Sportabout   7.210919e-04 3.292713e-03
## Valiant             4.453148e-03 5.991871e-05
## Duster 360          1.486499e-02 6.020172e-03
## Merc 240D           2.277084e-03 1.103628e-03
## Merc 230            1.713691e-02 1.139482e-03
## Merc 280            5.373188e-02 7.971121e-04
## Merc 280C           5.656925e-02 2.931215e-04
## Merc 450SE          8.051066e-06 2.896811e-02
## Merc 450SL          6.310091e-05 1.249440e-02
## Merc 450SLC         1.885087e-05 1.091899e-02
## Cadillac Fleetwood  1.845903e-05 4.302842e-03
## Lincoln Continental 1.652397e-04 2.416560e-04
## Chrysler Imperial   2.854961e-03 1.809396e-03
## Fiat 128            2.137304e-06 3.421286e-03
## Honda Civic         3.293514e-03 3.219122e-03
## Toyota Corolla      3.154963e-06 7.786520e-04
## Toyota Corona       4.975449e-02 1.259707e-03
## Dodge Challenger    5.702824e-03 6.708868e-04
## AMC Javelin         1.179898e-02 3.567799e-06
## Camaro Z28          9.123057e-03 4.139420e-06
## Pontiac Firebird    7.916996e-04 3.380175e-03
## Fiat X1-9           4.617978e-04 2.201377e-05
## Porsche 914-2       9.946797e-03 2.351676e-04
## Lotus Europa        2.971507e-04 1.427434e-03
## Ford Pantera L      1.267860e-02 1.349127e-03
## Ferrari Dino        7.817134e-04 6.811583e-04
## Maserati Bora       1.603019e-03 1.434092e-06
## Volvo 142E          1.906447e-03 1.852680e-03
# Put the dependent variable back next to the PCA coordinates so the
# dimensions can be used as regressors
dataafter <- data.frame(
  qsec = mtcars[["qsec"]],
  res.ind[["coord"]]
)
dataafter

3. After PCA

After doing PCA, we can use the dimensions in our regression to ‘explain’ how fast cars are.

# Simple regressions on the principal components: first dimension only
summary(lm("qsec ~ Dim.1", data = dataafter))
## 
## Call:
## lm(formula = "qsec ~ Dim.1", data = dataafter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0874 -1.0936  0.0239  0.8545  4.5384 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  17.8488     0.2892  61.721   <2e-16 ***
## Dim.1        -0.3074     0.1163  -2.644   0.0129 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.636 on 30 degrees of freedom
## Multiple R-squared:  0.189,  Adjusted R-squared:  0.1619 
## F-statistic:  6.99 on 1 and 30 DF,  p-value: 0.01291
# Add the second dimension
summary(lm("qsec ~ Dim.1+Dim.2", data = dataafter))
## 
## Call:
## lm(formula = "qsec ~ Dim.1+Dim.2", data = dataafter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.4271 -0.5386 -0.0752  0.4271  3.7342 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.84875    0.18111  98.549  < 2e-16 ***
## Dim.1       -0.30745    0.07283  -4.221 0.000219 ***
## Dim.2        0.87405    0.12684   6.891 1.43e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.025 on 29 degrees of freedom
## Multiple R-squared:  0.6925, Adjusted R-squared:  0.6713 
## F-statistic: 32.65 on 2 and 29 DF,  p-value: 3.751e-08
# Add the third dimension
summary(lm("qsec ~ Dim.1+Dim.2+Dim.3", data = dataafter))
## 
## Call:
## lm(formula = "qsec ~ Dim.1+Dim.2+Dim.3", data = dataafter)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.3493 -0.3696 -0.1109  0.3834  2.7168 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 17.84875    0.14272 125.059  < 2e-16 ***
## Dim.1       -0.30745    0.05739  -5.357 1.05e-05 ***
## Dim.2        0.87405    0.09996   8.744 1.71e-09 ***
## Dim.3        0.87785    0.20300   4.324 0.000175 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8074 on 28 degrees of freedom
## Multiple R-squared:  0.8156, Adjusted R-squared:  0.7959 
## F-statistic: 41.29 on 3 and 28 DF,  p-value: 2.069e-10