Ashish Dalal
30 November, 2015
# Run this analysis in a fresh R session.
# The workspace is deliberately not cleared here: rm(list = ls()) would delete
# the caller's objects when this code is sourced, yet it would not reset
# attached packages or options, so it does not give a clean starting state.
#loading up libraries
library(stats)
library(graphics)
# Load the data set from the CSV file into a data frame
data <- read.csv(file = "~/Desktop/data.csv")
# Show the number of rows and columns that were read
dim(x = data)
[1] 768 10
# List the column names of the data frame
colnames(data)
[1] "Compactness" "SurfaceArea" "WallArea" "RoofArea"
[5] "OverallHeight" "Orientation" "GlazingArea" "Gadistribution"
[9] "HeatingLoad" "CoolingLoad"
# Summary statistics for columns 1 to 7, rendered as a markdown table.
# kable() is namespace-qualified so the call does not depend on knitr
# having been attached with library().
knitr::kable(summary(data[, 1:7]))
| Compactness | SurfaceArea | WallArea | RoofArea | OverallHeight | Orientation | GlazingArea | |
|---|---|---|---|---|---|---|---|
| Min. :0.6200 | Min. :514.5 | Min. :245.0 | Min. :110.2 | Min. :3.50 | Min. :2.00 | Min. :0.0000 | |
| 1st Qu.:0.6825 | 1st Qu.:606.4 | 1st Qu.:294.0 | 1st Qu.:140.9 | 1st Qu.:3.50 | 1st Qu.:2.75 | 1st Qu.:0.1000 | |
| Median :0.7500 | Median :673.8 | Median :318.5 | Median :183.8 | Median :5.25 | Median :3.50 | Median :0.2500 | |
| Mean :0.7642 | Mean :671.7 | Mean :318.5 | Mean :176.6 | Mean :5.25 | Mean :3.50 | Mean :0.2344 | |
| 3rd Qu.:0.8300 | 3rd Qu.:741.1 | 3rd Qu.:343.0 | 3rd Qu.:220.5 | 3rd Qu.:7.00 | 3rd Qu.:4.25 | 3rd Qu.:0.4000 | |
| Max. :0.9800 | Max. :808.5 | Max. :416.5 | Max. :220.5 | Max. :7.00 | Max. :5.00 | Max. :0.4000 |
# Summary statistics for columns 8 to 10, rendered as a markdown table.
# kable() is namespace-qualified so the call does not depend on knitr
# having been attached with library().
knitr::kable(summary(data[, 8:10]))
| Gadistribution | HeatingLoad | CoolingLoad | |
|---|---|---|---|
| Min. :0.000 | Min. : 6.01 | Min. :10.90 | |
| 1st Qu.:1.750 | 1st Qu.:12.99 | 1st Qu.:15.62 | |
| Median :3.000 | Median :18.95 | Median :22.08 | |
| Mean :2.812 | Mean :22.31 | Mean :24.59 | |
| 3rd Qu.:4.000 | 3rd Qu.:31.67 | 3rd Qu.:33.13 | |
| Max. :5.000 | Max. :43.10 | Max. :48.03 |
# Principal component analysis of every column in `data`.
# `scale. = TRUE` standardises each variable to unit variance before the
# decomposition. The argument name is spelled out in full because
# `scale = TRUE` only reaches prcomp()'s `scale.` parameter through
# partial argument matching.
pr.out <- prcomp(data, scale. = TRUE)
# Loadings (rotation matrix) for the first five principal components.
# kable() is namespace-qualified so knitr does not need to be attached.
knitr::kable(pr.out$rotation[, 1:5])
| | PC1 | PC2 | PC3 | PC4 | PC5 |
|---|---|---|---|---|---|
| Compactness | -0.3782382 | 0.3780101 | -0.0961734 | 0.0004256 | -0.0056578 |
| SurfaceArea | 0.3876436 | -0.3621610 | 0.0909008 | -0.0004457 | 0.0038601 |
| WallArea | -0.1062748 | -0.6842761 | 0.3728286 | -0.0074353 | -0.1388195 |
| RoofArea | 0.4293324 | -0.0226808 | -0.0914195 | 0.0031563 | 0.0708081 |
| OverallHeight | -0.4291436 | 0.0019210 | 0.0925547 | -0.0028939 | -0.0627936 |
| Orientation | -0.0011190 | -0.0049823 | 0.0011794 | 0.9998752 | -0.0038127 |
| GlazingArea | -0.0463028 | -0.3041805 | -0.6465749 | -0.0000632 | 0.6363451 |
| Gadistribution | -0.0154822 | -0.1882539 | -0.6382946 | -0.0029063 | -0.7459251 |
| HeatingLoad | -0.4021382 | -0.2718396 | -0.0304048 | -0.0063587 | 0.0774642 |
| CoolingLoad | -0.4034644 | -0.2351787 | 0.0125631 | 0.0112517 | 0.0661125 |
# Loadings for principal components 5 through 10 (PC5 is also shown in the
# table of the first five components).
# kable() is namespace-qualified so knitr does not need to be attached.
knitr::kable(pr.out$rotation[, 5:10])
| | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 |
|---|---|---|---|---|---|---|
| Compactness | -0.0056578 | -0.3088552 | 0.3809042 | -0.0194190 | 0.6811065 | 0.0000000 |
| SurfaceArea | 0.0038601 | 0.1313702 | -0.0320973 | -0.0021671 | 0.5065943 | 0.6598204 |
| WallArea | -0.1388195 | -0.4789628 | 0.1286283 | 0.0476003 | 0.0844790 | -0.3267898 |
| RoofArea | 0.0708081 | 0.3594229 | -0.0934213 | -0.0251022 | 0.4531998 | -0.6766428 |
| OverallHeight | -0.0627936 | 0.0113953 | -0.8504495 | 0.1171563 | 0.2572829 | 0.0000000 |
| Orientation | -0.0038127 | -0.0083547 | -0.0041101 | -0.0109894 | -0.0001494 | 0.0000000 |
| GlazingArea | 0.6363451 | -0.2607372 | -0.0899318 | 0.0786798 | -0.0086035 | 0.0000000 |
| Gadistribution | -0.7459251 | -0.0075680 | -0.0045351 | 0.0202427 | -0.0007590 | 0.0000000 |
| HeatingLoad | 0.0774642 | 0.3848062 | 0.0982822 | -0.7741000 | 0.0213652 | 0.0000000 |
| CoolingLoad | 0.0661125 | 0.5589665 | 0.2959053 | 0.6140397 | 0.0142768 | 0.0000000 |
# Biplot of the first two components; scale = 0 is passed through to biplot()
biplot(x = pr.out, scale = 0)
# Variance of each principal component: the square of its standard deviation
pr.var <- (pr.out[["sdev"]])^2
pr.var
[1] 5.222885e+00 1.533386e+00 1.218894e+00 1.000177e+00 8.047700e-01
[6] 1.630918e-01 3.308280e-02 1.935870e-02 4.353904e-03 4.582035e-30
# Proportion of the total variance carried by each component
pve <- prop.table(pr.var)
# Same proportions expressed as percentages
100 * pve
[1] 5.222885e+01 1.533386e+01 1.218894e+01 1.000177e+01 8.047700e+00
[6] 1.630918e+00 3.308280e-01 1.935870e-01 4.353904e-02 4.582035e-29
# Repeat the principal component analysis with FactoMineR's PCA() and print
# the description of the returned result object
library("FactoMineR")
PCA(X = data)
**Results for the Principal Component Analysis (PCA)**
The analysis was performed on 768 individuals, described by 10 variables
*The results are available in the following objects:
name description
1 "$eig" "eigenvalues"
2 "$var" "results for the variables"
3 "$var$coord" "coord. for the variables"
4 "$var$cor" "correlations variables - dimensions"
5 "$var$cos2" "cos2 for the variables"
6 "$var$contrib" "contributions of the variables"
7 "$ind" "results for the individuals"
8 "$ind$coord" "coord. for the individuals"
9 "$ind$cos2" "cos2 for the individuals"
10 "$ind$contrib" "contributions of the individuals"
11 "$call" "summary statistics"
12 "$call$centre" "mean of the variables"
13 "$call$ecart.type" "standard error of the variables"
14 "$call$row.w" "weights for the individuals"
15 "$call$col.w" "weights for the variables"