# Run a principal component analysis on SamsungData and keep its summary;
# the summary's importance table is used below for per-component statistics.
pca <- prcomp(x = SamsungData)
smy <- summary(object = pca)
Req 1 - Loaded the data set.
Req 2 - Obtained the R object of the PCA.
Req 3 - Variance explained by PC1: 62.23%.
Req 4 - PC69 has a cumulative variance of 95.066%.
Req 5 - First 10 principal components:
# Keep the importance-table columns for the first 10 principal components
ten <- smy[["importance"]][, seq_len(10)]
ten
## PC1 PC2 PC3 PC4 PC5
## Standard deviation 5.809166 1.608798 1.47618 0.9943542 0.9347122
## Proportion of Variance 0.622270 0.047730 0.04018 0.0182300 0.0161100
## Cumulative Proportion 0.622270 0.670000 0.71018 0.7284100 0.7445200
## PC6 PC7 PC8 PC9 PC10
## Standard deviation 0.8073169 0.8023245 0.763973 0.7218214 0.693615
## Proportion of Variance 0.0120200 0.0118700 0.010760 0.0096100 0.008870
## Cumulative Proportion 0.7565400 0.7684100 0.779170 0.7887800 0.797650
Req 6 - Eigenvalue calculation for the first 10 components:
# Standard deviations are the "Standard deviation" row of `ten`
stdev <- ten["Standard deviation", ]
stdev
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## 5.8091660 1.6087975 1.4761804 0.9943542 0.9347122 0.8073169 0.8023245
## PC8 PC9 PC10
## 0.7639730 0.7218214 0.6936150
# Eigenvalues of the first 10 components: each standard deviation squared
ev <- stdev * stdev
ev
## PC1 PC2 PC3 PC4 PC5 PC6
## 33.7464094 2.5882294 2.1791085 0.9887402 0.8736869 0.6517605
## PC7 PC8 PC9 PC10
## 0.6437246 0.5836548 0.5210261 0.4811018
Plot data:
# Proportion of variance explained by each of the first 10 components
myvariance <- ten["Proportion of Variance", ]
# Cumulative proportion of variance across the first 10 components
cumulative_Var <- ten["Cumulative Proportion", ]
# Standard deviation of every component
all_Stdev <- smy[["importance"]]["Standard deviation", ]
# Eigenvalue of every component (squared standard deviation)
all_Eigen <- all_Stdev * all_Stdev
# Keep only the eigenvalues that are strictly positive (missing values dropped)
ev_abovezero <- all_Eigen[all_Eigen > 0.0 & !is.na(all_Eigen)]
Req7 - Plots
Req. 9 - Maximum contributors in first 10 components:
# For each of the first 10 principal components, find the feature whose
# loading has the largest absolute value. vapply() builds the character
# vector in one step instead of growing an untyped vector() inside a loop,
# and character(1) checks that every component yields exactly one name.
# min() caps the range when fewer than 10 components are available.
maxContributors <- vapply(
  seq_len(min(10L, ncol(pca$rotation))),
  function(i) names(which.max(abs(pca$rotation[, i]))),
  character(1)
)
maxContributors
## [1] "fBodyAccJerk-entropy()-X" "tGravityAcc-energy()-X"
## [3] "tGravityAcc-energy()-X" "tBodyGyroMag-entropy()"
## [5] "tGravityAcc-correlation()-X,Z" "tGravityAcc-correlation()-X,Z"
## [7] "tGravityAcc-correlation()-X,Y" "tGravityAcc-correlation()-X,Y"
## [9] "angle(tBodyGyroMean,gravityMean)" "angle(tBodyGyroMean,gravityMean)"
Req 10 - Features most frequently occurring in the first 10 components:
# Count how many times each feature is the top contributor
summary(factor(maxContributors))
## angle(tBodyGyroMean,gravityMean) fBodyAccJerk-entropy()-X
## 2 1
## tBodyGyroMag-entropy() tGravityAcc-correlation()-X,Y
## 1 2
## tGravityAcc-correlation()-X,Z tGravityAcc-energy()-X
## 2 2
We can see that there are 4 features that each occur 2 times in the first ten components.