S <- matrix(c(5, 0, 0, 0, 9, 0, 0, 0, 9), nrow=3, byrow=TRUE)
eigen_result <- eigen(S)
eigenvalues <- eigen_result$values
eigenvectors <- eigen_result$vectors
eigenvalues
## [1] 9 9 5
eigenvectors
## [,1] [,2] [,3]
## [1,] 0 0 1
## [2,] 0 1 0
## [3,] 1 0 0
pc <- princomp(covmat = S)
summary(pc, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3
## Standard deviation 3.0000000 3.0000000 2.2360680
## Proportion of Variance 0.3913043 0.3913043 0.2173913
## Cumulative Proportion 0.3913043 0.7826087 1.0000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3
## [1,] 1
## [2,] 1
## [3,] 1
S <- matrix(c(36, 5, 5, 4), nrow=2, byrow=TRUE)
pc_result <- princomp(covmat = S)
summary(pc_result, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2
## Standard deviation 6.0632545 1.79915130
## Proportion of Variance 0.9190764 0.08092363
## Cumulative Proportion 0.9190764 1.00000000
##
## Loadings:
## Comp.1 Comp.2
## [1,] 0.989 0.151
## [2,] 0.151 -0.989
R <- cov2cor(S)
R
## [,1] [,2]
## [1,] 1.0000000 0.4166667
## [2,] 0.4166667 1.0000000
pc_result_R <- princomp(covmat = R)
summary(pc_result_R, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2
## Standard deviation 1.1902381 0.7637626
## Proportion of Variance 0.7083333 0.2916667
## Cumulative Proportion 0.7083333 1.0000000
##
## Loadings:
## Comp.1 Comp.2
## [1,] 0.707 0.707
## [2,] 0.707 -0.707
The PCA in (a) uses a covariance matrix while in (c) correlation matrix is used.Performing PCA on the covariance matrix emphasizes variables with larger variances, while using the correlation matrix provides a more balanced view of the relationships between variables.
# Define the correlation matrix
my.cor.mat <- matrix(c(1, .402, .396, .301, .305, .339, .340,
.402, 1, .618, .150, .135, .206, .183,
.396, .618, 1, .321, .289, .363, .345,
.301, .150, .321, 1, .846, .759, .661,
.305, .135, .289, .846, 1, .797, .800,
.339, .206, .363, .759, .797, 1, .736,
.340, .183, .345, .661, .800, .736, 1), nrow=7, byrow=TRUE)
# Perform PCA using the correlation matrix
pc_result <- princomp(covmat = my.cor.mat)
# Print the summary of PCA results
summary(pc_result, loadings = TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.9492241 1.2256950 0.80610632 0.6000474 0.58237656
## Proportion of Variance 0.5427821 0.2146183 0.09282963 0.0514367 0.04845178
## Cumulative Proportion 0.5427821 0.7574004 0.85023003 0.9016667 0.95011851
## Comp.6 Comp.7
## Standard deviation 0.48502898 0.33751644
## Proportion of Variance 0.03360759 0.01627391
## Cumulative Proportion 0.98372609 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## [1,] 0.276 0.365 0.882
## [2,] 0.212 0.639 -0.258 -0.687
## [3,] 0.295 0.512 -0.381 0.699 0.101
## [4,] 0.438 -0.235 -0.102 0.619 0.318 -0.503
## [5,] 0.456 -0.277 -0.113 0.290 0.785
## [6,] 0.450 -0.178 -0.870
## [7,] 0.436 -0.180 -0.770 0.233 -0.353
# Scree plot to determine the number of components to retain
plot(1:length(pc_result$sdev), (pc_result$sdev)^2, type='b',
main="Scree Plot", xlab="Number of Components", ylab="Eigenvalue Size")
food.full <- read.table("D:\\Semester1.3\\Multivariate\\Datasets\\foodstuffs.txt",header = TRUE)
food.labels <- as.character(food.full[,1])
food.data <- food.full[,-1]
food.pc <- princomp(food.data, cor=TRUE)
summary(food.pc, loadings=TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.4824903 1.0696751 0.9211811 0.8988007 0.0400021115
## Proportion of Variance 0.4395555 0.2288410 0.1697149 0.1615686 0.0003200338
## Cumulative Proportion 0.4395555 0.6683965 0.8381114 0.9996800 1.0000000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Energy 0.654 0.149 0.199 0.709
## Protein 0.151 -0.691 -0.463 0.525 -0.104
## Fat 0.639 0.202 0.216 0.134 -0.697
## Calcium -0.355 0.652 0.670
## Iron -0.122 0.689 -0.540 0.468
plot(food.pc$scores[,1], food.pc$scores[,2], xlab="PC 1", ylab="PC 2")
text(food.pc$scores[,1], food.pc$scores[,2], labels=food.labels, cex=0.7)
a)Perform an appropriate PCA on the dataset, including the appropriate display of PC scores. Identify any notable outliers.
USairpol.full <- read.table("D:\\Semester1.3\\Multivariate\\Datasets\\USairpol.txt", header=T)
city.names <- as.character(USairpol.full[,1])
USairpol.data <- USairpol.full[,-1]
# Perform PCA on the full dataset
USairpol.pc <- princomp(USairpol.data, cor=TRUE)
# Print the summary of PCA results
summary(USairpol.pc, loadings=TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.6517021 1.2297702 1.1810897 0.9444529 0.58887916
## Proportion of Variance 0.3897314 0.2160478 0.1992819 0.1274273 0.04953981
## Cumulative Proportion 0.3897314 0.6057792 0.8050611 0.9324884 0.98202821
## Comp.6 Comp.7
## Standard deviation 0.3166822 0.159733920
## Proportion of Variance 0.0143268 0.003644989
## Cumulative Proportion 0.9963550 1.000000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## SO2 0.490 0.404 0.730 0.183 0.150
## Neg.Temp 0.315 -0.677 0.185 -0.162 -0.611
## Manuf 0.541 -0.226 0.267 -0.164 -0.745
## Pop 0.488 -0.282 0.345 -0.113 -0.349 0.649
## Wind 0.250 -0.311 -0.862 0.268 0.150
## Precip 0.626 0.492 -0.184 0.161 -0.554
## Days 0.260 0.678 -0.110 0.110 -0.440 0.505
# Plot PC scores
plot(USairpol.pc$scores[,1], USairpol.pc$scores[,2], xlab="PC 1", ylab="PC 2")
text(USairpol.pc$scores[,1], USairpol.pc$scores[,2], labels=city.names, cex=0.7)
Chicago, Phoenix, Philadelphia are notable outliers
b)Perform another PCA after removing one or two of the most severe outliers.
outlier_indices <- c(1,11,29)
# Remove outliers from the dataset
USairpol.data.reduced <- USairpol.data[-outlier_indices,]
# Perform PCA on the reduced dataset
USairpol.pc.reduced <- princomp(USairpol.data.reduced, cor=TRUE)
# Print the summary of PCA results for the reduced dataset
summary(USairpol.pc.reduced, loadings=TRUE)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 1.4938795 1.2960418 1.2571494 0.8824378 0.73021351
## Proportion of Variance 0.3188108 0.2399606 0.2257749 0.1112424 0.07617311
## Cumulative Proportion 0.3188108 0.5587715 0.7845464 0.8957888 0.97196188
## Comp.6 Comp.7
## Standard deviation 0.32613931 0.29983327
## Proportion of Variance 0.01519526 0.01284286
## Cumulative Proportion 0.98715714 1.00000000
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## SO2 0.315 0.490 0.200 0.228 0.704 0.108 0.246
## Neg.Temp 0.420 0.456 -0.327 -0.301 0.428 -0.485
## Manuf 0.558 -0.292 0.201 0.245 -0.542 -0.453
## Pop 0.426 -0.511 0.197 0.208 -0.169 0.568 0.353
## Wind 0.352 -0.194 -0.203 -0.842 0.282
## Precip -0.235 0.698 -0.274 0.146 0.344 -0.491
## Days 0.229 0.407 0.496 -0.246 -0.530 -0.264 0.353