library(readr)
# Read the survey responses into `mydata`. No column types are supplied, so
# readr guesses them and prints the specification echoed below (four numeric
# columns).
mydata <- read_csv(file = "survey_results.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Airbnb = col_double(),
## Vrbo = col_double(),
## Hotels.com = col_double(),
## Booking.com = col_double()
## )
# Fit a principal component analysis on every column of `mydata`.
# `scale. = TRUE` standardises each column to unit variance before the
# decomposition; the argument is written out in full instead of relying on
# `scale` partially matching the `scale.` formal of prcomp().
pr.out <- prcomp(mydata, scale. = TRUE)

# Components stored on the fitted object.
names(pr.out)
## [1] "sdev" "rotation" "center" "scale" "x"

# Column means subtracted before the decomposition.
pr.out$center
## Airbnb Vrbo Hotels.com Booking.com
## 4.727273 1.545455 3.772727 3.772727

# Column standard deviations used for scaling.
pr.out$scale
## Airbnb Vrbo Hotels.com Booking.com
## 2.529480 1.595448 2.599117 2.389099

# Loadings: one row per input column, one column per principal component.
pr.out$rotation
## PC1 PC2 PC3 PC4
## Airbnb 0.49418163 -0.46340086 0.64757555 -0.34884104
## Vrbo -0.02826569 -0.81589927 -0.57686767 -0.02707639
## Hotels.com 0.57077718 0.34552972 -0.49035396 -0.56069209
## Booking.com 0.65513280 0.01331296 -0.08615452 0.75046731

# Scores: one row per observation, one column per principal component.
dim(pr.out$x)
## [1] 22 4

# Biplot of the first two components; scale = 0 draws the arrows at the scale
# of the loadings.
biplot(pr.out, scale = 0)

# Principal components are only defined up to sign. Negating the loadings and
# the scores together yields an equivalent solution, shown as a mirrored biplot.
pr.out$rotation <- -pr.out$rotation
pr.out$x <- -pr.out$x
biplot(pr.out, scale = 0)
# Standard deviation of each principal component.
pr.out$sdev
## [1] 1.3946308 1.1090774 0.7342365 0.5346486

# Squaring the standard deviations gives the variance of each component.
pr.var <- pr.out$sdev^2
pr.var
## [1] 1.9449950 1.2300526 0.5391033 0.2858491

# Proportion of variance explained (PVE): each component's share of the total.
pve <- pr.var / sum(pr.var)
pve
## [1] 0.48624876 0.30751314 0.13477582 0.07146227

# PVE for each component, points joined by lines.
plot(
  pve,
  xlab = "Principal Component",
  ylab = "Proportion of Variance Explained",
  ylim = c(0, 1),
  type = "b"
)

# Running total of the PVE across components.
plot(
  cumsum(pve),
  xlab = "Principal Component",
  ylab = "Cumulative Proportion of Variance Explained",
  ylim = c(0, 1),
  type = "b"
)
#save your cluster solutions in the working directory
#We want to examine the cluster memberships for each observation - see last column of pca_data
# Principal component analysis, reading (pp. 404-405): https://faculty.marshall.usc.edu/gareth-james/ISL/ISLR%20Seventh%20Printing.pdf
# Principal Component Methods in R: Practical Guide: http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/118-principal-component-analysis-in-r-prcomp-vs-princomp/
# Interpretation of the Principal Components: https://online.stat.psu.edu/stat505/lesson/11/11.4