# PREP #########################################################################
# Packages
pacman::p_load(
  pacman, rio, tidyverse, magrittr, janitor,  # general stuff
  psych,        # EDA
  visdat,       # missingness
  data.table,   # working with data.tables
  corrplot,     # correlation plot
  FactoMineR,   # EDA, PCA, MFA
  factoextra,   # extract and visualize PCA/MFA
  nFactors,     # how many factors/components to retain
  cluster,      # clustering algorithms
  NbClust,      # number of clusters
  clValid       # ?
)


# Data
data <- import("data/Travel_Review.xlsx", range = "A1:Y5457",
               na = "0")
## Warning in read_fun(path = enc2native(normalizePath(path)), sheet_i = sheet, :
## Expecting numeric in L2714 / R2714C12: got '2 2.'
# clean variable names
data %<>% rename(SwimmingPools = `Swimming Pools`)
# drop missings
complete <- data[complete.cases(data),]
# standardize by user
data2 <- complete
data2[,-1] <- t(data2[,-1]) %>% scale() %>% t()
# PCA - 1st PASS ###############################################################

# RUN THE PCA

# create X matrix
# X <- subset(data2, select = -c(UserID))
X <- subset(data2, select = -c(UserID, DanceClubs))
str(X)
## 'data.frame':    3724 obs. of  23 variables:
##  $ Churches            : num  -0.609 -0.722 -0.663 -0.587 -0.679 ...
##  $ Resorts             : num  -0.651 -0.678 -0.619 -0.595 -0.635 ...
##  $ Beaches             : num  -0.617 -0.67 -0.612 -0.587 -0.628 ...
##  $ Parks               : num  -0.593 2.39 -0.597 -0.562 -0.606 ...
##  $ Theatres            : num  1.89 1.55 1.62 1.92 1.56 ...
##  $ Museums             : num  2.84 2.39 2.45 1.15 2.37 ...
##  $ Malls               : num  1.129 0.878 2.448 2.853 0.859 ...
##  $ Zoo                 : num  1.121 0.856 0.91 1.121 0.845 ...
##  $ Restaurants         : num  1.038 0.79 0.837 1.029 0.765 ...
##  $ Pubs_Bars           : num  1.046 0.489 0.837 1.029 0.765 ...
##  $ LocalServices       : num  0.409 0.225 0.271 0.391 0.199 ...
##  $ Burger_PizzaShops   : num  -0.0876 -0.208 -0.1707 -0.1143 -0.2288 ...
##  $ Hotels_OtherLodgings: num  -0.179 -0.296 -0.281 -0.23 -0.316 ...
##  $ JuiceBars           : num  -0.212 -0.325 -0.281 -0.239 -0.345 ...
##  $ ArtGalleries        : num  -0.195 -0.318 -0.274 -0.23 -0.338 ...
##  $ SwimmingPools       : num  -0.841 -0.281 -0.244 -0.811 -0.845 ...
##  $ Gyms                : num  -0.816 -0.854 -0.803 -0.811 -0.831 ...
##  $ Bakeries            : num  -0.857 -0.898 -0.847 -0.852 -0.874 ...
##  $ BeautySpas          : num  -0.882 -0.913 -0.855 -0.869 -0.882 ...
##  $ Cafes               : num  -0.675 -0.898 -0.84 -0.852 -0.874 ...
##  $ ViewPoints          : num  -0.692 -0.729 -0.671 -0.645 2.368 ...
##  $ Monuments           : num  -0.7 -0.744 -0.686 -0.661 -0.7 ...
##  $ Gardens             : num  -0.692 -0.736 -0.663 -0.628 -0.621 ...
# run PCA
pca <- prcomp(X, 
              center = TRUE,
              scale. = TRUE)
summary(pca)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.2674 1.6773 1.31537 1.28890 1.18226 1.08449 1.02765
## Proportion of Variance 0.2235 0.1223 0.07523 0.07223 0.06077 0.05114 0.04592
## Cumulative Proportion  0.2235 0.3458 0.42108 0.49331 0.55408 0.60521 0.65113
##                           PC8     PC9   PC10    PC11    PC12    PC13   PC14
## Standard deviation     0.9287 0.87314 0.8471 0.82327 0.77763 0.76758 0.7352
## Proportion of Variance 0.0375 0.03315 0.0312 0.02947 0.02629 0.02562 0.0235
## Cumulative Proportion  0.6886 0.72177 0.7530 0.78244 0.80873 0.83435 0.8579
##                           PC15    PC16    PC17    PC18    PC19    PC20    PC21
## Standard deviation     0.71319 0.68935 0.66916 0.63865 0.63332 0.59991 0.57179
## Proportion of Variance 0.02211 0.02066 0.01947 0.01773 0.01744 0.01565 0.01422
## Cumulative Proportion  0.87996 0.90062 0.92009 0.93782 0.95526 0.97091 0.98513
##                           PC22   PC23
## Standard deviation     0.56279 0.1593
## Proportion of Variance 0.01377 0.0011
## Cumulative Proportion  0.99890 1.0000
# pca2 <- principal(X, nfactors = 7, residuals = T)
# pca2
# print(pca2$loadings, cutoff = 0.4, sort = T)
# plot(pca2)


# add labels to PCA results
pca_label <- cbind(data2, as.data.frame(pca$x))
head(pca_label)
##     UserID   Churches    Resorts    Beaches      Parks Theatres  Museums
## 69 User 69 -0.6091162 -0.6505058 -0.6173941 -0.5925604 1.890813 2.842772
## 70 User 70 -0.7217397 -0.6777013 -0.6703616  2.3903039 1.553575 2.390304
## 71 User 71 -0.6634526 -0.6193246 -0.6119699 -0.5972605 1.616495 2.447573
## 72 User 72 -0.5867433 -0.5950317 -0.5867433 -0.5618784 1.916327 1.145514
## 73 User 73 -0.6785527 -0.6350286 -0.6277746 -0.6060126 1.555684 2.368134
## 74 User 74 -0.7170883 -0.6864435 -0.6741855  1.8448168 1.158373 1.844817
##        Malls       Zoo Restaurants Pubs_Bars LocalServices Burger_PizzaShops
## 69 1.1292449 1.1209670   1.0381879 1.0464658    0.40906672       -0.08760788
## 70 0.8783205 0.8563013   0.7902438 0.4893150    0.22508491       -0.20795889
## 71 2.4475731 0.9104469   0.8369002 0.8369002    0.27059059       -0.17068965
## 72 2.8529057 1.1206487   1.0294773 1.0294773    0.39127734       -0.11430962
## 73 0.8592986 0.8447906   0.7649964 0.7649964    0.19918319       -0.22880375
## 74 1.8448168 0.5577353   0.4841878 0.4780588    0.00612896       -0.36773757
##    Hotels_OtherLodgings  JuiceBars ArtGalleries DanceClubs SwimmingPools
## 69           -0.1786649 -0.2117765   -0.1952207 -0.1786649    -0.8408977
## 70           -0.2960356 -0.3253945   -0.3180548 -0.3033753    -0.2813561
## 71           -0.2810097 -0.2810097   -0.2736550 -0.2589457    -0.2442364
## 72           -0.2303460 -0.2386343   -0.2303460 -0.2137694    -0.8105277
## 73           -0.3158519 -0.3448680   -0.3376140 -0.3231060    -0.8453951
## 74           -0.4474140 -0.4596720   -0.4535430 -0.4412851    -0.8703123
##          Gyms   Bakeries BeautySpas      Cafes ViewPoints  Monuments    Gardens
## 69 -0.8160640 -0.8574535 -0.8822872 -0.6753395 -0.6918953 -0.7001732 -0.6918953
## 70 -0.8538547 -0.8978931 -0.9125725 -0.8978931 -0.7290794 -0.7437588 -0.7364191
## 71 -0.8031913 -0.8473193 -0.8546740 -0.8399647 -0.6708072 -0.6855166 -0.6634526
## 72 -0.8105277 -0.8519693 -0.8685459 -0.8519693 -0.6447615 -0.6613381 -0.6281849
## 73 -0.8308871 -0.8744112 -0.8816652 -0.8744112  2.3681339 -0.7003148 -0.6205206
## 74 -0.8519254 -0.8948281 -0.9009571 -0.8948281  1.8448168 -0.7354751 -0.6680566
##           PC1       PC2        PC3        PC4       PC5       PC6        PC7
## 69 -2.4018090 1.2278103 -0.2923164 -0.5814276 -2.032376 0.8311117 -1.0391171
## 70 -1.5285281 2.2347710  0.7060933 -0.2213471 -2.008094 1.3924620  0.4016328
## 71 -2.3264528 0.9043579 -0.2234224 -0.9065431 -2.547463 0.6729884 -0.8727915
## 72 -2.6156521 0.7834587 -0.5954404 -0.5106310 -1.763650 0.2650046 -0.8500840
## 73 -1.1433292 1.2343446 -0.7913175  0.1942561 -1.620325 1.4250462 -0.6586073
## 74 -0.9124913 2.0710481 -0.1661480  0.1184531 -1.415868 1.7969091 -0.0221131
##          PC8         PC9        PC10        PC11        PC12        PC13
## 69 0.3297053  0.11251663 -0.22774677  0.03073840  0.05046358 -0.39227801
## 70 0.2264530  0.42523596 -0.28907466 -0.26650641  1.16215674 -0.15074384
## 71 0.1555925  0.21174085  0.49257294  0.04518984 -0.08680914 -0.15991534
## 72 0.3357248  0.07728942  0.08217964 -0.15327959 -0.34785789 -0.08047636
## 73 0.5737145 -0.48398956 -0.39330174  0.31567044  0.41228850 -0.43594555
## 74 0.5371520 -0.39570900 -0.58174990 -0.17581793  0.61425642 -0.38527620
##           PC14        PC15        PC16         PC17        PC18       PC19
## 69 -0.07456332 -0.35780502  0.48022504  0.876865612  0.19106203  1.5360486
## 70 -0.17160864  0.02679749 -0.04002613 -0.042603440 -0.67779072  0.2599628
## 71 -0.30669290 -0.67279644  0.42744732  0.763634169 -0.05546526  0.6578375
## 72 -0.13233556 -0.11201236  0.20550858  0.316993416 -0.42551784  0.5378189
## 73 -0.28563083 -0.35644848  1.76171306  0.996104985  1.45044406  0.7993980
## 74 -0.20132555  0.17623385  1.15375125  0.003257858  0.17932864 -0.6338799
##          PC20       PC21       PC22         PC23
## 69  0.6678709 -1.1415123  0.1812368 -0.122754769
## 70  0.8980402  0.5814774 -0.5800904 -0.028220795
## 71 -0.3847716 -0.9790030 -0.1680465 -0.095953139
## 72 -1.3612756 -0.9113013  0.6005325 -0.105124169
## 73 -0.1772946 -0.9531523  0.1948476 -0.067323662
## 74 -0.5981263  0.4188443 -0.7404749 -0.001799961
c_pca <- cor(subset(pca_label, select = PC1:PC7))
corrplot(c_pca)

# ANALYZING THE RESULTS

# get eigenvalues
eig.val <- get_eigenvalue(pca)
eig.val %>% round(., 2)
##        eigenvalue variance.percent cumulative.variance.percent
## Dim.1        5.14            22.35                       22.35
## Dim.2        2.81            12.23                       34.59
## Dim.3        1.73             7.52                       42.11
## Dim.4        1.66             7.22                       49.33
## Dim.5        1.40             6.08                       55.41
## Dim.6        1.18             5.11                       60.52
## Dim.7        1.06             4.59                       65.11
## Dim.8        0.86             3.75                       68.86
## Dim.9        0.76             3.31                       72.18
## Dim.10       0.72             3.12                       75.30
## Dim.11       0.68             2.95                       78.24
## Dim.12       0.60             2.63                       80.87
## Dim.13       0.59             2.56                       83.43
## Dim.14       0.54             2.35                       85.78
## Dim.15       0.51             2.21                       88.00
## Dim.16       0.48             2.07                       90.06
## Dim.17       0.45             1.95                       92.01
## Dim.18       0.41             1.77                       93.78
## Dim.19       0.40             1.74                       95.53
## Dim.20       0.36             1.56                       97.09
## Dim.21       0.33             1.42                       98.51
## Dim.22       0.32             1.38                       99.89
## Dim.23       0.03             0.11                      100.00
# screeplot
fviz_eig(pca, 
         choice = "variance", 
         addlabels = T) + 
  labs(title = "Scree Plot of Variance Explained by PCA Dimension") +
  theme(plot.title = element_text(hjust=0.5, size=14, face="bold"))

fviz_eig(pca, 
         choice = "eigenvalue",
         addlabels = T) + 
  labs(title = "Scree Plot of Eigenvalues by PCA Dimension") +
  theme(plot.title = element_text(hjust=0.5, size=14, face="bold")) # 590x590

# estiamte number of factors
vss(X, n = 10)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## 
## Very Simple Structure
## Call: vss(x = X, n = 10)
## VSS complexity 1 achieves a maximimum of 0.53  with  1  factors
## VSS complexity 2 achieves a maximimum of 0.7  with  3  factors
## 
## The Velicer MAP achieves a minimum of 0.02  with  4  factors 
## BIC achieves a minimum of  6564.07  with  10  factors
## Sample Size adjusted BIC achieves a minimum of  6780.14  with  10  factors
## 
## Statistics by number of factors 
##    vss1 vss2   map dof chisq prob sqresid  fit RMSEA   BIC SABIC complex eChisq
## 1  0.53 0.00 0.026 230 22982    0    23.3 0.53  0.16 21091 21821     1.0  29250
## 2  0.50 0.68 0.023 208 18156    0    15.8 0.68  0.15 16446 17107     1.4  14234
## 3  0.48 0.70 0.022 187 14975    0    13.0 0.74  0.15 13437 14031     1.6   9565
## 4  0.45 0.69 0.021 167 12284    0    10.4 0.79  0.14 10911 11441     1.9   5655
## 5  0.45 0.69 0.023 148 11048    0     8.8 0.82  0.14  9831 10301     2.0   3824
## 6  0.45 0.66 0.025 130  9829    0     7.5 0.85  0.14  8760  9173     2.2   2560
## 7  0.41 0.65 0.027 113  8578    0     6.4 0.87  0.14  7649  8008     2.2   1482
## 8  0.41 0.66 0.033  97  8026    0     5.8 0.88  0.15  7228  7537     2.2   1216
## 9  0.42 0.65 0.041  82  7552    0     5.1 0.90  0.16  6878  7138     2.2   1019
## 10 0.43 0.65 0.048  68  7123    0     4.7 0.90  0.17  6564  6780     2.4    799
##     SRMR eCRMS  eBIC
## 1  0.125 0.131 27359
## 2  0.087 0.096 12524
## 3  0.071 0.083  8027
## 4  0.055 0.067  4281
## 5  0.045 0.059  2607
## 6  0.037 0.051  1491
## 7  0.028 0.042   553
## 8  0.025 0.041   418
## 9  0.023 0.041   345
## 10 0.021 0.040   240
  # lines in VSS = how many components will we allow each var to contribute to
  # VSS helps if we're assign a var to only 1 component
  # 1 factor per var not helpful here
nfactors(X, n = 15)
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## An ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## 
## Number of factors
## Call: vss(x = x, n = n, rotate = rotate, diagonal = diagonal, fm = fm, 
##     n.obs = n.obs, plot = FALSE, title = title, use = use, cor = cor)
## VSS complexity 1 achieves a maximimum of 0.53  with  1  factors
## VSS complexity 2 achieves a maximimum of 0.7  with  3  factors
## The Velicer MAP achieves a minimum of 0.02  with  4  factors 
## Empirical BIC achieves a minimum of  98.08  with  15  factors
## Sample Size adjusted BIC achieves a minimum of  4709.87  with  15  factors
## 
## Statistics by number of factors 
##    vss1 vss2   map dof chisq prob sqresid  fit RMSEA   BIC SABIC complex eChisq
## 1  0.53 0.00 0.026 230 22982    0    23.3 0.53  0.16 21091 21821     1.0  29250
## 2  0.50 0.68 0.023 208 18156    0    15.8 0.68  0.15 16446 17107     1.4  14234
## 3  0.48 0.70 0.022 187 14975    0    13.0 0.74  0.15 13437 14031     1.6   9565
## 4  0.45 0.69 0.021 167 12284    0    10.4 0.79  0.14 10911 11441     1.9   5655
## 5  0.45 0.69 0.023 148 11048    0     8.8 0.82  0.14  9831 10301     2.0   3824
## 6  0.45 0.66 0.025 130  9829    0     7.5 0.85  0.14  8760  9173     2.2   2560
## 7  0.41 0.65 0.027 113  8578    0     6.4 0.87  0.14  7649  8008     2.2   1482
## 8  0.41 0.66 0.033  97  8026    0     5.8 0.88  0.15  7228  7537     2.2   1216
## 9  0.42 0.65 0.041  82  7552    0     5.1 0.90  0.16  6878  7138     2.2   1019
## 10 0.43 0.65 0.048  68  7123    0     4.7 0.90  0.17  6564  6780     2.4    799
## 11 0.46 0.66 0.059  55  6743    0     4.1 0.92  0.18  6291  6465     2.2    640
## 12 0.43 0.65 0.072  43  6278    0     3.6 0.93  0.20  5925  6061     2.4    491
## 13 0.45 0.66 0.083  32  5883    0     3.3 0.93  0.22  5620  5722     2.2    409
## 14 0.45 0.64 0.102  22  5485    0     2.7 0.95  0.26  5304  5374     2.3    307
## 15 0.47 0.66 0.128  13  4775    0     2.3 0.95  0.31  4669  4710     2.3    205
##     SRMR eCRMS  eBIC
## 1  0.125 0.131 27359
## 2  0.087 0.096 12524
## 3  0.071 0.083  8027
## 4  0.055 0.067  4281
## 5  0.045 0.059  2607
## 6  0.037 0.051  1491
## 7  0.028 0.042   553
## 8  0.025 0.041   418
## 9  0.023 0.041   345
## 10 0.021 0.040   240
## 11 0.018 0.040   188
## 12 0.016 0.039   138
## 13 0.015 0.041   146
## 14 0.013 0.043   126
## 15 0.010 0.046    98
# DATA2: I'm inclined to keep 7 PCs
  # Dims 1-7 have eigenvals > 1
  # each dim explains at least 4% of variance
  # cumulatively explains 65.1% of variance
# RESULTS AT THE VARIABLE LEVEL
var <- get_pca_var(pca)
var
## Principal Component Analysis Results for variables
##  ===================================================
##   Name       Description                                    
## 1 "$coord"   "Coordinates for the variables"                
## 2 "$cor"     "Correlations between variables and dimensions"
## 3 "$cos2"    "Cos2 for the variables"                       
## 4 "$contrib" "contributions of the variables"
print(round(var$cor[,1:7],2))
##                      Dim.1 Dim.2 Dim.3 Dim.4 Dim.5 Dim.6 Dim.7
## Churches              0.64  0.02 -0.09  0.01  0.09 -0.33 -0.33
## Resorts               0.27  0.17  0.00 -0.44  0.35 -0.52  0.10
## Beaches               0.36  0.37  0.19 -0.20  0.41 -0.17  0.22
## Parks                 0.16  0.71  0.26  0.12  0.08  0.19  0.29
## Theatres              0.02  0.75  0.39  0.08 -0.14  0.18 -0.02
## Museums              -0.29  0.55  0.18 -0.35 -0.34  0.22 -0.29
## Malls                -0.69  0.11 -0.06 -0.32 -0.21 -0.02 -0.24
## Zoo                  -0.66  0.21 -0.33 -0.13 -0.18 -0.22 -0.09
## Restaurants          -0.70  0.05 -0.40 -0.25 -0.01  0.02  0.08
## Pubs_Bars            -0.66  0.04 -0.45  0.19  0.04 -0.12  0.24
## LocalServices        -0.52  0.05 -0.16  0.54  0.04 -0.25  0.24
## Burger_PizzaShops    -0.46 -0.20  0.40  0.36  0.01 -0.16 -0.05
## Hotels_OtherLodgings -0.43 -0.27  0.42  0.43  0.13 -0.02 -0.17
## JuiceBars            -0.33 -0.53  0.41 -0.03  0.18  0.09 -0.18
## ArtGalleries         -0.26 -0.51 -0.05 -0.16  0.31  0.31 -0.03
## SwimmingPools         0.33 -0.27  0.07  0.02 -0.56 -0.05  0.37
## Gyms                  0.36 -0.39  0.14 -0.09 -0.54 -0.14  0.25
## Bakeries              0.37 -0.40  0.24 -0.30 -0.15 -0.12  0.11
## BeautySpas            0.32 -0.28 -0.07 -0.28  0.26  0.28  0.14
## Cafes                 0.45 -0.23 -0.42  0.02 -0.06  0.34 -0.12
## ViewPoints            0.60  0.19 -0.30  0.34  0.07  0.28  0.08
## Monuments             0.57  0.11 -0.28  0.33 -0.05  0.01 -0.22
## Gardens               0.62  0.02 -0.09  0.17 -0.14 -0.29 -0.41
var$cor[,1:7] %>% as.data.frame() %>% 
  arrange(desc(Dim.1))%>% as.matrix() %>% 
  corrplot(is.corr = F, tl.col = "black",
           cl.align.text = "l",
           mar = c(0,0,2,0),
           title = "Correlations Between Variables & Dimensions")

corrplot(var$cor, is.corr = FALSE,
         cl.align.text = "l")      #

corrplot(var$cos2[,1:7], is.corr = FALSE)     # quality of representation

corrplot(var$contrib[,1:7], is.corr = FALSE)  # amount of contributions

# Contributions of variables to all PCs kept
fviz_contrib(pca, choice = "var", axes = 1:7)

  # dashed line is expected value if contributions were uniform

# Contributions of variables to individual PCs
fviz_contrib(pca, choice = "var", axes = 1)

fviz_contrib(pca, choice = "var", axes = 2)

# Correlation Plots
fviz_pca_var(pca, col.var = "cos2",
             labelsize = 3,
             # select.var = list(cos2 = 0.3),
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoids text overlapping
) + labs(title = "Variable Correlation - Dims 1 & 2",
         subtitle = "colored by 'quality of representation'")

fviz_pca_var(pca, col.var = "contrib",
             labelsize = 3,
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE
) + labs(title = "Variable Correlation - Dims 1 & 2",
         subtitle = "colored by variable contribution")

fviz_pca_var(pca, col.var = "cos2",
             axes = c(1,2),
             labelsize = 3,
             # select.var = list(cos2 = 0.3),
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE)

# creating rough clusters
set.seed(1679)
km <- kmeans(var$coord, centers = 6, nstart = 25)
grp <- as.factor(km$cluster)

# Color variables by groups
fviz_pca_var(pca, col.var = grp, 
             labelsize = 3,
             legend.title = "Cluster",
             repel = T)

# RESULTS AT THE INDIVIDUAL LEVEL
ind <- get_pca_ind(pca)
ind
## Principal Component Analysis Results for individuals
##  ===================================================
##   Name       Description                       
## 1 "$coord"   "Coordinates for the individuals" 
## 2 "$cos2"    "Cos2 for the individuals"        
## 3 "$contrib" "contributions of the individuals"
# Correlation Plots
fviz_pca_ind(pca, col.ind = "cos2",
             # select.ind = list(cos2 = 0.3),
             gradient.cols = c("#0073C2FF", "#00AFBB", "#E7B800", "#FC4E07"),
             label = "none"
) + labs(title = "Individual Correlation - Dims 1 & 2",
         subtitle = "colored by 'quality of representation'")

fviz_pca_ind(pca, col.ind = "contrib", axes = c(1,2),
             gradient.cols = c("#0073C2FF", "#00AFBB", "#E7B800", "#FC4E07"),
             label = "none"
) + labs(title = "Individual Correlation - Dims 1 & 2",
         subtitle = "colored by individual contribution")

ind$coord %>% scale() %>% 
  as.data.frame() %>% 
  ggplot(aes(Dim.2, Dim.3)) + geom_point()

# visualizing individuals on kept PCs, slow
PerformanceAnalytics::chart.Correlation(dplyr::select(pca_label, PC1:PC7))

# Contributions of individuals to PCs
fviz_contrib(pca, choice = "ind", axes = 1:2)

  # slow to run on my laptop




# Rotations are eigenvectors! Aka component loadings
# loadings represent each feature's INFLUENCE on associated PC
PC1 <- pca$rotation[,1]
PC1_scores <- abs(PC1)
PC1_scores_ordered <- sort(PC1_scores, decreasing = TRUE)
PC1_scores_ordered
##          Restaurants                Malls            Pubs_Bars 
##          0.306887643          0.304638343          0.291667870 
##                  Zoo             Churches              Gardens 
##          0.291009048          0.282251732          0.271254093 
##           ViewPoints            Monuments        LocalServices 
##          0.266124324          0.251086378          0.227637461 
##    Burger_PizzaShops                Cafes Hotels_OtherLodgings 
##          0.200898981          0.200623532          0.187490813 
##             Bakeries                 Gyms              Beaches 
##          0.161152907          0.160698686          0.159532793 
##            JuiceBars        SwimmingPools           BeautySpas 
##          0.146787198          0.144747636          0.140974848 
##              Museums              Resorts         ArtGalleries 
##          0.126624564          0.120836301          0.114199223 
##                Parks             Theatres 
##          0.072196224          0.007721186
eig.vec <- pca$rotation %>% as_tibble() %>% 
  mutate(feature = row.names(pca$rotation))

# influence of each var on PC1
ggplot(eig.vec, aes(PC1, reorder(feature, PC1))) + geom_point()

ggplot(eig.vec, aes(PC1, PC2, label = feature)) + geom_text()

# EFA - 1st PASS ###############################################################

# RUN THE EFA

# create X matrix
# different from PCA, trying something
X <- data2[,-1]
str(X)
## 'data.frame':    3724 obs. of  24 variables:
##  $ Churches            : num  -0.609 -0.722 -0.663 -0.587 -0.679 ...
##  $ Resorts             : num  -0.651 -0.678 -0.619 -0.595 -0.635 ...
##  $ Beaches             : num  -0.617 -0.67 -0.612 -0.587 -0.628 ...
##  $ Parks               : num  -0.593 2.39 -0.597 -0.562 -0.606 ...
##  $ Theatres            : num  1.89 1.55 1.62 1.92 1.56 ...
##  $ Museums             : num  2.84 2.39 2.45 1.15 2.37 ...
##  $ Malls               : num  1.129 0.878 2.448 2.853 0.859 ...
##  $ Zoo                 : num  1.121 0.856 0.91 1.121 0.845 ...
##  $ Restaurants         : num  1.038 0.79 0.837 1.029 0.765 ...
##  $ Pubs_Bars           : num  1.046 0.489 0.837 1.029 0.765 ...
##  $ LocalServices       : num  0.409 0.225 0.271 0.391 0.199 ...
##  $ Burger_PizzaShops   : num  -0.0876 -0.208 -0.1707 -0.1143 -0.2288 ...
##  $ Hotels_OtherLodgings: num  -0.179 -0.296 -0.281 -0.23 -0.316 ...
##  $ JuiceBars           : num  -0.212 -0.325 -0.281 -0.239 -0.345 ...
##  $ ArtGalleries        : num  -0.195 -0.318 -0.274 -0.23 -0.338 ...
##  $ DanceClubs          : num  -0.179 -0.303 -0.259 -0.214 -0.323 ...
##  $ SwimmingPools       : num  -0.841 -0.281 -0.244 -0.811 -0.845 ...
##  $ Gyms                : num  -0.816 -0.854 -0.803 -0.811 -0.831 ...
##  $ Bakeries            : num  -0.857 -0.898 -0.847 -0.852 -0.874 ...
##  $ BeautySpas          : num  -0.882 -0.913 -0.855 -0.869 -0.882 ...
##  $ Cafes               : num  -0.675 -0.898 -0.84 -0.852 -0.874 ...
##  $ ViewPoints          : num  -0.692 -0.729 -0.671 -0.645 2.368 ...
##  $ Monuments           : num  -0.7 -0.744 -0.686 -0.661 -0.7 ...
##  $ Gardens             : num  -0.692 -0.736 -0.663 -0.628 -0.621 ...
# couldn't get this to work with data2... doesn't like negative? diff way to scale?
# fit <- factanal(X, factors = 8, rotation = "varimax")
# print(fit, digits = 2, cutoff = 0.3, sort = T)

# print(fit$loadings, cutoff = 0.5, sort = T)

# Keeping 7 factors: SS loadings > 1, prop var > 4%, cum var = 44.3%



# Calculate and plot factors with fa()
fit <- fa(X, nfactors = 7, rotate = "varimax", fm = "pa")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
fit
## Factor Analysis using method =  pa
## Call: fa(r = X, nfactors = 7, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        PA1   PA2   PA4   PA3   PA6   PA5   PA7   h2   u2 com
## Churches              0.59 -0.06 -0.22  0.15 -0.06  0.06  0.27 0.50 0.50 1.9
## Resorts               0.06  0.00 -0.11  0.16  0.03 -0.02  0.66 0.47 0.53 1.2
## Beaches               0.10  0.27 -0.20  0.12 -0.13 -0.12  0.45 0.37 0.63 2.9
## Parks                -0.01  0.74 -0.02  0.11 -0.18 -0.15  0.13 0.63 0.37 1.3
## Theatres              0.01  0.82 -0.06 -0.02  0.13 -0.12 -0.02 0.70 0.30 1.1
## Museums              -0.18  0.45 -0.09  0.12  0.62 -0.12 -0.15 0.68 0.32 2.4
## Malls                -0.38 -0.06  0.23 -0.07  0.61 -0.15 -0.12 0.61 0.39 2.3
## Zoo                  -0.24 -0.05  0.53  0.03  0.48 -0.16 -0.04 0.60 0.40 2.6
## Restaurants          -0.47 -0.20  0.42  0.14  0.34 -0.25 -0.07 0.65 0.35 4.1
## Pubs_Bars            -0.32 -0.15  0.71  0.00  0.04 -0.21 -0.11 0.68 0.32 1.8
## LocalServices        -0.08  0.02  0.64 -0.30 -0.07 -0.13 -0.12 0.54 0.46 1.7
## Burger_PizzaShops    -0.17 -0.02  0.18 -0.56  0.04  0.01 -0.11 0.39 0.61 1.5
## Hotels_OtherLodgings -0.09 -0.08  0.08 -0.69 -0.01 -0.12 -0.21 0.55 0.45 1.3
## JuiceBars            -0.28 -0.33 -0.21 -0.51  0.04 -0.03 -0.10 0.51 0.49 2.9
## ArtGalleries         -0.34 -0.42 -0.14 -0.10 -0.09 -0.13 -0.12 0.36 0.64 2.8
## DanceClubs           -0.11  0.01  0.01  0.17 -0.26  0.14 -0.11 0.14 0.86 3.2
## SwimmingPools         0.09 -0.03 -0.08  0.08 -0.13  0.54 -0.09 0.34 0.66 1.3
## Gyms                  0.09 -0.11 -0.15  0.04 -0.07  0.72 -0.03 0.56 0.44 1.2
## Bakeries              0.06 -0.19 -0.34 -0.01 -0.06  0.37  0.14 0.31 0.69 2.9
## BeautySpas            0.00 -0.21 -0.31  0.17 -0.19  0.03  0.08 0.21 0.79 3.4
## Cafes                 0.24 -0.24 -0.20  0.37 -0.22  0.07 -0.18 0.38 0.62 4.7
## ViewPoints            0.46  0.15 -0.10  0.37 -0.42 -0.09 -0.15 0.59 0.41 3.6
## Monuments             0.58  0.05 -0.07  0.23 -0.19  0.00 -0.07 0.44 0.56 1.6
## Gardens               0.68 -0.01 -0.15  0.11 -0.02  0.15  0.08 0.52 0.48 1.3
## 
##                        PA1  PA2  PA4  PA3  PA6  PA5  PA7
## SS loadings           2.26 2.03 1.95 1.67 1.59 1.27 0.98
## Proportion Var        0.09 0.08 0.08 0.07 0.07 0.05 0.04
## Cumulative Var        0.09 0.18 0.26 0.33 0.40 0.45 0.49
## Proportion Explained  0.19 0.17 0.17 0.14 0.14 0.11 0.08
## Cumulative Proportion 0.19 0.37 0.53 0.67 0.81 0.92 1.00
## 
## Mean item complexity =  2.3
## Test of the hypothesis that 7 factors are sufficient.
## 
## The degrees of freedom for the null model are  276  and the objective function was  28.93 with Chi Square of  107452.8
## The degrees of freedom for the model are 129  and the objective function was  21.66 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  3724 with the empirical chi square  1723.45  with prob <  9.7e-277 
## The total number of observations was  3724  with Likelihood Chi Square =  80360.28  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  -0.604
## RMSEA index =  0.409  and the 90 % confidence intervals are  0.406 0.411
## BIC =  79299.57
## Fit based upon off diagonal values = 0.98
print(fit$loadings, cutoff = 0.3, sort = T)
## 
## Loadings:
##                      PA1    PA2    PA4    PA3    PA6    PA5    PA7   
## Churches              0.591                                          
## Monuments             0.581                                          
## Gardens               0.675                                          
## Parks                        0.741                                   
## Theatres                     0.816                                   
## Zoo                                 0.532         0.483              
## Pubs_Bars            -0.316         0.709                            
## LocalServices                       0.635                            
## Burger_PizzaShops                         -0.564                     
## Hotels_OtherLodgings                      -0.689                     
## JuiceBars                   -0.329        -0.511                     
## Museums                      0.451                0.619              
## Malls                -0.382                       0.610              
## SwimmingPools                                            0.542       
## Gyms                                                     0.716       
## Resorts                                                         0.655
## Beaches                                                         0.448
## Restaurants          -0.473         0.420         0.343              
## ArtGalleries         -0.344 -0.424                                   
## DanceClubs                                                           
## Bakeries                           -0.336                0.368       
## BeautySpas                         -0.305                            
## Cafes                                      0.370                     
## ViewPoints            0.461                0.371 -0.424              
## 
##                  PA1   PA2   PA4   PA3   PA6   PA5   PA7
## SS loadings    2.260 2.031 1.948 1.672 1.589 1.270 0.979
## Proportion Var 0.094 0.085 0.081 0.070 0.066 0.053 0.041
## Cumulative Var 0.094 0.179 0.260 0.330 0.396 0.449 0.490
fit2 <- fa(X, nfactors = 7, rotate = "varimax", fm = "minres")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
fit2
## Factor Analysis using method =  minres
## Call: fa(r = X, nfactors = 7, rotate = "varimax", fm = "minres")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        MR1   MR2   MR4   MR3   MR6   MR5   MR7   h2   u2 com
## Churches              0.59 -0.06 -0.22  0.15 -0.06  0.06  0.27 0.50 0.50 1.9
## Resorts               0.06  0.00 -0.11  0.16  0.03 -0.02  0.66 0.48 0.52 1.2
## Beaches               0.11  0.27 -0.21  0.12 -0.13 -0.12  0.45 0.37 0.63 2.9
## Parks                -0.01  0.74 -0.02  0.11 -0.18 -0.15  0.13 0.63 0.37 1.3
## Theatres              0.01  0.82 -0.06 -0.02  0.14 -0.12 -0.02 0.70 0.30 1.1
## Museums              -0.18  0.45 -0.09  0.12  0.62 -0.12 -0.15 0.68 0.32 2.4
## Malls                -0.38 -0.06  0.23 -0.07  0.61 -0.15 -0.12 0.61 0.39 2.3
## Zoo                  -0.24 -0.05  0.53  0.03  0.48 -0.16 -0.04 0.60 0.40 2.6
## Restaurants          -0.47 -0.20  0.42  0.14  0.34 -0.25 -0.07 0.65 0.35 4.1
## Pubs_Bars            -0.32 -0.15  0.71  0.00  0.04 -0.21 -0.11 0.68 0.32 1.8
## LocalServices        -0.08  0.02  0.63 -0.30 -0.07 -0.13 -0.12 0.54 0.46 1.7
## Burger_PizzaShops    -0.17 -0.02  0.18 -0.56  0.04  0.01 -0.11 0.39 0.61 1.5
## Hotels_OtherLodgings -0.09 -0.08  0.08 -0.69 -0.01 -0.12 -0.21 0.55 0.45 1.3
## JuiceBars            -0.28 -0.33 -0.21 -0.51  0.04 -0.03 -0.10 0.51 0.49 2.9
## ArtGalleries         -0.34 -0.42 -0.14 -0.10 -0.09 -0.13 -0.12 0.36 0.64 2.8
## DanceClubs           -0.11  0.01  0.01  0.17 -0.26  0.14 -0.11 0.14 0.86 3.2
## SwimmingPools         0.09 -0.03 -0.09  0.08 -0.13  0.54 -0.09 0.34 0.66 1.3
## Gyms                  0.09 -0.11 -0.15  0.04 -0.07  0.72 -0.03 0.57 0.43 1.2
## Bakeries              0.06 -0.19 -0.34 -0.01 -0.06  0.37  0.14 0.31 0.69 2.9
## BeautySpas            0.00 -0.21 -0.31  0.17 -0.19  0.03  0.08 0.21 0.79 3.4
## Cafes                 0.24 -0.24 -0.20  0.37 -0.22  0.07 -0.18 0.38 0.62 4.7
## ViewPoints            0.46  0.15 -0.10  0.37 -0.42 -0.09 -0.15 0.59 0.41 3.6
## Monuments             0.58  0.05 -0.07  0.23 -0.19  0.00 -0.07 0.44 0.56 1.6
## Gardens               0.68 -0.01 -0.15  0.11 -0.02  0.15  0.08 0.52 0.48 1.3
## 
##                        MR1  MR2  MR4  MR3  MR6  MR5  MR7
## SS loadings           2.26 2.03 1.95 1.67 1.59 1.27 0.98
## Proportion Var        0.09 0.08 0.08 0.07 0.07 0.05 0.04
## Cumulative Var        0.09 0.18 0.26 0.33 0.40 0.45 0.49
## Proportion Explained  0.19 0.17 0.17 0.14 0.14 0.11 0.08
## Cumulative Proportion 0.19 0.37 0.53 0.67 0.81 0.92 1.00
## 
## Mean item complexity =  2.3
## Test of the hypothesis that 7 factors are sufficient.
## 
## The degrees of freedom for the null model are  276  and the objective function was  28.93 with Chi Square of  107452.8
## The degrees of freedom for the model are 129  and the objective function was  21.66 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  3724 with the empirical chi square  1723.41  with prob <  9.9e-277 
## The total number of observations was  3724  with Likelihood Chi Square =  80359.23  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  -0.604
## RMSEA index =  0.409  and the 90 % confidence intervals are  0.406 0.411
## BIC =  79298.52
## Fit based upon off diagonal values = 0.98
print(fit2$loadings, cutoff = 0.3, sort = T)
## 
## Loadings:
##                      MR1    MR2    MR4    MR3    MR6    MR5    MR7   
## Churches              0.591                                          
## Monuments             0.581                                          
## Gardens               0.675                                          
## Parks                        0.741                                   
## Theatres                     0.815                                   
## Zoo                                 0.533         0.482              
## Pubs_Bars            -0.316         0.709                            
## LocalServices                       0.634                            
## Burger_PizzaShops                         -0.564                     
## Hotels_OtherLodgings                      -0.689                     
## JuiceBars                   -0.329        -0.511                     
## Museums                      0.450                0.621              
## Malls                -0.382                       0.610              
## SwimmingPools                                            0.541       
## Gyms                                                     0.718       
## Resorts                                                         0.659
## Beaches                                                         0.445
## Restaurants          -0.473         0.421         0.342              
## ArtGalleries         -0.344 -0.424                                   
## DanceClubs                                                           
## Bakeries                           -0.337                0.368       
## BeautySpas                         -0.306                            
## Cafes                                      0.370                     
## ViewPoints            0.461                0.371 -0.423              
## 
##                  MR1   MR2   MR4   MR3   MR6   MR5   MR7
## SS loadings    2.261 2.031 1.951 1.672 1.587 1.271 0.980
## Proportion Var 0.094 0.085 0.081 0.070 0.066 0.053 0.041
## Cumulative Var 0.094 0.179 0.260 0.330 0.396 0.449 0.490
fit3 <- fa(X, nfactors = 7, rotate = "varimax", fm = "ols")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done

## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
fit3
## Factor Analysis using method =  ols
## Call: fa(r = X, nfactors = 7, rotate = "varimax", fm = "ols")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                          1     2     3     4     5     6     7   h2   u2 com
## Churches              0.59 -0.06 -0.22  0.15 -0.06  0.06  0.27 0.50 0.50 1.9
## Resorts               0.06  0.00 -0.11  0.16  0.03 -0.02  0.66 0.48 0.52 1.2
## Beaches               0.11  0.27 -0.21  0.12 -0.13 -0.12  0.45 0.37 0.63 2.9
## Parks                -0.01  0.74 -0.02  0.11 -0.18 -0.15  0.13 0.63 0.37 1.3
## Theatres              0.01  0.82 -0.06 -0.02  0.14 -0.12 -0.02 0.70 0.30 1.1
## Museums              -0.18  0.45 -0.09  0.12  0.62 -0.12 -0.15 0.68 0.32 2.4
## Malls                -0.38 -0.06  0.23 -0.07  0.61 -0.15 -0.12 0.61 0.39 2.3
## Zoo                  -0.24 -0.05  0.53  0.03  0.48 -0.16 -0.04 0.60 0.40 2.6
## Restaurants          -0.47 -0.20  0.42  0.14  0.34 -0.25 -0.07 0.65 0.35 4.1
## Pubs_Bars            -0.32 -0.15  0.71  0.00  0.04 -0.21 -0.11 0.68 0.32 1.8
## LocalServices        -0.08  0.02  0.63 -0.30 -0.07 -0.13 -0.12 0.54 0.46 1.7
## Burger_PizzaShops    -0.17 -0.02  0.18 -0.56  0.04  0.01 -0.11 0.39 0.61 1.5
## Hotels_OtherLodgings -0.09 -0.08  0.08 -0.69 -0.01 -0.12 -0.21 0.55 0.45 1.3
## JuiceBars            -0.28 -0.33 -0.21 -0.51  0.04 -0.03 -0.10 0.51 0.49 2.9
## ArtGalleries         -0.34 -0.42 -0.14 -0.10 -0.09 -0.13 -0.12 0.36 0.64 2.8
## DanceClubs           -0.11  0.01  0.01  0.17 -0.26  0.14 -0.11 0.14 0.86 3.2
## SwimmingPools         0.09 -0.03 -0.09  0.08 -0.13  0.54 -0.09 0.34 0.66 1.3
## Gyms                  0.09 -0.11 -0.15  0.04 -0.07  0.72 -0.03 0.57 0.43 1.2
## Bakeries              0.06 -0.19 -0.34 -0.01 -0.06  0.37  0.14 0.31 0.69 2.9
## BeautySpas            0.00 -0.21 -0.31  0.17 -0.19  0.03  0.08 0.21 0.79 3.4
## Cafes                 0.24 -0.24 -0.20  0.37 -0.22  0.07 -0.18 0.38 0.62 4.7
## ViewPoints            0.46  0.15 -0.10  0.37 -0.42 -0.09 -0.15 0.59 0.41 3.6
## Monuments             0.58  0.05 -0.07  0.23 -0.19  0.00 -0.07 0.44 0.56 1.6
## Gardens               0.68 -0.01 -0.15  0.11 -0.02  0.15  0.08 0.52 0.48 1.3
## 
##                       [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## SS loadings           2.26 2.03 1.95 1.67 1.59 1.27 0.98
## Proportion Var        0.09 0.08 0.08 0.07 0.07 0.05 0.04
## Cumulative Var        0.09 0.18 0.26 0.33 0.40 0.45 0.49
## Proportion Explained  0.19 0.17 0.17 0.14 0.14 0.11 0.08
## Cumulative Proportion 0.19 0.37 0.53 0.67 0.81 0.92 1.00
## 
## Mean item complexity =  2.3
## Test of the hypothesis that 7 factors are sufficient.
## 
## The degrees of freedom for the null model are  276  and the objective function was  28.93 with Chi Square of  107452.8
## The degrees of freedom for the model are 129  and the objective function was  21.66 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  3724 with the empirical chi square  1723.41  with prob <  9.9e-277 
## The total number of observations was  3724  with Likelihood Chi Square =  80359.23  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  -0.604
## RMSEA index =  0.409  and the 90 % confidence intervals are  0.406 0.411
## BIC =  79298.52
## Fit based upon off diagonal values = 0.98
print(fit3$loadings, cutoff = 0.3, sort = T)
## 
## Loadings:
##                      [,1]   [,2]   [,3]   [,4]   [,5]   [,6]   [,7]  
## Churches              0.591                                          
## Monuments             0.581                                          
## Gardens               0.675                                          
## Parks                        0.741                                   
## Theatres                     0.815                                   
## Zoo                                 0.533         0.482              
## Pubs_Bars            -0.316         0.709                            
## LocalServices                       0.634                            
## Burger_PizzaShops                         -0.564                     
## Hotels_OtherLodgings                      -0.689                     
## JuiceBars                   -0.329        -0.511                     
## Museums                      0.450                0.621              
## Malls                -0.382                       0.610              
## SwimmingPools                                            0.541       
## Gyms                                                     0.718       
## Resorts                                                         0.659
## Beaches                                                         0.445
## Restaurants          -0.473         0.421         0.342              
## ArtGalleries         -0.344 -0.424                                   
## DanceClubs                                                           
## Bakeries                           -0.337                0.368       
## BeautySpas                         -0.306                            
## Cafes                                      0.370                     
## ViewPoints            0.461                0.371 -0.423              
## 
##                 [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]
## SS loadings    2.261 2.031 1.951 1.672 1.587 1.271 0.980
## Proportion Var 0.094 0.085 0.081 0.070 0.066 0.053 0.041
## Cumulative Var 0.094 0.179 0.260 0.330 0.396 0.449 0.490
fit4 <- fa(X, nfactors=8, rotate="varimax", fm = "pa")
## Warning in cor.smooth(R): Matrix was not positive definite, smoothing was done
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## In factor.scores, the correlation matrix is singular, an approximation is used
## Warning in cor.smooth(r): Matrix was not positive definite, smoothing was done
fit4 # print results
## Factor Analysis using method =  pa
## Call: fa(r = X, nfactors = 8, rotate = "varimax", fm = "pa")
## Standardized loadings (pattern matrix) based upon correlation matrix
##                        PA2   PA1   PA4   PA3   PA8   PA5   PA7   PA6   h2   u2
## Churches             -0.04  0.56 -0.21  0.18 -0.15  0.07 -0.24  0.12 0.49 0.51
## Resorts               0.01  0.08 -0.10  0.14  0.00 -0.02 -0.67  0.05 0.48 0.52
## Beaches               0.28  0.06 -0.20  0.14 -0.19 -0.10 -0.42  0.09 0.38 0.62
## Parks                 0.73 -0.05 -0.02  0.12 -0.17 -0.14 -0.12 -0.06 0.62 0.38
## Theatres              0.82  0.03 -0.07 -0.04  0.16 -0.13  0.01 -0.03 0.73 0.27
## Museums               0.44 -0.16 -0.11  0.11  0.60 -0.13  0.14  0.18 0.68 0.32
## Malls                -0.06 -0.33  0.21 -0.09  0.62 -0.17  0.10  0.14 0.61 0.39
## Zoo                  -0.05 -0.23  0.52  0.03  0.47 -0.17  0.04  0.17 0.61 0.39
## Restaurants          -0.21 -0.46  0.41  0.13  0.38 -0.25  0.06  0.07 0.66 0.34
## Pubs_Bars            -0.15 -0.30  0.70 -0.02  0.11 -0.22  0.09 -0.08 0.68 0.32
## LocalServices         0.02 -0.12  0.64 -0.26 -0.09 -0.13  0.15  0.05 0.54 0.46
## Burger_PizzaShops    -0.02 -0.11  0.19 -0.61  0.07 -0.02  0.11 -0.05 0.44 0.56
## Hotels_OtherLodgings -0.08 -0.13  0.09 -0.63 -0.10 -0.11  0.27  0.22 0.58 0.42
## JuiceBars            -0.33 -0.28 -0.21 -0.49  0.00 -0.02  0.13  0.13 0.50 0.50
## ArtGalleries         -0.45 -0.27 -0.16 -0.17  0.02 -0.17  0.07 -0.27 0.44 0.56
## DanceClubs            0.00 -0.06  0.01  0.10 -0.13  0.12  0.06 -0.36 0.18 0.82
## SwimmingPools        -0.03  0.11 -0.08  0.05 -0.07  0.53  0.07 -0.16 0.34 0.66
## Gyms                 -0.11  0.09 -0.14  0.03 -0.05  0.71  0.03 -0.05 0.56 0.44
## Bakeries             -0.18  0.00 -0.33  0.03 -0.13  0.42 -0.11  0.16 0.37 0.63
## BeautySpas           -0.21 -0.03 -0.30  0.18 -0.20  0.06 -0.08 -0.01 0.22 0.78
## Cafes                -0.24  0.28 -0.20  0.34 -0.15  0.06  0.14 -0.24 0.39 0.61
## ViewPoints            0.16  0.36 -0.09  0.47 -0.51 -0.06  0.21  0.01 0.69 0.31
## Monuments             0.06  0.54 -0.06  0.26 -0.23  0.02  0.07 -0.01 0.43 0.57
## Gardens               0.00  0.71 -0.13  0.10 -0.06  0.14 -0.08  0.03 0.57 0.43
##                      com
## Churches             2.3
## Resorts              1.2
## Beaches              3.4
## Parks                1.3
## Theatres             1.2
## Museums              2.7
## Malls                2.3
## Zoo                  2.9
## Restaurants          4.3
## Pubs_Bars            1.8
## LocalServices        1.7
## Burger_PizzaShops    1.4
## Hotels_OtherLodgings 2.0
## JuiceBars            3.3
## ArtGalleries         3.6
## DanceClubs           1.8
## SwimmingPools        1.4
## Gyms                 1.2
## Bakeries             3.1
## BeautySpas           3.7
## Cafes                5.4
## ViewPoints           3.5
## Monuments            2.0
## Gardens              1.2
## 
##                        PA2  PA1  PA4  PA3  PA8  PA5  PA7  PA6
## SS loadings           2.06 2.05 1.89 1.71 1.67 1.31 0.97 0.51
## Proportion Var        0.09 0.09 0.08 0.07 0.07 0.05 0.04 0.02
## Cumulative Var        0.09 0.17 0.25 0.32 0.39 0.45 0.49 0.51
## Proportion Explained  0.17 0.17 0.16 0.14 0.14 0.11 0.08 0.04
## Cumulative Proportion 0.17 0.34 0.49 0.63 0.77 0.88 0.96 1.00
## 
## Mean item complexity =  2.4
## Test of the hypothesis that 8 factors are sufficient.
## 
## The degrees of freedom for the null model are  276  and the objective function was  28.93 with Chi Square of  107452.8
## The degrees of freedom for the model are 112  and the objective function was  21.55 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.04 
## 
## The harmonic number of observations is  3724 with the empirical chi square  1431.89  with prob <  1e-227 
## The total number of observations was  3724  with Likelihood Chi Square =  79919.21  with prob <  0 
## 
## Tucker Lewis Index of factoring reliability =  -0.838
## RMSEA index =  0.437  and the 90 % confidence intervals are  0.435 0.44
## BIC =  78998.28
## Fit based upon off diagonal values = 0.99
print(fit4$loadings, cutoff = 0.3, sort = T)
## 
## Loadings:
##                      PA2    PA1    PA4    PA3    PA8    PA5    PA7    PA6   
## Parks                 0.734                                                 
## Theatres              0.822                                                 
## Churches                     0.560                                          
## Monuments                    0.540                                          
## Gardens                      0.711                                          
## Zoo                                 0.521         0.469                     
## Pubs_Bars                   -0.304  0.698                                   
## LocalServices                       0.635                                   
## Burger_PizzaShops                         -0.611                            
## Hotels_OtherLodgings                      -0.635                            
## Museums               0.443                       0.602                     
## Malls                       -0.331                0.621                     
## ViewPoints                   0.356         0.472 -0.509                     
## SwimmingPools                                            0.530              
## Gyms                                                     0.712              
## Resorts                                                        -0.666       
## Beaches                                                        -0.424       
## Restaurants                 -0.464  0.406         0.377                     
## JuiceBars            -0.333               -0.488                            
## ArtGalleries         -0.451                                                 
## DanceClubs                                                            -0.365
## Bakeries                           -0.330                0.419              
## BeautySpas                                                                  
## Cafes                                      0.339                            
## 
##                  PA2   PA1   PA4   PA3   PA8   PA5   PA7   PA6
## SS loadings    2.063 2.045 1.889 1.705 1.673 1.306 0.971 0.511
## Proportion Var 0.086 0.085 0.079 0.071 0.070 0.054 0.040 0.021
## Cumulative Var 0.086 0.171 0.250 0.321 0.391 0.445 0.486 0.507
# Interpretation of Factors


plot(fit$loadings[,1:2], type="n") 
text(fit$loadings[,1:2], labels = names(X), cex = 0.7)
abline(h = 0, v = 0)

plot(fit2$loadings[,1:2], type="n") 
text(fit2$loadings[,1:2], labels = names(X), cex = 0.7)
abline(h = 0, v = 0)

plot(fit3$loadings[,1:2], type="n") 
text(fit3$loadings[,1:2], labels = names(X), cex = 0.7)
abline(h = 0, v = 0)

plot(fit4$loadings[,1:2], type="n") 
text(fit4$loadings[,1:2], labels = names(X), cex = 0.7)
abline(h = 0, v = 0)

# Determine Number of Factors to Extract
ev <- eigen(cor(X)) # get eigenvalues
ap <- parallel(subject=nrow(X),var=ncol(X),
               rep=100,cent=.05)
nS <- nScree(x=ev$values, aparallel=ap$eigen$qevpea)
plotnScree(nS)

# Well at least this supports my PCA thoughts




fa.diagram(fit3)