Packages Required:

library(psych)
library(readr)
library(prettydoc)
library(tidyverse)
library(FactoMineR)
library(car)
library(skimr)
library(stats)
library(purrr)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(fig.width=12, fig.height=8)

# load dataset
download.file("https://www.dropbox.com/s/6yoi7hoewg6563n/Hospital.csv?dl=1", "hospital.csv")
hospital <- read_csv("hospital.csv")
df <- hospital  # to simplify and minimize amount of changes to make to original code

Data Prep to remove demographic variables

df <- df[-c(15:19)]

###Factor analysis step 2: Check correlation matrix

cor(df)

##             reputation    quality     distance     doctor    friends
## reputation   1.0000000 -0.5642245 -0.128171212  0.8574469 -0.6640828
## quality     -0.5642245  1.0000000 -0.205235351 -0.5724863  0.8389557
## distance    -0.1281712 -0.2052354  1.000000000 -0.1281710 -0.2177883
## doctor       0.8574469 -0.5724863 -0.128171001  1.0000000 -0.6636600
## friends     -0.6640828  0.8389557 -0.217788263 -0.6636600  1.0000000
## ads         -0.1823649 -0.3287447 -0.036027958 -0.1876315 -0.2571390
## staff       -0.6748534  0.8718534 -0.192615781 -0.6711862  0.9174313
## facilities   0.8576301 -0.5670271 -0.131866954  0.8434716 -0.6285909
## nursery      0.8337463 -0.5627049 -0.097004128  0.8395771 -0.6534683
## insurance   -0.2142136 -0.3015886 -0.003761886 -0.2047735 -0.2413311
## familiar     0.7510432 -0.1803307 -0.308038301  0.7077666 -0.2568143
## convenience  0.7323836 -0.1794451 -0.332817095  0.7072520 -0.2540840
## fullservice -0.6697384  0.3296036 -0.192999920 -0.6617422  0.4275858
## qualcare    -0.2777240  0.7503848 -0.387940545 -0.2622053  0.7484171
##                     ads      staff facilities     nursery    insurance
## reputation  -0.18236493 -0.6748534  0.8576301  0.83374630 -0.214213613
## quality     -0.32874471  0.8718534 -0.5670271 -0.56270487 -0.301588552
## distance    -0.03602796 -0.1926158 -0.1318670 -0.09700413 -0.003761886
## doctor      -0.18763150 -0.6711862  0.8434716  0.83957713 -0.204773548
## friends     -0.25713904  0.9174313 -0.6285909 -0.65346832 -0.241331110
## ads          1.00000000 -0.2790169 -0.1948477 -0.16296383  0.797335879
## staff       -0.27901685  1.0000000 -0.6422828 -0.66544636 -0.279254861
## facilities  -0.19484770 -0.6422828  1.0000000  0.81963063 -0.231850433
## nursery     -0.16296383 -0.6654464  0.8196306  1.00000000 -0.190847002
## insurance    0.79733588 -0.2792549 -0.2318504 -0.19084700  1.000000000
## familiar    -0.50867966 -0.2619830  0.7592179  0.69501094 -0.546273691
## convenience -0.49395653 -0.2445710  0.7375454  0.69562282 -0.513095934
## fullservice  0.56239074  0.4221076 -0.6677868 -0.64350090  0.553869069
## qualcare    -0.52456448  0.7806046 -0.2370941 -0.27191100 -0.522785030
##               familiar convenience fullservice    qualcare
## reputation   0.7510432   0.7323836 -0.66973840 -0.27772397
## quality     -0.1803307  -0.1794451  0.32960364  0.75038480
## distance    -0.3080383  -0.3328171 -0.19299992 -0.38794054
## doctor       0.7077666   0.7072520 -0.66174222 -0.26220535
## friends     -0.2568143  -0.2540840  0.42758583  0.74841713
## ads         -0.5086797  -0.4939565  0.56239074 -0.52456448
## staff       -0.2619830  -0.2445710  0.42210762  0.78060456
## facilities   0.7592179   0.7375454 -0.66778683 -0.23709408
## nursery      0.6950109   0.6956228 -0.64350090 -0.27191100
## insurance   -0.5462737  -0.5130959  0.55386907 -0.52278503
## familiar     1.0000000   0.8776665 -0.69536120  0.20000669
## convenience  0.8776665   1.0000000 -0.65626098  0.19837522
## fullservice -0.6953612  -0.6562610  1.00000000  0.08092793
## qualcare     0.2000067   0.1983752  0.08092793  1.00000000

#Factor analysis step 3: Conduct Principal Components Analysis

pca <-PCA(df, scale. = TRUE)

summary(pca)

## 
## Call:
## PCA(X = df, scale.unit = TRUE) 
## 
## 
## Eigenvalues
##                        Dim.1   Dim.2   Dim.3   Dim.4   Dim.5   Dim.6   Dim.7
## Variance               6.977   3.896   1.405   0.235   0.219   0.204   0.185
## % of var.             49.833  27.825  10.036   1.676   1.562   1.454   1.319
## Cumulative % of var.  49.833  77.658  87.694  89.370  90.932  92.386  93.705
##                        Dim.8   Dim.9  Dim.10  Dim.11  Dim.12  Dim.13  Dim.14
## Variance               0.166   0.158   0.142   0.127   0.119   0.104   0.066
## % of var.              1.187   1.126   1.013   0.908   0.847   0.741   0.475
## Cumulative % of var.  94.892  96.017  97.030  97.938  98.784  99.525 100.000
## 
## Individuals (the 10 first)
##                 Dist    Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3
## 1           |  3.590 |  3.377  0.605  0.885 | -0.077  0.001  0.000 |  0.185
## 2           |  5.235 | -0.331  0.006  0.004 | -2.982  0.846  0.325 | -3.949
## 3           |  3.910 |  3.673  0.716  0.883 |  0.249  0.006  0.004 | -0.104
## 4           |  3.555 |  3.299  0.578  0.861 |  0.272  0.007  0.006 | -0.179
## 5           |  3.899 |  3.700  0.727  0.901 |  0.525  0.026  0.018 | -0.019
## 6           |  3.776 |  3.532  0.662  0.875 |  0.543  0.028  0.021 | -0.031
## 7           |  2.668 | -2.115  0.237  0.629 |  1.169  0.130  0.192 | -0.070
## 8           |  2.378 | -1.955  0.203  0.676 |  0.825  0.065  0.120 | -0.152
## 9           |  6.016 | -2.640  0.370  0.193 | -5.026  2.402  0.698 |  1.318
## 10          |  3.931 |  3.557  0.672  0.819 | -0.293  0.008  0.006 |  0.591
##                ctr   cos2  
## 1            0.009  0.003 |
## 2            4.111  0.569 |
## 3            0.003  0.001 |
## 4            0.008  0.003 |
## 5            0.000  0.000 |
## 6            0.000  0.000 |
## 7            0.001  0.001 |
## 8            0.006  0.004 |
## 9            0.458  0.048 |
## 10           0.092  0.023 |
## 
## Variables (the 10 first)
##                Dim.1    ctr   cos2    Dim.2    ctr   cos2    Dim.3    ctr
## reputation  |  0.932 12.446  0.868 |  0.051  0.068  0.003 |  0.112  0.885
## quality     | -0.677  6.578  0.459 |  0.619  9.822  0.383 |  0.047  0.157
## distance    | -0.030  0.013  0.001 | -0.333  2.851  0.111 | -0.894 56.821
## doctor      |  0.922 12.174  0.849 |  0.043  0.047  0.002 |  0.108  0.823
## friends     | -0.761  8.296  0.579 |  0.563  8.138  0.317 |  0.081  0.462
## ads         | -0.204  0.597  0.042 | -0.813 16.962  0.661 |  0.400 11.375
## staff       | -0.769  8.470  0.591 |  0.587  8.834  0.344 |  0.048  0.161
## facilities  |  0.921 12.154  0.848 |  0.079  0.160  0.006 |  0.114  0.931
## nursery     |  0.907 11.786  0.822 |  0.027  0.019  0.001 |  0.095  0.648
## insurance   | -0.230  0.756  0.053 | -0.818 17.159  0.668 |  0.357  9.088
##               cos2  
## reputation   0.012 |
## quality      0.002 |
## distance     0.798 |
## doctor       0.012 |
## friends      0.006 |
## ads          0.160 |
## staff        0.002 |
## facilities   0.013 |
## nursery      0.009 |
## insurance    0.128 |

principal(df,nfactors=3,residuals=TRUE,rotate = "varimax",scores=TRUE)

## Principal Components Analysis
## Call: principal(r = df, nfactors = 3, residuals = TRUE, rotate = "varimax", 
##     scores = TRUE)
## Standardized loadings (pattern matrix) based upon correlation matrix
##               RC1   RC2   RC3   h2    u2 com
## reputation   0.94  0.06  0.01 0.88 0.117 1.0
## quality     -0.67  0.50  0.38 0.84 0.156 2.5
## distance    -0.15  0.06 -0.94 0.91 0.090 1.1
## doctor       0.93  0.06  0.00 0.86 0.137 1.0
## friends     -0.75  0.43  0.40 0.90 0.098 2.2
## ads         -0.14 -0.92  0.04 0.86 0.138 1.0
## staff       -0.76  0.46  0.38 0.94 0.063 2.2
## facilities   0.93  0.09  0.02 0.87 0.133 1.0
## nursery      0.91  0.05 -0.01 0.83 0.168 1.0
## insurance   -0.17 -0.91  0.01 0.85 0.151 1.1
## familiar     0.77  0.48  0.27 0.89 0.105 2.0
## convenience  0.76  0.45  0.30 0.88 0.122 2.0
## fullservice -0.70 -0.52  0.33 0.87 0.131 2.3
## qualcare    -0.34  0.67  0.56 0.89 0.115 2.5
## 
##                        RC1  RC2  RC3
## SS loadings           6.87 3.48 1.92
## Proportion Var        0.49 0.25 0.14
## Cumulative Var        0.49 0.74 0.88
## Proportion Explained  0.56 0.28 0.16
## Cumulative Proportion 0.56 0.84 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 3 components are sufficient.
## 
## The root mean square of the residuals (RMSR) is  0.02 
##  with the empirical chi square  19.98  with prob <  1 
## 
## Fit based upon off diagonal values = 1

Factor analysis step 4: Determine the number of factors

Check the % var. explained output from the call: summary(pca)

#Factor analysis step 7a: Calculate factor scores
#First calculate eigenvalues and eigenvectors
df.cov <-cov(df)
df.eigen <- eigen(df.cov)
str(df.eigen)

## List of 2
##  $ values : num [1:14] 15.518 6.041 1.236 0.523 0.43 ...
##  $ vectors: num [1:14, 1:14] 0.36598 -0.29733 -0.00374 0.38539 -0.32329 ...
##  - attr(*, "class")= chr "eigen"

#Now extract eigenvalues
(phi<-df.eigen$vectors[,1:3])

##               [,1]        [,2]        [,3]
##  [1,]  0.365977271  0.10673110  0.11288937
##  [2,] -0.297330001  0.35797404  0.08869184
##  [3,] -0.003740584 -0.11103984 -0.54120632
##  [4,]  0.385389235  0.10441532  0.11492342
##  [5,] -0.323287506  0.30781220  0.13737424
##  [6,] -0.025618333 -0.33477705  0.44780216
##  [7,] -0.325132825  0.31725125  0.08375900
##  [8,]  0.357624565  0.12246177  0.12115846
##  [9,]  0.363341486  0.09124877  0.11070834
## [10,] -0.031914466 -0.33127856  0.39503048
## [11,]  0.258666710  0.37738086  0.11866565
## [12,]  0.197159692  0.28547680  0.13011535
## [13,] -0.185043938 -0.16085964  0.45440443
## [14,] -0.131459580  0.38327590  0.16243255

phi<- -phi
row.names(phi) <- c("v1","v2","v3","v4","v5","v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14")
colnames(phi) <- c("PC1", "PC2", "PC3")

#Finally, calculate principal components scores
PC1 <- as.matrix(df) %*%phi[,1]
PC2 <- as.matrix(df) %*%phi[,2]
PC3 <- as.matrix(df) %*%phi[,3]
#Store the principal components scores
PC <- data.frame(PC1,PC2,PC3)

Q3

Cluster analysis step 4: Decide on the number of clusters

#Check the plots output for the call: plot() on line 20
#Conduct k means cluster analysis to store cluster assignments
set.seed(123)
kmeans3.df <- kmeans(df, 3,nstart = 25)
print(kmeans3.df)

## K-means clustering with 3 clusters of sizes 133, 96, 41
## 
## Cluster means:
##   reputation  quality distance   doctor  friends      ads    staff facilities
## 1   2.473684 5.511278 2.774436 2.127820 5.082707 2.172932 5.804511   2.406015
## 2   5.500000 2.739583 2.812500 5.281250 2.104167 2.197917 2.802083   5.343750
## 3   2.487805 2.487805 3.804878 2.195122 2.243902 4.219512 2.853659   2.268293
##    nursery insurance familiar convenience fullservice qualcare
## 1 2.157895  2.654135 3.661654    3.917293    4.000000 5.218045
## 2 5.156250  2.625000 5.614583    5.406250    2.531250 3.822917
## 3 2.317073  4.658537 1.560976    2.292683    4.487805 2.219512
## 
## Clustering vector:
##   [1] 2 3 2 2 2 2 1 1 3 2 1 2 1 1 1 2 1 2 1 1 2 2 1 3 1 2 1 3 1 2 2 3 2 2 2 1 1
##  [38] 3 3 3 2 2 2 2 1 1 2 1 1 1 1 3 1 1 1 1 1 1 1 1 1 2 1 3 1 1 2 1 3 2 1 1 1 1
##  [75] 2 3 2 1 1 2 3 1 1 2 1 1 1 3 2 2 2 2 1 2 2 1 1 1 1 3 3 1 1 3 1 1 1 1 2 1 2
## [112] 1 1 1 2 1 1 1 1 1 1 3 1 2 2 2 3 1 1 1 3 1 3 2 1 2 2 3 1 2 2 1 2 2 2 2 2 1
## [149] 1 1 1 3 3 1 3 3 2 1 2 1 1 1 1 2 2 2 1 3 1 1 3 1 1 3 2 2 2 2 2 1 2 2 3 1 2
## [186] 1 2 1 1 1 1 1 1 2 1 2 2 1 1 2 2 1 1 1 1 3 1 2 2 3 3 2 1 1 1 1 1 1 2 2 2 1
## [223] 2 2 2 2 1 3 2 2 3 2 3 2 1 2 3 1 2 2 1 3 3 2 1 2 1 2 3 1 1 1 3 1 1 1 2 1 1
## [260] 2 1 1 1 2 1 2 2 1 1 2
## 
## Within cluster sum of squares by cluster:
## [1] 494.8722 368.5625 503.0732
##  (between_SS / total_SS =  80.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

Q4

Cluster analysis step 4: Decide on the number of clusters

#Check the plots output for the call: plot() on line 20
#Conduct k means cluster analysis to store cluster assignments
set.seed(123)
kmeans4.df <- kmeans(df, 4,nstart = 25)
print(kmeans4.df)

## K-means clustering with 4 clusters of sizes 28, 133, 13, 96
## 
## Cluster means:
##   reputation  quality distance   doctor  friends      ads    staff facilities
## 1   2.500000 2.357143 2.892857 2.178571 2.321429 5.214286 2.857143   2.285714
## 2   2.473684 5.511278 2.774436 2.127820 5.082707 2.172932 5.804511   2.406015
## 3   2.461538 2.769231 5.769231 2.230769 2.076923 2.076923 2.846154   2.230769
## 4   5.500000 2.739583 2.812500 5.281250 2.104167 2.197917 2.802083   5.343750
##    nursery insurance familiar convenience fullservice qualcare
## 1 2.321429  5.571429 1.464286    2.285714    5.464286 2.285714
## 2 2.157895  2.654135 3.661654    3.917293    4.000000 5.218045
## 3 2.307692  2.692308 1.769231    2.307692    2.384615 2.076923
## 4 5.156250  2.625000 5.614583    5.406250    2.531250 3.822917
## 
## Clustering vector:
##   [1] 4 3 4 4 4 4 2 2 1 4 2 4 2 2 2 4 2 4 2 2 4 4 2 3 2 4 2 3 2 4 4 1 4 4 4 2 2
##  [38] 3 3 3 4 4 4 4 2 2 4 2 2 2 2 1 2 2 2 2 2 2 2 2 2 4 2 1 2 2 4 2 1 4 2 2 2 2
##  [75] 4 1 4 2 2 4 1 2 2 4 2 2 2 1 4 4 4 4 2 4 4 2 2 2 2 3 1 2 2 1 2 2 2 2 4 2 4
## [112] 2 2 2 4 2 2 2 2 2 2 1 2 4 4 4 3 2 2 2 1 2 1 4 2 4 4 1 2 4 4 2 4 4 4 4 4 2
## [149] 2 2 2 1 3 2 1 1 4 2 4 2 2 2 2 4 4 4 2 1 2 2 1 2 2 3 4 4 4 4 4 2 4 4 1 2 4
## [186] 2 4 2 2 2 2 2 2 4 2 4 4 2 2 4 4 2 2 2 2 1 2 4 4 1 3 4 2 2 2 2 2 2 4 4 4 2
## [223] 4 4 4 4 2 1 4 4 1 4 3 4 2 4 1 2 4 4 2 3 1 4 2 4 2 4 1 2 2 2 1 2 2 2 4 2 2
## [260] 4 2 2 2 4 2 4 4 2 2 4
## 
## Within cluster sum of squares by cluster:
## [1] 132.50000 494.87218  48.61538 368.56250
##  (between_SS / total_SS =  85.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# a. Which clusters has the most number of prospective consumers below age of 25?
dfa <-
hospital %>% 
  filter(age < 25) %>% 
  select(-c(15:19))

set.seed(123)
kmeans4a.df <- kmeans(dfa, 4, nstart = 25)
print(kmeans4a.df)

## K-means clustering with 4 clusters of sizes 28, 133, 13, 96
## 
## Cluster means:
##   reputation  quality distance   doctor  friends      ads    staff facilities
## 1   2.500000 2.357143 2.892857 2.178571 2.321429 5.214286 2.857143   2.285714
## 2   2.473684 5.511278 2.774436 2.127820 5.082707 2.172932 5.804511   2.406015
## 3   2.461538 2.769231 5.769231 2.230769 2.076923 2.076923 2.846154   2.230769
## 4   5.500000 2.739583 2.812500 5.281250 2.104167 2.197917 2.802083   5.343750
##    nursery insurance familiar convenience fullservice qualcare
## 1 2.321429  5.571429 1.464286    2.285714    5.464286 2.285714
## 2 2.157895  2.654135 3.661654    3.917293    4.000000 5.218045
## 3 2.307692  2.692308 1.769231    2.307692    2.384615 2.076923
## 4 5.156250  2.625000 5.614583    5.406250    2.531250 3.822917
## 
## Clustering vector:
##   [1] 4 3 4 4 4 4 2 2 1 4 2 4 2 2 2 4 2 4 2 2 4 4 2 3 2 4 2 3 2 4 4 1 4 4 4 2 2
##  [38] 3 3 3 4 4 4 4 2 2 4 2 2 2 2 1 2 2 2 2 2 2 2 2 2 4 2 1 2 2 4 2 1 4 2 2 2 2
##  [75] 4 1 4 2 2 4 1 2 2 4 2 2 2 1 4 4 4 4 2 4 4 2 2 2 2 3 1 2 2 1 2 2 2 2 4 2 4
## [112] 2 2 2 4 2 2 2 2 2 2 1 2 4 4 4 3 2 2 2 1 2 1 4 2 4 4 1 2 4 4 2 4 4 4 4 4 2
## [149] 2 2 2 1 3 2 1 1 4 2 4 2 2 2 2 4 4 4 2 1 2 2 1 2 2 3 4 4 4 4 4 2 4 4 1 2 4
## [186] 2 4 2 2 2 2 2 2 4 2 4 4 2 2 4 4 2 2 2 2 1 2 4 4 1 3 4 2 2 2 2 2 2 4 4 4 2
## [223] 4 4 4 4 2 1 4 4 1 4 3 4 2 4 1 2 4 4 2 3 1 4 2 4 2 4 1 2 2 2 1 2 2 2 4 2 2
## [260] 4 2 2 2 4 2 4 4 2 2 4
## 
## Within cluster sum of squares by cluster:
## [1] 132.50000 494.87218  48.61538 368.56250
##  (between_SS / total_SS =  85.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

# b. Which clusters has the most number of prospective consumers  aged between
# 26 and 40?

# getting error messages. Need to re-check code
dfb <-
hospital %>% 
  filter( age <= 40 & age >= 26) %>% 
  select(-c(15:19))

#set.seed(123)
#kmeans4b.df <- kmeans(dfb, 4, nstart = 25)
#print(kmeans4b.df)

(Optional: Rescale the Data (From book))

##    reputation         quality            distance            doctor       
##  Min.   :-1.6356   Min.   :-1.94199   Min.   :-1.24755   Min.   :-1.3678  
##  1st Qu.:-0.9947   1st Qu.:-0.67547   1st Qu.: 0.07339   1st Qu.:-0.7624  
##  Median :-0.3537   Median :-0.04222   Median : 0.07339   Median :-0.1570  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 0.9282   3rd Qu.: 1.22430   3rd Qu.: 0.07339   3rd Qu.: 1.0539  
##  Max.   : 1.5692   Max.   : 1.22430   Max.   : 4.03618   Max.   : 1.6594  
##     friends             ads              staff           facilities     
##  Min.   :-1.6638   Min.   :-1.4388   Min.   :-1.4799   Min.   :-1.5701  
##  1st Qu.:-1.0220   1st Qu.:-0.4748   1st Qu.:-0.8334   1st Qu.:-0.9239  
##  Median :-0.3803   Median :-0.4748   Median :-0.8334   Median :-0.2776  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.9032   3rd Qu.: 0.4891   3rd Qu.: 1.1063   3rd Qu.: 1.0148  
##  Max.   : 1.5449   Max.   : 3.3810   Max.   : 1.1063   Max.   : 1.6610  
##     nursery          insurance           familiar         convenience     
##  Min.   :-1.4199   Min.   :-0.93389   Min.   :-2.08338   Min.   :-1.9365  
##  1st Qu.:-0.7883   1st Qu.:-0.93389   1st Qu.:-0.71140   1st Qu.:-0.1760  
##  Median :-0.1567   Median : 0.05107   Median :-0.02541   Median :-0.1760  
##  Mean   : 0.0000   Mean   : 0.00000   Mean   : 0.00000   Mean   : 0.0000  
##  3rd Qu.: 1.1064   3rd Qu.: 0.05107   3rd Qu.: 0.66058   3rd Qu.: 0.7042  
##  Max.   : 1.7380   Max.   : 3.00597   Max.   : 1.34658   Max.   : 1.5844  
##   fullservice         qualcare      
##  Min.   :-1.4978   Min.   :-1.9464  
##  1st Qu.:-0.5326   1st Qu.:-0.2290  
##  Median : 0.4325   Median :-0.2290  
##  Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.4325   3rd Qu.: 0.6297  
##  Max.   : 2.3628   Max.   : 1.4884

Alternate PCA. same as above

## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.6413 1.9737 1.1853 0.48442 0.46762 0.45116 0.42972
## Proportion of Variance 0.4983 0.2782 0.1004 0.01676 0.01562 0.01454 0.01319
## Cumulative Proportion  0.4983 0.7766 0.8769 0.89370 0.90932 0.92386 0.93705
##                            PC8     PC9    PC10    PC11    PC12    PC13    PC14
## Standard deviation     0.40760 0.39696 0.37652 0.35651 0.34428 0.32213 0.25774
## Proportion of Variance 0.01187 0.01126 0.01013 0.00908 0.00847 0.00741 0.00475
## Cumulative Proportion  0.94892 0.96017 0.97030 0.97938 0.98784 0.99525 1.00000

Scree Plot

The elbow occurs at either component 3 or 4, depending on interpretation, and this suggests that the first 2 or 3 components explain most of the variation in the observed hospital ratings.

Another solution is to use PCA

Cluster analysis step 2 & 3:

Select a distance measure & clustering procedure Plot Dendrogram

library(factoextra)

# First compute the disimilarity matrix
d <- dist(hospital.sc, method = "euclidean")

# Hierarchical clustering using Wards.D
hcl <-hclust(d, method = "ward.D2")

# Plot the obtained dendrogram
plot(hcl, cex = 0.6, hang =  -1, las = 1)

Cut the tree

Cluster Analysis Step 5: Interpret and Profile Clusters

# Check the cluster means output for the call: print() on line 27

Cluster Analysis Step 6: Assess reliability and validity

# Check the within cluster sum of squares output for the call print() on line 27

Hospital Survey Analysis

Ted Dickey

2020-10-01

Packages Required:

Data Prep to remove demographic variables

Factor analysis step 4: Determine the number of factors

Check the % var. explained output from the call: summary(pca)

Q3

Cluster analysis step 4: Decide on the number of clusters

Q4

Cluster analysis step 4: Decide on the number of clusters

(Optional: Rescale the Data (From book))

Alternate PCA. same as above

Cluster analysis step 2 & 3:

Cut the tree

Cluster Analysis Step 5: Interpret and Profile Clusters

Cluster Analysis Step 6: Assess reliability and validity