library(tidyverse)

## ── Attaching packages ──────────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──

## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.4     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.0

## ── Conflicts ─────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

library(ggpubr)      # Publication Ready Plots
library(rstatix)     # Pipe-Friendly Framework for Basic Statistical Tests

## 
## Attaching package: 'rstatix'

## The following object is masked from 'package:stats':
## 
##     filter

library(readxl)
library(psych)

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

set.seed(123)

lisasdata <- read_excel("/Users/riku/Dropbox/zemizemi/lisas/lisas2.xlsx", sheet = "mat_mh")

library(factoextra)

## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa

rpca <- prcomp(x=lisasdata[2:7],  scale = TRUE)
fviz_eig(rpca)

summary(rpca)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     1.9498 1.0283 0.75563 0.62710 0.31046 0.28285
## Proportion of Variance 0.6337 0.1762 0.09516 0.06554 0.01606 0.01333
## Cumulative Proportion  0.6337 0.8099 0.90506 0.97060 0.98667 1.00000

library(Gifi)

lisas_ordinal_load <- lisasdata[2:7] # 2:7 means load part
lisas_ordinal_load$Q1H <- as.ordered(lisas_ordinal_load$Q1H)
lisas_ordinal_load$Q2H <- as.ordered(lisas_ordinal_load$Q2H)
lisas_ordinal_load$Q3H <- as.ordered(lisas_ordinal_load$Q3H)
lisas_ordinal_load$Q1M <- as.ordered(lisas_ordinal_load$Q1M)
lisas_ordinal_load$Q2M <- as.ordered(lisas_ordinal_load$Q2M)
lisas_ordinal_load$Q3M <- as.ordered(lisas_ordinal_load$Q3M)

#levels(lisas_ordinal_load$Q1H) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_load$Q2H) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_load$Q3H) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_load$Q1M) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_load$Q2M) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_load$Q3M) <- c("1","2","3","4","5","6","7","8","9")

lisas_ordinal_load_df <- as.data.frame(lisas_ordinal_load)

res_gifi_load <- princals(lisas_ordinal_load_df)
summary(res_gifi_load)

## 
## Loadings (cutoff = 0.1):
##     Comp1  Comp2 
## Q1H  0.995 -0.103
## Q2H  0.996       
## Q1M  0.995 -0.104
## Q2M  0.995 -0.104
## Q3M  0.995 -0.104
## Q3H  0.701  0.713
## 
## Importance (Variance Accounted For):
##                  Comp1    Comp2
## Eigenvalues     5.4404   0.5596
## VAF            90.6729   9.3265
## Cumulative VAF 90.6700 100.0000

lisas_ordinal_omo <- lisasdata[8:9] # 2:7 means load part
lisas_ordinal_omo$Q4H <- as.ordered(lisas_ordinal_omo$Q4H)
lisas_ordinal_omo$Q4M <- as.ordered(lisas_ordinal_omo$Q4M)


#levels(lisas_ordinal_omo$Q4H) <- c("1","2","3","4","5","6","7","8","9")
#levels(lisas_ordinal_omo$Q4M) <- c("1","2","3","4","5","6","7","8","9")

lisas_ordinal_omo_df <- as.data.frame(lisas_ordinal_omo)

res_gifi_omo <- princals(lisas_ordinal_omo_df)
summary(res_gifi_omo)

## 
## Loadings (cutoff = 0.1):
##     Comp1  Comp2 
## Q4M  0.822  0.569
## Q4H  0.594 -0.804
## 
## Importance (Variance Accounted For):
##                  Comp1    Comp2
## Eigenvalues     1.0306   0.9694
## VAF            51.5278  48.4722
## Cumulative VAF 51.5300 100.0000

res_gifi_load$objectscores[,1]

##          1          2          3          4          5          6          7 
## -5.8839479  0.2086914  0.1402580  0.2255299  0.1087897  0.2249436  0.2312451 
##          8          9         10         11         12         13         14 
##  0.1093364  0.2787663 -0.3362803  0.1090623  0.1393453  0.2740368  0.1385547 
##         15         16         17         18         19         20         21 
##  0.2740705  0.1434094  0.2788084  0.1380595  0.2077786  0.2273840  0.1380774 
##         22         23         24         25         26         27         28 
##  0.1103227  0.1102534  0.2224937  0.1425043  0.1085365  0.1415994  0.1087897 
##         29         30         31         32         33         34         35 
##  0.2055745  0.2737642  0.1129029  0.2783173  0.1398726  0.2077786  0.2298609 
##         36 
##  0.2315103

res_gifi_omo$objectscores[,1]

##           1           2           3           4           5           6 
##  0.07535994 -0.99370088  0.07535994  0.07535994  0.07535994 -0.06131973 
##           7           8           9          10          11          12 
## -0.06131973 -3.40137709  0.07535994  2.10908539  2.10908539  0.07535994 
##          13          14          15          16          17          18 
##  0.07535994  0.07535994  0.07535994  0.07535994  2.10908539  0.07535994 
##          19          20          21          22          23          24 
## -0.99370088  0.07535994 -0.06131973 -0.06131973 -0.99370088 -1.13038055 
##          25          26          27          28          29          30 
##  0.07535994  0.07535994 -0.06131973  0.07535994 -1.13038055 -0.99370088 
##          31          32          33          34          35          36 
##  0.07535994  0.07535994  0.07535994  2.10908539  0.07535994  0.07535994

gifi.df <- data.frame(res_gifi_load$objectscores[,1],res_gifi_omo$objectscores[,1]
)

gifi.df <- scale(gifi.df)

# K-meidods

library(cluster)
library(factoextra)
fviz_nbclust(gifi.df, pam, method = "silhouette")+
  theme_classic()

pam.res <- pam(gifi.df, 5)
print(pam.res)

## Medoids:
##    ID res_gifi_load.objectscores...1. res_gifi_omo.objectscores...1.
## 1   1                      -5.8016509                      0.0743059
## 2   2                       0.2057725                     -0.9798023
## 25 25                       0.1405111                      0.0743059
## 8   8                       0.1078071                     -3.3538030
## 34 34                       0.2048725                      2.0795862
## Clustering vector:
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 
##  1  2  3  3  3  3  3  4  3  5  5  3  3  3  3  3  5  3  2  3  3  3  2  2  3  3 
## 27 28 29 30 31 32 33 34 35 36 
##  3  3  2  2  3  3  3  5  3  3 
## Objective function:
##      build       swap 
## 0.07988144 0.07988144 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

gifi.dd <- cbind(gifi.df, cluster = pam.res$cluster)
head(gifi.dd, n = 3)

##   res_gifi_load.objectscores...1. res_gifi_omo.objectscores...1. cluster
## 1                      -5.8016509                      0.0743059       1
## 2                       0.2057725                     -0.9798023       2
## 3                       0.1382963                      0.0743059       3

fviz_cluster(pam.res, 
             palette = "Set1", # color palette
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_classic()
             )

res_gifi_load$objectscores[,1]

##          1          2          3          4          5          6          7 
## -5.8839479  0.2086914  0.1402580  0.2255299  0.1087897  0.2249436  0.2312451 
##          8          9         10         11         12         13         14 
##  0.1093364  0.2787663 -0.3362803  0.1090623  0.1393453  0.2740368  0.1385547 
##         15         16         17         18         19         20         21 
##  0.2740705  0.1434094  0.2788084  0.1380595  0.2077786  0.2273840  0.1380774 
##         22         23         24         25         26         27         28 
##  0.1103227  0.1102534  0.2224937  0.1425043  0.1085365  0.1415994  0.1087897 
##         29         30         31         32         33         34         35 
##  0.2055745  0.2737642  0.1129029  0.2783173  0.1398726  0.2077786  0.2298609 
##         36 
##  0.2315103

res_gifi_omo$objectscores[,1]

##           1           2           3           4           5           6 
##  0.07535994 -0.99370088  0.07535994  0.07535994  0.07535994 -0.06131973 
##           7           8           9          10          11          12 
## -0.06131973 -3.40137709  0.07535994  2.10908539  2.10908539  0.07535994 
##          13          14          15          16          17          18 
##  0.07535994  0.07535994  0.07535994  0.07535994  2.10908539  0.07535994 
##          19          20          21          22          23          24 
## -0.99370088  0.07535994 -0.06131973 -0.06131973 -0.99370088 -1.13038055 
##          25          26          27          28          29          30 
##  0.07535994  0.07535994 -0.06131973  0.07535994 -1.13038055 -0.99370088 
##          31          32          33          34          35          36 
##  0.07535994  0.07535994  0.07535994  2.10908539  0.07535994  0.07535994

gifi.df <- data.frame(res_gifi_load$objectscores[,1],res_gifi_omo$objectscores[,1]
)
gifi.df <- gifi.df[-1,]
gifi.df <- gifi.df[-9,]
gifi.df <- scale(gifi.df)

# K-meidods

library(cluster)
library(factoextra)
fviz_nbclust(gifi.df, pam, method = "silhouette")+
  theme_classic()

pam.res <- pam(gifi.df, 4)
print(pam.res)

## Medoids:
##    ID res_gifi_load.objectscores...1. res_gifi_omo.objectscores...1.
## 2   1                       0.4131988                     -0.9527516
## 18 16                      -0.7204861                      0.1431080
## 36 34                       0.7794570                      0.1431080
## 34 32                       0.3985484                      2.2278140
## Clustering vector:
##  2  3  4  5  6  7  8  9 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 
##  1  2  3  2  3  3  1  3  4  2  3  2  3  2  4  2  1  3  2  2  2  1  2  2  2  2 
## 29 30 31 32 33 34 35 36 
##  1  1  2  3  2  4  3  3 
## Objective function:
##     build      swap 
## 0.4520454 0.4270968 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

gifi.dd <- cbind(gifi.df, cluster = pam.res$cluster)
head(gifi.dd, n = 3)

##   res_gifi_load.objectscores...1. res_gifi_omo.objectscores...1. cluster
## 2                       0.4131988                     -0.9527516       1
## 3                      -0.6851985                      0.1431080       2
## 4                       0.6834673                      0.1431080       3

fviz_cluster(pam.res, 
             palette = "Set1", # color palette
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_classic()
             )

1 差でやってみると

lisasdata.sa <- read_excel("/Users/riku/Dropbox/zemizemi/lisas/lisas2.xlsx", sheet = "matminus")

#sa.pca <- prcomp(lisasdata.sa, scale = TRUE)
sa.pca2 <- prcomp(lisasdata.sa[2:5], scale = FALSE)

fviz_eig(sa.pca2)

summary(sa.pca2)

## Importance of components:
##                           PC1    PC2     PC3    PC4
## Standard deviation     2.4823 1.1748 0.73368 0.5619
## Proportion of Variance 0.7339 0.1644 0.06411 0.0376
## Cumulative Proportion  0.7339 0.8983 0.96240 1.0000

sa.df <- data.frame(sa.pca2$x[,1],lisasdata.sa$q4z)
sa.df <- scale(sa.df)
# K-meidods

fviz_nbclust(sa.df, pam, method = "silhouette")+
  theme_classic()

pam.res.sa <- pam(sa.df, 6)
print(pam.res.sa)

## Medoids:
##      ID sa.pca2.x...1. lisasdata.sa.q4z
## [1,] 31     -0.4248869        1.0697028
## [2,]  3     -0.6218045       -0.7214275
## [3,]  4      0.3933285        0.1741377
## [4,] 17      0.7301468       -0.7214275
## [5,] 10     -3.5658396       -0.7214275
## [6,] 13      1.7078692        0.1741377
## Clustering vector:
##  [1] 1 1 2 3 3 4 2 2 3 5 4 3 6 2 6 2 4 2 3 3 2 2 1 6 1 3 4 2 3 6 1 2 3 4 3 1
## Objective function:
##     build      swap 
## 0.4801917 0.4621304 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

sa.dd <- cbind(sa.df, cluster = pam.res.sa$cluster)
head(sa.dd, n = 3)

##      sa.pca2.x...1. lisasdata.sa.q4z cluster
## [1,]    -0.55422851        1.0697028       1
## [2,]    -0.03904342        1.9652679       1
## [3,]    -0.62180454       -0.7214275       2

fviz_cluster(pam.res.sa, 
             palette = "Set1", # color palette
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_classic()
             )

#青　認知負荷が激しく上がって、面白さも上がる
#赤

2 K-meansでやってみる

3 K-meansでgifi

fviz_nbclust(gifi.df, kmeans, method = "wss") +
    geom_vline(xintercept = 4, linetype = 2)

gifi.km.res <- kmeans(gifi.df, 4, nstart = 25)
print(gifi.km.res)

## K-means clustering with 4 clusters of sizes 3, 1, 15, 15
## 
## Cluster means:
##   res_gifi_load.objectscores...1. res_gifi_omo.objectscores...1.
## 1                       0.2504197                     2.22781404
## 2                      -1.1815102                    -3.42078250
## 3                      -0.8805828                     0.04202949
## 4                       0.9092662                    -0.25954013
## 
## Clustering vector:
##  2  3  4  5  6  7  8  9 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 
##  4  3  4  3  4  4  2  4  1  3  4  3  4  3  1  3  4  4  3  3  3  4  3  3  3  3 
## 29 30 31 32 33 34 35 36 
##  4  4  3  4  3  1  4  4 
## 
## Within cluster sum of squares by cluster:
## [1] 3.744446 0.000000 1.966744 7.043714
##  (between_SS / total_SS =  80.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

fviz_cluster(gifi.km.res, data = gifi.df,
             palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"), 
             ellipse.type = "euclid", # Concentration ellipse
             star.plot = TRUE, # Add segments from centroids to items
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_minimal()
             )

## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

k-medoidsと一緒です。

3.1 k-meansで差

fviz_nbclust(sa.df, kmeans, method = "wss") +
    geom_vline(xintercept = 4, linetype = 2)

sa.km.res <- kmeans(sa.df, 4, nstart = 25)
print(sa.km.res)

## K-means clustering with 4 clusters of sizes 10, 11, 14, 1
## 
## Cluster means:
##   sa.pca2.x...1. lisasdata.sa.q4z
## 1     -0.4943161        0.8905898
## 2     -0.2941403       -1.0470875
## 3      0.8388959        0.2381066
## 4     -3.5658396       -0.7214275
## 
## Clustering vector:
##  [1] 1 1 2 3 1 2 2 2 3 4 2 3 3 2 3 1 3 2 3 3 2 2 1 3 1 3 2 2 3 3 1 1 1 3 3 1
## 
## Within cluster sum of squares by cluster:
## [1]  5.149181  7.180704 10.401265  0.000000
##  (between_SS / total_SS =  67.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

fviz_cluster(sa.km.res, data = sa.df,
             palette = c("#2E9FDF", "#00AFBB", "#E7B800", "#FC4E07"), 
             ellipse.type = "euclid", # Concentration ellipse
             star.plot = TRUE, # Add segments from centroids to items
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_minimal()
             )

## Too few points to calculate an ellipse

lisasdata.num <- read_excel("/Users/riku/Dropbox/zemizemi/lisas/lisas2.xlsx", sheet = "mat_mh")

num.pca <- prcomp(lisasdata.num[2:7], scale = FALSE)

fviz_eig(num.pca)

summary(num.pca)

## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6
## Standard deviation     3.2427 1.7677 1.17250 1.01060 0.50768 0.41608
## Proportion of Variance 0.6385 0.1898 0.08349 0.06202 0.01565 0.01051
## Cumulative Proportion  0.6385 0.8283 0.91181 0.97383 0.98949 1.00000

num.pca.omo <- prcomp(lisasdata.num[8:9], scale = FALSE)
fviz_eig(num.pca.omo)

summary(num.pca.omo)

## Importance of components:
##                           PC1    PC2
## Standard deviation     1.6082 0.7870
## Proportion of Variance 0.8068 0.1932
## Cumulative Proportion  0.8068 1.0000

num.df <- data.frame(num.pca$x[,1],num.pca.omo$x[,1])
num.df <- scale(num.df)
# K-meidods

fviz_nbclust(num.df, pam, method = "silhouette")+
  theme_classic()

pam.res.sa <- pam(num.df, 3)
print(pam.res.sa)

## Medoids:
##      ID num.pca.x...1. num.pca.omo.x...1.
## [1,] 28    -1.18023753         -0.7847706
## [2,] 27     0.03502907          0.9722008
## [3,]  4     0.66321899         -0.3255470
## Clustering vector:
##  [1] 1 2 3 3 1 2 3 2 3 1 1 2 3 3 3 3 3 2 2 3 2 2 2 2 3 1 2 1 2 2 1 3 2 1 3 3
## Objective function:
##     build      swap 
## 0.8096224 0.6923647 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

num.dd <- cbind(num.df, cluster = pam.res.sa$cluster)
head(num.dd, n = 3)

##      num.pca.x...1. num.pca.omo.x...1. cluster
## [1,]     -3.4020872        -0.74480907       1
## [2,]      0.5929899         0.59290029       2
## [3,]      0.3074664         0.09371509       3

fviz_cluster(pam.res.sa, 
             palette = "Set1", # color palette
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_classic()
             )

num.df <- lisasdata[2:9]
# K-meidods

fviz_nbclust(num.df, pam, method = "silhouette")+
  theme_classic()

pam.res.sa <- pam(num.df, 2)
print(pam.res.sa)

## Medoids:
##      ID Q1H Q2H Q3H Q1M Q2M Q3M Q4H Q4M
## [1,] 22   7   7   4   5   6   4   6   6
## [2,]  4   8   8   8   6   6   6   8   7
## Clustering vector:
##  [1] 1 2 2 2 1 2 2 1 2 1 1 2 2 2 2 2 2 1 2 2 1 1 1 1 2 1 1 1 1 2 1 2 2 2 2 2
## Objective function:
##    build     swap 
## 3.592678 3.444757 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"

num.dd <- cbind(num.df, cluster = pam.res.sa$cluster)
head(num.dd, n = 3)

fviz_cluster(pam.res.sa, 
             palette = "Set1", # color palette
             repel = TRUE, # Avoid label overplotting (slow)
             ggtheme = theme_classic()
             )

c1 <- read_excel("/Users/riku/Dropbox/c1.xlsx")
c2 <- read_excel("/Users/riku/Dropbox/c2.xlsx")

c1m <- filter(c1, load == "M")
c1h <- filter(c1, load == "H")

c1.df <- data.frame(M = c1m$score, H = c1h$score , Q = c1m$Q )
c1.df.q1 <- filter(c1.df, Q == "Q1")
c1.df.q2 <- filter(c1.df, Q == "Q2")
c1.df.q3 <- filter(c1.df, Q == "Q3")
c1.df.q4 <- filter(c1.df, Q == "Q4")

ggpaired(c1.df.q1, cond1 = "M", cond2 = "H")

ggpaired(c1.df.q2, cond1 = "M", cond2 = "H")

ggpaired(c1.df.q3, cond1 = "M", cond2 = "H")

ggpaired(c1.df.q4, cond1 = "M", cond2 = "H")

c2m <- filter(c2, load == "M")
c2h <- filter(c2, load == "H")

c2.df <- data.frame(M = c2m$score, H = c2h$score , Q = c2m$Q )
c2.df.q1 <- filter(c2.df, Q == "Q1")
c2.df.q2 <- filter(c2.df, Q == "Q2")
c2.df.q3 <- filter(c2.df, Q == "Q3")
c2.df.q4 <- filter(c2.df, Q == "Q4")

ggpaired(c2.df.q1, cond1 = "M", cond2 = "H")

ggpaired(c2.df.q2, cond1 = "M", cond2 = "H")

ggpaired(c2.df.q3, cond1 = "M", cond2 = "H")

ggpaired(c2.df.q4, cond1 = "M", cond2 = "H")

20201022 ZEMI DRAFT

1 差でやってみると

2 K-meansでやってみる

3 K-meansでgifi

3.1 k-meansで差