頤珍公司資料個案讀書會
第一周
1. 人格特質 BIG FIVE 問卷
   + BIGFIVE_doula.xlsx
   + BIGFIVE_customer.xlsx
2. BIG FIVE 問卷對應
   + Big5_item.csv
3. 月嫂個人基本資料
   + doula_BasicInfo.xlsx
五大人格特質介紹:
+ Surgency 外向性
+ Agreeableness 親和性
+ Adjustment 適應性(情緒穩定性)
+ Conscientiousness 盡責性
+ Openness_to_Experience 開放性
基本參數設定
## [1] ""
讀入基本資料
# Read the input workbooks and give their columns common names.
#
# List every file under ../data whose name ends in ".xlsx". `pattern` is a
# regular expression, so it is anchored to the extension instead of matching
# the letters "xlsx" anywhere in a file name.
d <- dir("../data", pattern = "\\.xlsx$", full.names = TRUE)
# NOTE(review): the indices below depend on the sorted order of the directory
# listing -- confirm each one still points at the intended workbook.
c5 <- read_xlsx(d[1]) # customers
m5 <- read_xlsx(d[2]) # doulas
sat <- read_xlsx(d[3]) # basic information
item <- read_xlsx(d[4]) # item table; its `category` column is used below
item <- item[, c(1, 4)]

# The first two columns of every sheet are renamed to "time" and "name".
names(sat)[1:2] <- c("time", "name")
names(m5)[1:2] <- c("time", "name")
names(c5)[1:2] <- c("time", "name")

# Columns 3 to 27 of both questionnaires are renamed after item$category.
names(m5)[3:27] <- item$category
names(c5)[3:27] <- item$category

# Doulas that have basic information but did not fill in the questionnaire
# (names that are in sat but not in m5):
# a = setdiff(sat$name, m5$name);
# which(a %in% c5$name) # check that these people are not in the customer list either
# Drop repeated form submissions: only the first row for each name is kept.
c5_a <- c5[!duplicated(c5$name), ]
m5_a <- m5[!duplicated(m5$name), ]

# Centre the scores so that respondents who answer uniformly high or uniformly
# low become comparable. The item columns (3 to 27) are transposed so that
# scale() works on one respondent per column; only centring is applied
# (scale = FALSE), then the result is transposed back to one row per respondent.
c5_x <- data.frame(t(scale(t(c5_a[, 3:27]), center = TRUE, scale = FALSE)))
m5_x <- data.frame(t(scale(t(m5_a[, 3:27]), center = TRUE, scale = FALSE)))

# Alternative kept for reference: per-respondent min-max rescaling.
# c5_x = t(apply(c5_a[,3:27], 1, function(x)(x-min(x))/(max(x)-min(x))))
# m5_x = t(apply(m5_a[,3:27], 1, function(x)(x-min(x))/(max(x)-min(x))))
# Average the item scores that belong to the same column-name prefix.

# Distinct prefixes of the questionnaire columns: the trailing "_<number>"
# is stripped as a whole, so multi-digit item numbers leave no stray pieces.
prefix <- unique(sub("_[0-9]+$", "", names(c5)[3:27]))

# Row means of the columns belonging to each prefix.
#
# scores: data frame with one row per respondent and columns named
#         "<prefix>_<number>".
# traits: character vector of prefixes.
# Returns a matrix with one row per respondent and one column per prefix.
trait_row_means <- function(scores, traits) {
  per_trait <- lapply(traits, function(trait) {
    # Literal prefix match, so a prefix cannot match inside another name.
    belongs <- startsWith(names(scores), paste0(trait, "_"))
    # drop = FALSE keeps a data frame even when a single column matches.
    rowMeans(scores[, belongs, drop = FALSE])
  })
  out <- do.call(cbind, per_trait)
  colnames(out) <- traits
  out
}

c5_rM <- trait_row_means(c5_x, prefix)
m5_rM <- trait_row_means(m5_x, prefix)
## c5_kmg
## 1 2 3 4 5
## 10 13 14 7 15
# Another way of drawing the plot (kept for reference):
# c5_pca <- princomp(c5_rM) #

# PCA biplot on the first two components, with individuals coloured by
# c5_kmg and a 0.65-level ellipse drawn per group; the legend is hidden.
# NOTE(review): `pca` and `c5_kmg` are not defined in the lines visible here;
# they are presumably created in a chunk that is not shown -- confirm.
fviz_pca_biplot(
  pca,
  axes = c(1, 2),
  col.var = "black", labelsize = 3,
  col.ind = c5_kmg, alpha.ind = 0.6, pointshape = 16,
  addEllipses = TRUE, ellipse.level = 0.65,
  mean.point = FALSE
) +
  theme(legend.position = "none")
## m5_kmg
## 1 2 3 4 5
## 21 23 26 35 27
# PCA biplot on the first two components, with individuals coloured by
# m5_kmg and a 0.65-level ellipse drawn per group; the legend is hidden.
# NOTE(review): this call passes the same `pca` object name as the earlier
# biplot coloured by c5_kmg; confirm `pca` has been refitted on the doula
# data before this point (neither `pca` nor `m5_kmg` is defined in view).
fviz_pca_biplot(
  pca,
  axes = c(1, 2),
  col.var = "black", labelsize = 3,
  col.ind = m5_kmg, alpha.ind = 0.6, pointshape = 16,
  addEllipses = TRUE, ellipse.level = 0.65,
  mean.point = FALSE
) +
  theme(legend.position = "none")

# Look at the proportion of variance of each principal component.
# loadings: the matrix of variable loadings (columns are eigenvectors)
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 0.7336369 0.6483998 0.4852933 0.33087691 3.332001e-08
## Proportion of Variance 0.4128635 0.3225001 0.1806562 0.08398023 8.516367e-16
## Cumulative Proportion 0.4128635 0.7353636 0.9160198 1.00000000 1.000000e+00
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4
## Surgency 0.41726151 0.60276630 0.47229816 0.19874622
## Agreeableness 0.65616413 -0.50225957 -0.33710369 -0.05954045
## Adjustment -0.38886935 -0.51012036 0.61468914 -0.10351378
## Conscientiousness -0.41232613 0.06286730 -0.42125644 0.66975957
## Openness_to_Experience -0.27223016 0.34674634 -0.32862717 -0.70545156
## Comp.5
## Surgency 0.44721360
## Agreeableness 0.44721360
## Adjustment 0.44721360
## Conscientiousness 0.44721360
## Openness_to_Experience 0.44721360
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## SS loadings 1.0 1.0 1.0 1.0 1.0
## Proportion Var 0.2 0.2 0.2 0.2 0.2
## Cumulative Var 0.2 0.4 0.6 0.8 1.0
## Importance of components:
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## Standard deviation 0.7288911 0.6176855 0.5052674 0.33092539 0
## Proportion of Variance 0.4158361 0.2986287 0.1998202 0.08571503 0
## Cumulative Proportion 0.4158361 0.7144648 0.9142850 1.00000000 1
##
## Loadings:
## Comp.1 Comp.2 Comp.3 Comp.4
## Surgency 0.73619841 0.45631418 0.22312395 0.00223143
## Agreeableness 0.30383180 -0.77556909 -0.31178171 -0.09471530
## Adjustment -0.37184036 -0.21560330 0.76996261 0.14969148
## Conscientiousness -0.37988821 0.28677486 -0.22216762 -0.72393831
## Openness_to_Experience -0.28830164 0.24808336 -0.45913723 0.66673069
## Comp.5
## Surgency 0.44721360
## Agreeableness 0.44721360
## Adjustment 0.44721360
## Conscientiousness 0.44721360
## Openness_to_Experience 0.44721360
##
## Comp.1 Comp.2 Comp.3 Comp.4 Comp.5
## SS loadings 1.0 1.0 1.0 1.0 1.0
## Proportion Var 0.2 0.2 0.2 0.2 0.2
## Cumulative Var 0.2 0.4 0.6 0.8 1.0