## 使用R分析市场调查数据 

# 更改工作空间####
setwd("/Users/daitu/R/市场调查")
getwd()
## [1] "/Users/daitu/R/市场调查"
## 加载包####
library(readxl)
## Warning: package 'readxl' was built under R version 3.3.2
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(gplots)
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library(ca)
## Warning: package 'ca' was built under R version 3.3.2
library(psych)   # 因子分析包
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## 导入需要的函数
source("Lin_scneed_fun.R")
## 加载R格式的数据表格 ####
# 数据已经经过筛选,选出的数据为答题时间在40~300(s)之间,城市为山西
load("usedata.RData")
# 查看数据
summary(usedata)
##     use_time         sheng      q1           q2      q3a     q3b    
##  Min.   : 43.0   山西   :302   男:146   大一  : 55   0:157   0:241  
##  1st Qu.: 79.0   北京   :  0   女:156   大二  : 97   1:145   1: 61  
##  Median :102.0   重庆   :  0            大三  :142                  
##  Mean   :111.1   福建   :  0            大四  :  7                  
##  3rd Qu.:130.5   广东   :  0            研究生:  1                  
##  Max.   :299.0   海南   :  0                                        
##                  (Other):  0                                        
##  q3c     q3d     q3e               q4             q5             q6     
##  0:246   0:147   0:249   500元以下  : 27   非常满意: 10   0—5次  :252  
##  1: 56   1:155   1: 53   500—900   :128   比较满意: 70   5—10次 : 34  
##                          900—1300元:122   满意    : 47   10—15次: 10  
##                          1300元以上 : 25   一般    :145   15次以上:  6  
##                                            差      : 30                 
##                                                                         
##                                                                         
##  q7a     q7b     q7c     q7d     q7e     q7f            q8     
##  0:233   0:252   0:121   0:196   0:175   0:154   6元以下 : 42  
##  1: 69   1: 50   1:181   1:106   1:127   1:148   6—10元 :219  
##                                                  10—15元: 38  
##                                                  15-20元:  1  
##                                                  20元以上:  2  
##                                                                
##                                                                
##    q9zhiliang      q9anquan        q9stime         q9jiage     
##  Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:6.00   1st Qu.:6.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :8.00   Median :8.000   Median :7.000   Median :7.000  
##  Mean   :7.07   Mean   :6.997   Mean   :6.447   Mean   :6.205  
##  3rd Qu.:8.00   3rd Qu.:8.000   3rd Qu.:8.000   3rd Qu.:8.000  
##  Max.   :8.00   Max.   :8.000   Max.   :8.000   Max.   :8.000  
##                                                                
##   q9baozhuang       q9kouwei      q9xiaoliang        q9kefu     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:6.000   1st Qu.:5.000   1st Qu.:6.000  
##  Median :6.000   Median :7.000   Median :6.000   Median :7.000  
##  Mean   :5.748   Mean   :6.599   Mean   :5.791   Mean   :6.517  
##  3rd Qu.:7.000   3rd Qu.:8.000   3rd Qu.:7.750   3rd Qu.:8.000  
##  Max.   :8.000   Max.   :8.000   Max.   :8.000   Max.   :8.000  
##                                                                 
##                  q10           q10a1                q10a2    
##  使用过            :186   未填    :  0   跳过          :116  
##  听说过,但未使用过:101   跳过    :116   30—60分钟    : 95  
##  没听说过          : 15   空      :  0   10—30分钟    : 70  
##                           0—5次  :161   60—90分钟    : 10  
##                           5—10次 : 20   10分钟内      :  7  
##                           10—15次:  3   一个半小时以上:  2  
##                           15次以上:  2   (Other)       :  2  
##     q10a3_1          q10a3_2          q10a3_3           q10a3_4       
##  Min.   :-3.000   Min.   :-3.000   Min.   :-3.0000   Min.   :-3.0000  
##  1st Qu.:-3.000   1st Qu.:-3.000   1st Qu.:-3.0000   1st Qu.:-3.0000  
##  Median : 3.000   Median : 3.000   Median : 3.0000   Median : 3.0000  
##  Mean   : 1.007   Mean   : 1.219   Mean   : 0.9735   Mean   : 0.9702  
##  3rd Qu.: 4.000   3rd Qu.: 4.000   3rd Qu.: 4.0000   3rd Qu.: 4.0000  
##  Max.   : 5.000   Max.   : 5.000   Max.   : 5.0000   Max.   : 5.0000  
##                                                                       
##     q10a3_5           q10a3_6          q10a4_1          q10a4_2      
##  Min.   :-3.0000   Min.   :-3.000   Min.   :-3.000   Min.   :-3.000  
##  1st Qu.:-3.0000   1st Qu.:-3.000   1st Qu.:-3.000   1st Qu.:-3.000  
##  Median : 2.0000   Median : 3.000   Median : 3.000   Median : 3.000  
##  Mean   : 0.8874   Mean   : 0.957   Mean   : 1.199   Mean   : 1.079  
##  3rd Qu.: 4.0000   3rd Qu.: 4.000   3rd Qu.: 4.000   3rd Qu.: 4.000  
##  Max.   : 5.0000   Max.   : 5.000   Max.   : 5.000   Max.   : 5.000  
##                                                                      
##     q10a4_3          q10a4_4       q10a5text        
##  Min.   :-3.000   Min.   :-3.00   Length:302        
##  1st Qu.:-3.000   1st Qu.:-3.00   Class :character  
##  Median : 3.000   Median : 3.00   Mode  :character  
##  Mean   : 1.126   Mean   : 1.03                     
##  3rd Qu.: 4.000   3rd Qu.: 4.00                     
##  Max.   : 5.000   Max.   : 5.00                     
##                                                     
##                 q10b1        score          
##  未填              :  0   Length:302        
##  跳过              :201   Class :character  
##  空                :  0   Mode  :character  
##  我在使用美团      : 24                     
##  我有喜欢的外卖电话: 16                     
##  其他              : 61                     
## 
## 单选题的频数统计和分析,数据可视化可以使用饼图等####
## 4 您每个月的生活费大概是:( )
# A 500元以下   B500—900元    C900—1300元    D1300元以上
q4t <- table(usedata$q4)
q4t
## 
##   500元以下    500—900 900—1300元  1300元以上 
##          27         128         122          25
q4dt = data.frame(q4t)
myLabel = as.vector(q4dt$Var1)   ## 转成向量,否则图例的标签可能与实际顺序不一致
myLabel = paste(myLabel, "(", round(q4dt$Freq / sum(q4dt$Freq) * 100, 2), "%)", sep = "")   ## 用 round() 对结果保留两位小数

p4 = ggplot(q4dt, aes(x = "", y =Freq, fill = Var1)) + 
  geom_bar(stat = "identity", width = 2) +    
  theme_grey(base_family = "STKaiti") +
  coord_polar(theta = "y") + labs(x = "", y = "", title = "生活费") +  ## 将标签设为空
  theme(axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5 )) + 
  theme(legend.title = element_blank(), legend.position = "bottom") +  ## 将图例标题设为空,并把土方放在上
  scale_fill_discrete(breaks = q4dt$Var1,labels = myLabel)  ## 将原来的图例标签换成现在的myLabel
p4

## 中空饼图 
p4 = ggplot(q4dt, aes(x = "", y =Freq, fill = Var1)) + 
  geom_bar(stat = "identity", width = 1) +   
  theme_minimal(base_family = "STKaiti") +
  annotate("text",x = 0,y = 0,label =  "") +
  coord_polar(theta = "y") + labs(x = "", y = "", title = "生活费") +  
  theme(axis.title.x = element_blank(),axis.title.y = element_blank(),
        axis.ticks=element_blank(),panel.grid=element_blank(),
        axis.text = element_blank(),plot.title = element_text(hjust = 0.5 )) + 
  theme(legend.title = element_blank(), legend.position = "right") +  
  scale_fill_discrete(breaks = q4dt$Var1,labels = myLabel)  
p4

## 频数条形图
## 2您的年级:( )   A大一   B大二    C大三     D大四     E研究生
q2t <- table(usedata$q2)
q2tdf = data.frame(q2t)
p2 <- ggplot(q2tdf,aes(x = reorder(Var1,Freq),y = Freq)) +
  theme_bw(base_family = "STKaiti") + 
  geom_bar(stat = "identity",fill = "lightblue") +
  coord_flip() +
  labs(x = "年级",title = "接受调查的人员分布",
       y = "频数")+
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_y_continuous(labels = function(x) paste(x , "人",sep = "")) +
  geom_text(aes(x = reorder(Var1,Freq),y = Freq+5,
                label = Freq))

p2

##==========================================================
## 多选题的简单频数分析 ####
## 7在您从未接触过外卖APP的情况下,以下何种因素会促使您使用(多选):( )
# A知名度   B广告宣传    C有折扣    D 快餐食品店种类多    E速度快
# F好友推荐
## 1:求多选题每个选项频率的函数####
my_fre <- function(x) {
  x <- as.numeric(x)
  return(sum(x)/length(x))
}
q7dt <- data.frame(question = c("知名度","广告宣传","有折扣 ",
                                "快餐食品店种类多","速度快","好友推荐"),
                   Fre=apply(usedata[,13:18], 2, my_fre))
q7dt
##             question       Fre
## q7a           知名度 0.2284768
## q7b         广告宣传 0.1655629
## q7c          有折扣  0.5993377
## q7d 快餐食品店种类多 0.3509934
## q7e           速度快 0.4205298
## q7f         好友推荐 0.4900662
## 构建条形图查看数据
p7 <- ggplot(q7dt,mapping = aes(x=question,y=Fre)) + 
  geom_bar(stat = 'identity',fill = 'steelblue', colour = 'darkred',width=0.7) +
  theme_grey(base_family = "STKaiti") + labs(x="",y="频率",title="为何使用APP") +
  theme(axis.title.y=element_text(),plot.title = element_text(hjust = 0.5)) +
  theme(axis.text.x=element_text(size=10,angle = 30),axis.text.y=element_text(size=10))

p7

##==========================================================
## 对量表题进行分析####
# 9如果您订购外卖,请您为以下因素打分(满分10分,越重要分越高):

## 使用单因素方差分析进行分析 --------
q9dt <- usedata[,20:27]

A <- factor(rep(1:dim(q9dt)[2],each=dim(q9dt)[1]),
            labels = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度"))
length(A)
## [1] 2416
q9dt <- unlist(q9dt)
head(q9dt)
## q9zhiliang1 q9zhiliang2 q9zhiliang3 q9zhiliang4 q9zhiliang5 q9zhiliang6 
##           8           8           6           7           8           8
## 单因素方差分析
q9data <- data.frame(q9dt,A)
a <- aov(q9dt~A,q9data)
anova(a)
## Analysis of Variance Table
## 
## Response: q9dt
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## A            7  510.1  72.869  22.539 < 2.2e-16 ***
## Residuals 2408 7785.1   3.233                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 方差分析结果具有差异 

## 打分的箱线图
xtext = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度")
p9 <- ggplot(q9data,aes(x=A,y=q9dt)) + theme_grey(base_family = "STKaiti") +
  geom_boxplot(fill = "white", colour = "#3366FF",outlier.colour = "red", outlier.shape = 1) +
  labs(x = "因素",y = "得分",title = "影响订购外卖的因素") +
  theme(axis.text.x=element_text(,angle = 30),axis.text.y=element_text()) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_x_discrete(labels = xtext)  # 设置x轴的标签

p9

## 各总体均值的可视化
# 计算每个因数的平均的分
q9me <- apply(usedata[,20:27], 2,mean)
q9me
##  q9zhiliang    q9anquan     q9stime     q9jiage q9baozhuang    q9kouwei 
##    7.069536    6.996689    6.447020    6.205298    5.748344    6.599338 
## q9xiaoliang      q9kefu 
##    5.791391    6.516556
par(family='STKaiti',cex.axis = 0.75)
plotmeans(q9dt~A,q9data,xlab = "因素",ylab = "得分",col = "red",
          main = "“影响订购外卖的因素")
text(1:length(q9me),q9me-0.1,labels = round(q9me,3))

## 进行因子分析:如果您订购外卖,请您为以下因素打分(满分8分,越重要分越高)
q9yinzi <- usedata[,20:27]
## 判断需提取的公共因子数 碎石图
fa.parallel(q9yinzi,fa="fa")

## Parallel analysis suggests that the number of factors =  3  and the number of components =  NA
## 提取2个公共因子
q9yinzi_fa <- fa(cov(q9yinzi),nfactors=2)
## Loading required namespace: GPArotation
q9yinzi_fa
## Factor Analysis using method =  minres
## Call: fa(r = cov(q9yinzi), nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
##               MR1   MR2   h2   u2 com
## q9zhiliang   0.99 -0.08 0.90 0.10 1.0
## q9anquan     0.96 -0.08 0.84 0.16 1.0
## q9stime      0.72  0.11 0.62 0.38 1.0
## q9jiage      0.54  0.33 0.60 0.40 1.7
## q9baozhuang  0.27  0.53 0.52 0.48 1.5
## q9kouwei     0.61  0.27 0.63 0.37 1.4
## q9xiaoliang -0.03  0.88 0.75 0.25 1.0
## q9kefu       0.56  0.35 0.65 0.35 1.7
## 
##                        MR1  MR2
## SS loadings           3.79 1.72
## Proportion Var        0.47 0.21
## Cumulative Var        0.47 0.69
## Proportion Explained  0.69 0.31
## Cumulative Proportion 0.69 1.00
## 
##  With factor correlations of 
##      MR1  MR2
## MR1 1.00 0.57
## MR2 0.57 1.00
## 
## Mean item complexity =  1.3
## Test of the hypothesis that 2 factors are sufficient.
## 
## The degrees of freedom for the null model are  28  and the objective function was  5.95
## The degrees of freedom for the model are 13  and the objective function was  0.21 
## 
## The root mean square of the residuals (RMSR) is  0.03 
## The df corrected root mean square of the residuals is  0.05 
## 
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.98 0.92
## Multiple R square of scores with factors          0.95 0.84
## Minimum correlation of possible factor scores     0.90 0.68
## 
##  Factor scores estimated using the  regression  method  have correlations of 
##                                                  MR1   MR2
## Correlation of scores with factors              0.59  0.50
## Multiple R square of scores with factors        0.34  0.25
## Minimum correlation of possible factor scores  -0.31 -0.50
##  列连表分析 对应分析 ####
# 4 您每个月的生活费大概是:( )yu 6您每周平均定外卖的次数?:( )
t46 <- table(usedata$q4,usedata$q6)
t46
##              
##               0—5次 5—10次 10—15次 15次以上
##   500元以下       24       1        0        2
##   500—900       116       9        3        0
##   900—1300元     93      21        6        2
##   1300元以上      19       3        1        2
# 卡方检验  有显著性关系
chisq.test(t46)
## Warning in chisq.test(t46): Chi-squared approximation may be incorrect
## 
##  Pearson's Chi-squared test
## 
## data:  t46
## X-squared = 22.472, df = 9, p-value = 0.007497
## 对应分析
df46 <- tab2df(usedata$q4,usedata$q6)
df46ca <- ca(df46)
df46ca
## 
##  Principal inertias (eigenvalues):
##            1        2        3       
## Value      0.037846 0.036262 0.000303
## Percentage 50.86%   48.73%   0.41%   
## 
## 
##  Rows:
##         900—1300元  500—900 1300元以上 500元以下
## Mass       0.089404  0.423841   0.403974  0.082781
## ChiDist    0.485032  0.211532   0.214798  0.436535
## Inertia    0.021033  0.018965   0.018639  0.015775
## Dim. 1     2.255129 -0.563764  -0.337629  2.098563
## Dim. 2     1.070890  0.949244  -1.073143 -0.779755
## 
## 
##  Columns:
##            0—5次  10—15次  15次以上   5—10次
## Mass     0.834437  0.112583  0.033113  0.019868
## ChiDist  0.084842  0.461934  0.473594  1.363842
## Inertia  0.006006  0.024023  0.007427  0.036955
## Dim. 1  -0.057122 -0.546273 -0.831959  6.881256
## Dim. 2   0.441702 -2.357133 -2.295323 -1.368879
# 查看卡方检验值
summary(df46ca)
## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.037846  50.9  50.9  *************            
##  2      0.036262  48.7  99.6  ************             
##  3      0.000303   0.4 100.0                           
##         -------- -----                                 
##  Total: 0.074411 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 | 9001 |   89  995  283 |  439 818 455 |  204 177 103 |
## 2 | 5009 |  424  999  255 | -110 269 135 |  181 730 382 |
## 3 | 1300 |  404  999  250 |  -66  94  46 | -204 905 465 |
## 4 |  500 |   83  990  212 |  408 875 365 | -148 116  50 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   05 |  834 1000   81 |  -11  17   3 |   84 983 163 |
## 2 | 1015 |  113  997  323 | -106  53  34 | -449 944 626 |
## 3 |   15 |   33  969  100 | -162 117  23 | -437 852 174 |
## 4 |  510 |   20 1000  497 | 1339 963 941 | -261  37  37 |
## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df46ca,main = "生活费VS每周平均定外卖的次数")
plot(df46ca,main = "生活费VS每周平均定外卖的次数")