lin_shichangana

## 市场调查大赛数据分析,数据详细分析####
# 大学生对饿了么的使用情况
# 组名：晓
# @ 带土

# 更改工作空间####
setwd("/Users/daitu/R/市场调查")
getwd()

## [1] "/Users/daitu/R/市场调查"

## 加载包####
library(readxl)
library(stringr)
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 3.3.2

library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(gplots)

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

library(ca)
library(psych)   # 因子分析包

## 
## Attaching package: 'psych'

## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

## 导入需要的函数
source("Lin_scneed_fun.R")
## 加载R格式的数据表格 ####
# 数据已经经过筛选，选出的数据为答题时间在40～300（s）之间，城市为山西
load("usedata.RData")
# 查看数据
typeof(usedata)

## [1] "list"

str(usedata)

## Classes 'tbl_df' and 'data.frame':   302 obs. of  43 variables:
##  $ use_time   : num  49 79 143 135 120 100 131 112 76 121 ...
##  $ sheng      : Factor w/ 24 levels "北京","重庆",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ q1         : Factor w/ 2 levels "男","女": 1 1 2 2 1 2 2 2 1 1 ...
##  $ q2         : Factor w/ 5 levels "大一","大二",..: 3 3 3 3 2 2 2 3 2 3 ...
##  $ q3a        : Factor w/ 2 levels "0","1": 2 2 2 2 1 2 1 2 1 1 ...
##  $ q3b        : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 1 2 1 ...
##  $ q3c        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 1 1 1 ...
##  $ q3d        : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 1 2 1 ...
##  $ q3e        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 2 1 2 ...
##  $ q4         : Factor w/ 4 levels "500元以下","500—900",..: 3 2 3 3 3 2 3 2 4 3 ...
##  $ q5         : Factor w/ 5 levels "非常满意","比较满意",..: 4 3 4 4 5 4 4 4 4 4 ...
##  $ q6         : Factor w/ 4 levels "0—5次","5—10次",..: 1 1 1 1 3 1 1 1 1 1 ...
##  $ q7a        : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ q7b        : Factor w/ 2 levels "0","1": 1 1 1 2 1 1 1 2 1 1 ...
##  $ q7c        : Factor w/ 2 levels "0","1": 2 2 1 1 2 2 2 2 1 2 ...
##  $ q7d        : Factor w/ 2 levels "0","1": 1 2 2 2 1 2 2 1 2 1 ...
##  $ q7e        : Factor w/ 2 levels "0","1": 2 2 1 1 1 2 2 1 1 2 ...
##  $ q7f        : Factor w/ 2 levels "0","1": 1 2 2 1 1 1 1 1 2 2 ...
##  $ q8         : Factor w/ 5 levels "6元以下","6—10元",..: 2 2 2 2 2 2 1 2 3 2 ...
##  $ q9zhiliang : num  8 8 6 7 8 8 8 6 8 8 ...
##  $ q9anquan   : num  7 8 6 7 8 8 8 6 8 8 ...
##  $ q9stime    : num  8 8 6 7 8 8 6 6 8 6 ...
##  $ q9jiage    : num  7 7 6 5 5 7 8 6 8 6 ...
##  $ q9baozhuang: num  7 6 6 5 1 6 4 6 8 4 ...
##  $ q9kouwei   : num  6 7 7 6 8 5 5 6 8 4 ...
##  $ q9xiaoliang: num  7 7 7 4 4 6 5 5 8 4 ...
##  $ q9kefu     : num  6 5 7 6 8 5 5 7 8 4 ...
##  $ q10        : Factor w/ 3 levels "使用过","听说过，但未使用过",..: 2 1 1 1 1 2 2 1 2 1 ...
##  $ q10a1      : Factor w/ 7 levels "未填","跳过",..: 2 4 4 4 6 2 2 4 2 5 ...
##  $ q10a2      : Factor w/ 9 levels "未填","跳过",..: 2 6 6 6 6 2 2 5 2 4 ...
##  $ q10a3_1    : num  -3 4 4 4 5 -3 -3 3 -3 3 ...
##  $ q10a3_2    : num  -3 5 4 4 5 -3 -3 4 -3 4 ...
##  $ q10a3_3    : num  -3 4 4 4 5 -3 -3 4 -3 4 ...
##  $ q10a3_4    : num  -3 4 4 4 5 -3 -3 4 -3 4 ...
##  $ q10a3_5    : num  -3 3 4 3 5 -3 -3 4 -3 4 ...
##  $ q10a3_6    : num  -3 4 4 4 5 -3 -3 4 -3 3 ...
##  $ q10a4_1    : num  -3 5 4 4 2 -3 -3 4 -3 4 ...
##  $ q10a4_2    : num  -3 4 4 3 2 -3 -3 4 -3 4 ...
##  $ q10a4_3    : num  -3 5 4 5 2 -3 -3 3 -3 4 ...
##  $ q10a4_4    : num  -3 4 5 3 2 -3 -3 3 -3 4 ...
##  $ q10a5text  : chr  "(跳过)" "送餐时间尽量再短点！" "(空)" "(空)" ...
##  $ q10b1      : Factor w/ 6 levels "未填","跳过",..: 6 2 2 2 2 5 4 2 6 2 ...
##  $ score      : chr  "56.000000" "98.000000" "92.000000" "85.000000" ...

summary(usedata)

##     use_time         sheng      q1           q2      q3a     q3b    
##  Min.   : 43.0   山西   :302   男:146   大一  : 55   0:157   0:241  
##  1st Qu.: 79.0   北京   :  0   女:156   大二  : 97   1:145   1: 61  
##  Median :102.0   重庆   :  0            大三  :142                  
##  Mean   :111.1   福建   :  0            大四  :  7                  
##  3rd Qu.:130.5   广东   :  0            研究生:  1                  
##  Max.   :299.0   海南   :  0                                        
##                  (Other):  0                                        
##  q3c     q3d     q3e               q4             q5             q6     
##  0:246   0:147   0:249   500元以下  : 27   非常满意: 10   0—5次  :252  
##  1: 56   1:155   1: 53   500—900   :128   比较满意: 70   5—10次 : 34  
##                          900—1300元:122   满意    : 47   10—15次: 10  
##                          1300元以上 : 25   一般    :145   15次以上:  6  
##                                            差      : 30                 
##                                                                         
##                                                                         
##  q7a     q7b     q7c     q7d     q7e     q7f            q8     
##  0:233   0:252   0:121   0:196   0:175   0:154   6元以下 : 42  
##  1: 69   1: 50   1:181   1:106   1:127   1:148   6—10元 :219  
##                                                  10—15元: 38  
##                                                  15－20元:  1  
##                                                  20元以上:  2  
##                                                                
##                                                                
##    q9zhiliang      q9anquan        q9stime         q9jiage     
##  Min.   :1.00   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:6.00   1st Qu.:6.000   1st Qu.:5.000   1st Qu.:5.000  
##  Median :8.00   Median :8.000   Median :7.000   Median :7.000  
##  Mean   :7.07   Mean   :6.997   Mean   :6.447   Mean   :6.205  
##  3rd Qu.:8.00   3rd Qu.:8.000   3rd Qu.:8.000   3rd Qu.:8.000  
##  Max.   :8.00   Max.   :8.000   Max.   :8.000   Max.   :8.000  
##                                                                
##   q9baozhuang       q9kouwei      q9xiaoliang        q9kefu     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:5.000   1st Qu.:6.000   1st Qu.:5.000   1st Qu.:6.000  
##  Median :6.000   Median :7.000   Median :6.000   Median :7.000  
##  Mean   :5.748   Mean   :6.599   Mean   :5.791   Mean   :6.517  
##  3rd Qu.:7.000   3rd Qu.:8.000   3rd Qu.:7.750   3rd Qu.:8.000  
##  Max.   :8.000   Max.   :8.000   Max.   :8.000   Max.   :8.000  
##                                                                 
##                  q10           q10a1                q10a2    
##  使用过            :186   未填    :  0   跳过          :116  
##  听说过，但未使用过:101   跳过    :116   30—60分钟    : 95  
##  没听说过          : 15   空      :  0   10—30分钟    : 70  
##                           0—5次  :161   60—90分钟    : 10  
##                           5—10次 : 20   10分钟内      :  7  
##                           10—15次:  3   一个半小时以上:  2  
##                           15次以上:  2   (Other)       :  2  
##     q10a3_1          q10a3_2          q10a3_3           q10a3_4       
##  Min.   :-3.000   Min.   :-3.000   Min.   :-3.0000   Min.   :-3.0000  
##  1st Qu.:-3.000   1st Qu.:-3.000   1st Qu.:-3.0000   1st Qu.:-3.0000  
##  Median : 3.000   Median : 3.000   Median : 3.0000   Median : 3.0000  
##  Mean   : 1.007   Mean   : 1.219   Mean   : 0.9735   Mean   : 0.9702  
##  3rd Qu.: 4.000   3rd Qu.: 4.000   3rd Qu.: 4.0000   3rd Qu.: 4.0000  
##  Max.   : 5.000   Max.   : 5.000   Max.   : 5.0000   Max.   : 5.0000  
##                                                                       
##     q10a3_5           q10a3_6          q10a4_1          q10a4_2      
##  Min.   :-3.0000   Min.   :-3.000   Min.   :-3.000   Min.   :-3.000  
##  1st Qu.:-3.0000   1st Qu.:-3.000   1st Qu.:-3.000   1st Qu.:-3.000  
##  Median : 2.0000   Median : 3.000   Median : 3.000   Median : 3.000  
##  Mean   : 0.8874   Mean   : 0.957   Mean   : 1.199   Mean   : 1.079  
##  3rd Qu.: 4.0000   3rd Qu.: 4.000   3rd Qu.: 4.000   3rd Qu.: 4.000  
##  Max.   : 5.0000   Max.   : 5.000   Max.   : 5.000   Max.   : 5.000  
##                                                                      
##     q10a4_3          q10a4_4       q10a5text        
##  Min.   :-3.000   Min.   :-3.00   Length:302        
##  1st Qu.:-3.000   1st Qu.:-3.00   Class :character  
##  Median : 3.000   Median : 3.00   Mode  :character  
##  Mean   : 1.126   Mean   : 1.03                     
##  3rd Qu.: 4.000   3rd Qu.: 4.00                     
##  Max.   : 5.000   Max.   : 5.00                     
##                                                     
##                 q10b1        score          
##  未填              :  0   Length:302        
##  跳过              :201   Class :character  
##  空                :  0   Mode  :character  
##  我在使用美团      : 24                     
##  我有喜欢的外卖电话: 16                     
##  其他              : 61                     
##

## 关于答题时间的分析 40~300(s)
## 直方图
# par(family='STKaiti',bty="o")
# timedata <- hist(usedata$use_time,breaks = 15,
#                  col = "blue", probability = F, border = "pink",
#                  main = "填写问卷使用时间",xlab = "时间(s)", ylab = "频数",
#                  ylim = c(0,length(usedata$use_time)/4),
#                  xlim = c(min(usedata$use_time),max(usedata$use_time)))
# text(timedata$mids,timedata$counts+2,timedata$counts)
# lines(density(usedata$use_time,bw = 1),col="red",lwd=2)
# 
# density(usedata$use_time,bw = 1)

## 单选题的频数统计和分析 ####
## 1 您的性别：（ ）   A男                   B女
q1t <- table(usedata$q1)
par(family='STKaiti',bty="o")
pie(q1t,radius = 0.9,col = rainbow(length(q1t)),clockwise = TRUE,main = "填写问卷的情况")

barplot(q1t,col= "blue",main = "填写问卷的情况",ylim = c(0,max(q1t)+10))

q1dt <- data.frame(table(usedata$q1))
mylabel <- as.vector(q1dt$Var1)
mylabel <- paste(mylabel,"(",round(q1dt$Freq / sum(q1dt$Freq) * 100,2),"%)",sep = "")
p1 = ggplot(q1dt,aes(x="",y=Freq,fill=Var1)) + 
  geom_bar(stat = "identity", width = 1) + 
  theme_grey(base_family = "STKaiti") + 
  coord_polar(theta = "y") + labs(x = "", y = "", title = "男女比例") +
  theme(axis.ticks = element_blank()) +
  theme(legend.title = element_blank(), legend.position = "right") +
  scale_fill_discrete(breaks = q1dt$Var1,labels = mylabel) 
p1

# bmp(filename ="男女饼图.bmp")
# par(family='STKaiti',bty="o")
# pie(q1t,radius = 0.9,col = rainbow(length(q1t)),clockwise = TRUE,main = "填写问卷的情况")
# dev.off()

# tiff(filename = "男女饼图.tiff",res = 600,width = 2400, height = 2400,compression = "lzw")
# par(family='STKaiti',bty="o")
# pie(q1t,radius = 0.9,col = rainbow(length(q1t)),clockwise = TRUE,main = "填写问卷的情况")
# dev.off()

## 2您的年级：（ ）   A大一   B大二    C大三     D大四     E研究生
q2t <- table(usedata$q2)
table(usedata$q2) / length(usedata$q2)

## 
##        大一        大二        大三        大四      研究生 
## 0.182119205 0.321192053 0.470198675 0.023178808 0.003311258

par(family='STKaiti',bty="o")
a <- barplot(q2t,horiz = TRUE,main = "年级情况",xlim=c(0,max(q2t)),col = "red",
             xlab = "频数",ylab = "年级")
box()

## 4 您每个月的生活费大概是：（ ）
# A 500元以下   B500—900元    C900—1300元    D1300元以上
q4t <- table(usedata$q4)
q4t

## 
##   500元以下    500—900 900—1300元  1300元以上 
##          27         128         122          25

q4dt = data.frame(q4t)
myLabel = as.vector(q4dt$Var1)   ## 转成向量，否则图例的标签可能与实际顺序不一致
myLabel = paste(myLabel, "(", round(q4dt$Freq / sum(q4dt$Freq) * 100, 2), "%)", sep = "")   ## 用 round() 对结果保留两位小数

p4 = ggplot(q4dt, aes(x = "", y =Freq, fill = Var1)) + 
  geom_bar(stat = "identity", width = 1) +    ## width >= 1 时中心的杂点将消失
  theme_grey(base_family = "STKaiti") +
  coord_polar(theta = "y") + labs(x = "", y = "", title = "生活费") +  ## 将标签设为空
  theme(axis.ticks = element_blank()) + ## 把左上角多出来的“小胡子”去掉
  theme(legend.title = element_blank(), legend.position = "bottom") +  ## 将图例标题设为空，并把土方放在上
  scale_fill_discrete(breaks = q4dt$Var1,labels = myLabel)  ## 将原来的图例标签换成现在的myLabel
p4

## 5 您对食堂饭菜的满意度：（ ）
# A非常满意    B比较满意    C满意    D一般    E差
q5t <- table(usedata$q5)
q5dt <- data.frame(q5t)
# biaoqian
myLabel = as.vector(q5dt$Var1)
myLabel = paste(myLabel,"(",round(q5dt$Freq / sum(q5dt$Freq)*100,2),"%)",sep = "")
p5 <- ggplot(q5dt,aes(x="",y = Freq, fill = Var1)) +
  geom_bar(stat = "identity",width = 1) +
  theme_grey(base_family = "STKaiti") +    # 设置字体为楷体
  coord_polar(theta = "y") +labs(x="",y="",title = "对食堂饭菜的满意度") +
  theme(axis.ticks = element_blank()) + 
  theme(legend.title = element_blank(),legend.position = "bottom") + 
  scale_fill_discrete(breaks = q5dt$Var1,labels = myLabel)
p5

## 6您每周平均定外卖的次数？：（ ）
# A 0—5次     B 5—10次    C 10—15次    D 15次以上
q6t <- table(usedata$q6)
q6dt <- data.frame(q6t)
# biaoqian
myLabel = as.vector(q6dt$Var1)
myLabel = paste(myLabel,"(",round(q6dt$Freq / sum(q6dt$Freq)*100,2),"%)",sep = "")
p6 <- ggplot(q6dt,aes(x="",y = Freq, fill = Var1)) +
  geom_bar(stat = "identity",width = 1) +
  theme_grey(base_family = "STKaiti") +    # 设置字体为楷体
  coord_polar(theta = "y") +labs(x="",y="",title = "平均每周定外卖的次数") +
  theme(axis.ticks = element_blank()) + 
  theme(legend.title = element_blank(),legend.position = "bottom") + 
  scale_fill_discrete(breaks = q6dt$Var1,labels = myLabel)
p6

## 8如果您选择外卖，那您对快餐理想的价格是？：（ ）
# A 6元以下   B 6—10元  C10—15元  D 15－20元  E 20元以上
q8t <- table(usedata$q8)
q8dt <- data.frame(q8t)
# biaoqian
myLabel = as.vector(q8dt$Var1)
myLabel = paste(myLabel,"(",round(q8dt$Freq / sum(q8dt$Freq)*100,2),"%)",sep = "")
p8 <- ggplot(q8dt,aes(x="",y = Freq, fill = Var1)) +
  geom_bar(stat = "identity",width = 1) +
  theme_grey(base_family = "STKaiti") +    # 设置字体为楷体
  coord_polar(theta = "y") +labs(x="",y="",title = "对快餐理想的价格") +
  theme(axis.ticks = element_blank()) + 
  theme(legend.title = element_blank(),legend.position = "right") + 
  scale_fill_discrete(breaks = q8dt$Var1,labels = myLabel)
p8

## 





## 10您是否有使用过或者听说过“饿了么”外卖？：（ ）
# A使用过      B听说过，但未使用过     C没听说过（终止调查）
q10t <- table(usedata$q10)
q10dt <- data.frame(q10t)
# biaoqian
myLabel = as.vector(q10dt$Var1)
myLabel = paste(myLabel,"(",round(q10dt$Freq / sum(q10dt$Freq)*100,2),"%)",sep = "")
p10 <- ggplot(q10dt,aes(x="",y = Freq, fill = Var1)) +
  geom_bar(stat = "identity",width = 1) +
  theme_grey(base_family = "STKaiti") +    # 设置字体为楷体
  coord_polar(theta = "y") +labs(x="",y="",title = "“饿了么”使用情况") +
  theme(axis.ticks = element_blank()) + 
  theme(legend.title = element_blank(),legend.position = "right") + 
  scale_fill_discrete(breaks = q10dt$Var1,labels = myLabel)
p10

## 多选题的简单频数分析 ####
# 3 您认为你属于下列哪一种类型的人呢？(多选)：（ ）
# A 宅男宅女   B 运动达人   C美食家   D 单身汪    E 恋爱中


q3dt <- data.frame(question = c("宅男宅女","运动达人","美食家","单身汪","恋爱中"),
                  Fre=apply(usedata[,5:9], 2, my_fre))
q3dt

##     question       Fre
## q3a 宅男宅女 0.4801325
## q3b 运动达人 0.2019868
## q3c   美食家 0.1854305
## q3d   单身汪 0.5132450
## q3e   恋爱中 0.1754967

## 构建条形图查看数据
# pdf(file="fig13.pdf",pointsize=16)
p3 <- ggplot(q3dt,mapping = aes(x=question,y=Fre)) + 
  geom_bar(stat = 'identity',fill = 'steelblue', colour = 'darkred',width=0.7) +
  theme_grey(base_family = "STKaiti") + labs(x="",y="频率",title="生活状态") +
  theme(axis.title.x =element_text(size=14), axis.title.y=element_text(size=14)) +
  theme(axis.text.x=element_text(size=14),axis.text.y=element_text(size=12))
# dev.off()
p3

## 7在您从未接触过外卖APP的情况下，以下何种因素会促使您使用（多选）：（ ）
# A知名度   B广告宣传    C有折扣    D 快餐食品店种类多    E速度快
# F好友推荐
q7dt <- data.frame(question = c("知名度","广告宣传","有折扣 ",
                                "快餐食品店种类多","速度快","好友推荐"),
                   Fre=apply(usedata[,13:18], 2, my_fre))
q7dt

##             question       Fre
## q7a           知名度 0.2284768
## q7b         广告宣传 0.1655629
## q7c          有折扣  0.5993377
## q7d 快餐食品店种类多 0.3509934
## q7e           速度快 0.4205298
## q7f         好友推荐 0.4900662

## 构建条形图查看数据
p7 <- ggplot(q7dt,mapping = aes(x=question,y=Fre)) + 
  geom_bar(stat = 'identity',fill = 'steelblue', colour = 'darkred',width=0.7) +
  theme_grey(base_family = "STKaiti") + labs(x="",y="频率",title="为何使用APP") +
  theme(axis.title.y=element_text(size=14)) +
  theme(axis.text.x=element_text(size=12,angle = 30),axis.text.y=element_text(size=12))

p7

## 对量表题进行分析####
# 9如果您订购外卖，请您为以下因素打分（满分10分，越重要分越高）：

## 使用单因素方差分析进行分析 －－－－－－－－
# length(q9data)
q9dt <- usedata[,20:27]
str(usedata$q9kefu)

##  num [1:302] 6 5 7 6 8 5 5 7 8 4 ...

# mean(as.matrix(usedata[1:302,20:27]))
# 计算每个因数的平均的分
q9me <- apply(as.matrix(usedata[1:dim(q9dt)[1],20:27]), 2,mean)
q9me

##  q9zhiliang    q9anquan     q9stime     q9jiage q9baozhuang    q9kouwei 
##    7.069536    6.996689    6.447020    6.205298    5.748344    6.599338 
## q9xiaoliang      q9kefu 
##    5.791391    6.516556

# 定义因子变量代表各个因素
A <- factor(rep(1:dim(q9dt)[2],each=dim(q9dt)[1]),
            labels = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度"))
str(A)

##  Factor w/ 8 levels "质量","安全",..: 1 1 1 1 1 1 1 1 1 1 ...

length(A)

## [1] 2416

q9dt <- unlist(q9dt)
head(q9dt)

## q9zhiliang1 q9zhiliang2 q9zhiliang3 q9zhiliang4 q9zhiliang5 q9zhiliang6 
##           8           8           6           7           8           8

# q9data <- data.frame(q9dt)
# head(q9data)
## 单因素方差分析
q9data <- data.frame(q9dt,A)
head(q9data)

##             q9dt    A
## q9zhiliang1    8 质量
## q9zhiliang2    8 质量
## q9zhiliang3    6 质量
## q9zhiliang4    7 质量
## q9zhiliang5    8 质量
## q9zhiliang6    8 质量

a <- aov(q9dt~A,q9data)
anova(a)

## Analysis of Variance Table
## 
## Response: q9dt
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## A            7  510.1  72.869  22.539 < 2.2e-16 ***
## Residuals 2408 7785.1   3.233                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 各总体均值的可视化
par(family='STKaiti',cex.axis = 0.7)
plotmeans(q9dt~A,q9data,xlab = "影响因素",ylab = "得分",col = "red",
          main = "影响订购外卖的因素",ylim = c(5,7.5))
text(1:8,q9me-0.1,labels = round(q9me,3))

q9dt <- usedata[,20:27]

A <- factor(rep(1:dim(q9dt)[2],each=dim(q9dt)[1]))
length(A)

## [1] 2416

q9dt <- unlist(q9dt)
head(q9dt)

## q9zhiliang1 q9zhiliang2 q9zhiliang3 q9zhiliang4 q9zhiliang5 q9zhiliang6 
##           8           8           6           7           8           8

# q9data <- data.frame(q9dt)
# head(q9data)
## 单因素方差分析
q9data <- data.frame(q9dt,A)
a <- aov(q9dt~A,q9data)
anova(a)

## Analysis of Variance Table
## 
## Response: q9dt
##             Df Sum Sq Mean Sq F value    Pr(>F)    
## A            7  510.1  72.869  22.539 < 2.2e-16 ***
## Residuals 2408 7785.1   3.233                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 箱线图

# boxplot(usedata[,20:27])
xtext = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度")
p9 <- ggplot(q9data,aes(x=A,y=q9dt)) + theme_grey(base_family = "STKaiti") +
  geom_boxplot(fill = "white", colour = "#3366FF",outlier.colour = "red", outlier.shape = 1) +
  labs(x = "因素",y = "得分",title = "影响订购外卖的因素") +
  theme(axis.title.x =element_text(size=14), axis.title.y=element_text(size=14)) +
  theme(axis.text.x=element_text(size=12,angle = 30),axis.text.y=element_text(size=12)) +
  scale_x_discrete(labels = xtext)  # 设置x轴的标签

p9

## 进行因子分析：如果您订购外卖，请您为以下因素打分（满分8分，越重要分越高）
q9yinzi <- usedata[,20:27]
q9yinzi

## # A tibble: 302 × 8
##    q9zhiliang q9anquan q9stime q9jiage q9baozhuang q9kouwei q9xiaoliang
##         <dbl>    <dbl>   <dbl>   <dbl>       <dbl>    <dbl>       <dbl>
## 1           8        7       8       7           7        6           7
## 2           8        8       8       7           6        7           7
## 3           6        6       6       6           6        7           7
## 4           7        7       7       5           5        6           4
## 5           8        8       8       5           1        8           4
## 6           8        8       8       7           6        5           6
## 7           8        8       6       8           4        5           5
## 8           6        6       6       6           6        6           5
## 9           8        8       8       8           8        8           8
## 10          8        8       6       6           4        4           4
## # ... with 292 more rows, and 1 more variables: q9kefu <dbl>

# factanal(q9duiying,factors = 2,scores = "regression")
## 判断需提取的公共因子数 碎石图
fa.parallel(q9yinzi)

## Parallel analysis suggests that the number of factors =  3  and the number of components =  1

## 提取公共因子
q9yinzi_fa <- fa(cov(q9yinzi),nfactors=2)

## Loading required namespace: GPArotation

##
factor.plot(q9yinzi_fa)

## 提取使用过的数据，进行后续分析####

# levels(usedata$q10) <- 1:3
## 使用过的用户
elmdataA <- usedata[which(usedata$q10 == "使用过"),]
dim(elmdataA)

## [1] 186  43

## 听说过但未使用过的用户数据
elmdataB <- usedata[which(usedata$q10 == "听说过，但未使用过"),]
dim(elmdataB)

## [1] 101  43

## 没有使用过的用户
elmdataC <- usedata[which(usedata$q10 == "没听说过"),]
dim(elmdataC)

## [1] 15 43

## 为何不使用   饿了么   ####
q10b1data <- elmdataB$q10b1
q10b1data <- factor(q10b1data,labels = unique(q10b1data))
q10b1dt <- as.data.frame(table(q10b1data))
dim(q10b1dt)

## [1] 3 2

# q10a1dt <- q10a1dt[which(q10a1dt$Freq != 0),]

q10b1dt = q10b1dt[order(q10b1dt$Freq, decreasing = TRUE),] # 使饼图排序
q10b1dt

##            q10b1data Freq
## 3       我在使用美团   61
## 1               其他   24
## 2 我有喜欢的外卖电话   16

## 饼图
mylabel <- as.vector(q10b1dt$q10b1data)
mylabel <- paste(mylabel,"(",round(q10b1dt$Freq / sum(q10b1dt$Freq) * 100,2),"%)",sep = "")
p10b1 = ggplot(q10b1dt,aes(x="",y=Freq,fill=q10b1data)) +
  geom_bar(stat = "identity", width = 1) +
  theme_grey(base_family = "STKaiti") +
  coord_polar(theta = "y") + labs(x = "", y = "", title = "为何不使用\"饿了么\"") +
  theme(axis.ticks = element_blank()) +
  theme(legend.title = element_blank(), legend.position = "top") +
  scale_fill_discrete(breaks = q10b1dt$q10b1data,labels = mylabel) +
  theme(axis.text.x = element_blank()) 
  ## 白色的外框即是原柱状图的X轴，把X轴的刻度文字去掉即可
p10b1

## 保存所需要的数据 ####

save(elmdataA,file = "elmdataA.RData")
save(elmdataB,file = "elmdataB.RData")







##  列连表分析 对应分析 ####
# 4 您每个月的生活费大概是：（ ）yu 6您每周平均定外卖的次数？：（ ）
t46 <- table(usedata$q4,usedata$q6)
t46

##              
##               0—5次 5—10次 10—15次 15次以上
##   500元以下       24       1        0        2
##   500—900       116       9        3        0
##   900—1300元     93      21        6        2
##   1300元以上      19       3        1        2

# 卡方检验  有显著性关系
chisq.test(t46)

## Warning in chisq.test(t46): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  t46
## X-squared = 22.472, df = 9, p-value = 0.007497

## 对应分析
df46 <- tab2df(usedata$q4,usedata$q6)
df46ca <- ca(df46)
df46ca

## 
##  Principal inertias (eigenvalues):
##            1        2        3       
## Value      0.037846 0.036262 0.000303
## Percentage 50.86%   48.73%   0.41%   
## 
## 
##  Rows:
##         900—1300元  500—900 1300元以上 500元以下
## Mass       0.089404  0.423841   0.403974  0.082781
## ChiDist    0.485032  0.211532   0.214798  0.436535
## Inertia    0.021033  0.018965   0.018639  0.015775
## Dim. 1     2.255129 -0.563764  -0.337629  2.098563
## Dim. 2     1.070890  0.949244  -1.073143 -0.779755
## 
## 
##  Columns:
##            0—5次  10—15次  15次以上   5—10次
## Mass     0.834437  0.112583  0.033113  0.019868
## ChiDist  0.084842  0.461934  0.473594  1.363842
## Inertia  0.006006  0.024023  0.007427  0.036955
## Dim. 1  -0.057122 -0.546273 -0.831959  6.881256
## Dim. 2   0.441702 -2.357133 -2.295323 -1.368879

# 查看卡方检验值
summary(df46ca)

## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.037846  50.9  50.9  *************            
##  2      0.036262  48.7  99.6  ************             
##  3      0.000303   0.4 100.0                           
##         -------- -----                                 
##  Total: 0.074411 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 | 9001 |   89  995  283 |  439 818 455 |  204 177 103 |
## 2 | 5009 |  424  999  255 | -110 269 135 |  181 730 382 |
## 3 | 1300 |  404  999  250 |  -66  94  46 | -204 905 465 |
## 4 |  500 |   83  990  212 |  408 875 365 | -148 116  50 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   05 |  834 1000   81 |  -11  17   3 |   84 983 163 |
## 2 | 1015 |  113  997  323 | -106  53  34 | -449 944 626 |
## 3 |   15 |   33  969  100 | -162 117  23 | -437 852 174 |
## 4 |  510 |   20 1000  497 | 1339 963 941 | -261  37  37 |

## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df46ca,main = "生活费VS每周平均定外卖的次数")
plot(df46ca,main = "生活费VS每周平均定外卖的次数")

# xtabs(~q4+q6,data = usedata)



## 5 您对食堂饭菜的满意度：（ ）yu  6 您每周平均定外卖的次数？：（ ）
t56 <- table(usedata$q5,usedata$q6)
t56

##           
##            0—5次 5—10次 10—15次 15次以上
##   非常满意      9       0        1        0
##   比较满意     63       5        1        1
##   满意         36       7        3        1
##   一般        122      18        3        2
##   差           22       4        2        2

# 卡方检验  有显著性关系
chisq.test(t56)

## Warning in chisq.test(t56): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  t56
## X-squared = 13.095, df = 12, p-value = 0.3622

## 对应分析
df56 <- tab2df(usedata$q5,usedata$q6)
df56ca <- ca(df56)
df56ca

## 
##  Principal inertias (eigenvalues):
##            1      2        3       
## Value      0.0263 0.010987 0.006072
## Percentage 60.66% 25.34%   14%     
## 
## 
##  Rows:
##             一般      满意       差  比较满意  非常满意
## Mass    0.033113  0.231788 0.155629  0.480132  0.099338
## ChiDist 0.522221  0.180188 0.214368  0.088101  0.400392
## Inertia 0.009030  0.007526 0.007152  0.003727  0.015925
## Dim. 1  0.230311 -0.959275 1.105482 -0.385952  2.295050
## Dim. 2  4.939594  0.444342 0.102153 -0.537430 -0.245790
## 
## 
##  Columns:
##            0—5次  10—15次 15次以上   5—10次
## Mass     0.834437  0.112583 0.033113  0.019868
## ChiDist  0.064615  0.297900 0.716628  0.805149
## Inertia  0.003484  0.009991 0.017005  0.012879
## Dim. 1  -0.370932  0.938547 3.711894  4.074228
## Dim. 2   0.195105 -2.166247 3.421658 -1.621767

# 查看卡方检验值
summary(df56ca)

## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.026300  60.7  60.7  ***************          
##  2      0.010987  25.3  86.0  ******                   
##  3      0.006072  14.0 100.0  ****                     
##         -------- -----                                 
##  Total: 0.043360 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |      |   33  988  208 |   37   5   2 |  518 983 808 |
## 2 |      |  232  812  174 | -156 745 213 |   47  67  46 |
## 3 |      |  156  702  165 |  179 699 190 |   11   2   2 |
## 4 |      |  480  914   86 |  -63 505  72 |  -56 409 139 |
## 5 |      |   99  868  367 |  372 864 523 |  -26   4   6 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   05 |  834  967   80 |  -60 867 115 |   20 100  32 |
## 2 | 1015 |  113  842  230 |  152 261  99 | -227 581 528 |
## 3 |   15 |   33  956  392 |  602 706 456 |  359 250 388 |
## 4 |  510 |   20  718  297 |  661 673 330 | -170  45  52 |

## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df56ca,main = "对食堂饭菜的满意度VS每周平均定外卖的次数")

## 2您的年级：（ ）yu 6您每周平均定外卖的次数？：（ ）
## 1:去掉研究生的数据 ＃＃
useda1234 <- usedata[which(usedata$q2 != "研究生"),]
useda1234$q2 <- factor(useda1234$q2,labels = c("大一","大二","大三","大四"))
str(useda1234$q2)

##  Factor w/ 4 levels "大一","大二",..: 3 3 3 3 2 2 2 3 2 3 ...

t26 <- table(useda1234$q2,useda1234$q6)
t26

##       
##        0—5次 5—10次 10—15次 15次以上
##   大一     41      12        1        1
##   大二     82      11        2        2
##   大三    123      11        6        2
##   大四      6       0        1        0

# 卡方检验  有显著性关系
chisq.test(t26)

## Warning in chisq.test(t26): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  t26
## X-squared = 12.465, df = 9, p-value = 0.1884

## 对应分析
df26 <- tab2df(useda1234$q2,useda1234$q6)
df26ca <- ca(df26)
df26ca

## 
##  Principal inertias (eigenvalues):
##            1       2        3      
## Value      0.03503 0.006283 9.8e-05
## Percentage 84.59%  15.17%   0.24%  
## 
## 
##  Rows:
##              大三      大二      大四     大一
## Mass     0.182724  0.322259  0.471761 0.023256
## ChiDist  0.339170  0.076352  0.122453 0.701311
## Inertia  0.021020  0.001879  0.007074 0.011438
## Dim. 1  -1.745152 -0.191519  0.651189 3.155986
## Dim. 2   1.152163 -0.837940 -0.108227 4.754208
## 
## 
##  Columns:
##            0—5次  10—15次 15次以上   5—10次
## Mass     0.837209  0.112957 0.033223  0.016611
## ChiDist  0.052953  0.477914 0.609877  0.233578
## Inertia  0.002348  0.025800 0.012357  0.000906
## Dim. 1   0.249683 -2.496319 2.636696 -0.882451
## Dim. 2  -0.313353  1.268333 4.517980 -1.867611

# 查看卡方检验值
summary(df26ca)

## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.035030  84.6  84.6  *********************    
##  2      0.006283  15.2  99.8  ****                     
##  3      9.8e-050   0.2 100.0                           
##         -------- -----                                 
##  Total: 0.041411 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 | 大三 |  183 1000  508 | -327 927 556 |   91  73 243 |
## 2 | 大二 |  322  977   45 |  -36 220  12 |  -66 757 226 |
## 3 | 大四 |  472  996  171 |  122 991 200 |   -9   5   6 |
## 4 | 大一 |   23  998  276 |  591 709 232 |  377 289 526 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   05 |  837  999   57 |   47 779  52 |  -25 220  82 |
## 2 | 1015 |  113 1000  623 | -467 956 704 |  101  44 182 |
## 3 |   15 |   33 1000  298 |  493 655 231 |  358 345 678 |
## 4 |  510 |   17  902   22 | -165 500  13 | -148 402  58 |

## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df26ca,main = "年级VS每周平均定外卖的次数")

## 8如果您选择外卖，那您对快餐理想的价格是？：（ ）yu 6您每周平均定外卖的次数？：（ ）
t86 <- table(usedata$q8,usedata$q6)
t86

##           
##            0—5次 5—10次 10—15次 15次以上
##   6元以下      38       4        0        0
##   6—10元     186      23        7        3
##   10—15元     27       6        3        2
##   15－20元      0       1        0        0
##   20元以上      1       0        0        1

# 卡方检验  没有显著性关系
chisq.test(t86)

## Warning in chisq.test(t86): Chi-squared approximation may be incorrect

## 
##  Pearson's Chi-squared test
## 
## data:  t86
## X-squared = 40.688, df = 12, p-value = 5.524e-05

## 对应分析
df86 <- tab2df(usedata$q8,usedata$q6)
df86ca <- ca(df86)
df86ca

## 
##  Principal inertias (eigenvalues):
##            1       2        3       
## Value      0.09097 0.032868 0.010891
## Percentage 67.52%  24.4%    8.08%   
## 
## 
##  Rows:
##           6—10元   6元以下  10—15元   15－20元  20元以上
## Mass     0.139073  0.725166  0.125828   0.003311  0.006623
## ChiDist  0.248151  0.052233  0.392574   2.807553  3.447164
## Inertia  0.008564  0.001978  0.019392   0.026101  0.078695
## Dim. 1  -0.558713 -0.152348  0.907864  -0.139359 11.235360
## Dim. 2   0.620925  0.133987 -1.212938 -14.568751  2.619214
## 
## 
##  Columns:
##            0—5次  10—15次  15次以上  5—10次
## Mass     0.834437  0.112583  0.033113 0.019868
## ChiDist  0.091477  0.489192  0.625277 2.102911
## Inertia  0.006983  0.026942  0.012946 0.087859
## Dim. 1  -0.181829 -0.042032  0.549432 6.959282
## Dim. 2   0.402449 -2.641265 -1.489772 0.547260

# 查看卡方检验值
summary(df86ca)

## 
## Principal inertias (eigenvalues):
## 
##  dim    value      %   cum%   scree plot               
##  1      0.090970  67.5  67.5  *****************        
##  2      0.032868  24.4  91.9  ******                   
##  3      0.010891   8.1 100.0  **                       
##         -------- -----                                 
##  Total: 0.134730 100.0                                 
## 
## 
## Rows:
##     name   mass  qlt  inr    k=1 cor ctr     k=2 cor ctr  
## 1 |  610 |  139  667   64 | -169 461  43 |   113 206  54 |
## 2 |    6 |  725  990   15 |  -46 774  17 |    24 216  13 |
## 3 | 1015 |  126  800  144 |  274 487 104 |  -220 314 185 |
## 4 | 1520 |    3  885  194 |  -42   0   0 | -2641 885 703 |
## 5 |   20 |    7  985  584 | 3389 966 836 |   475  19  45 |
## 
## Columns:
##     name   mass  qlt  inr    k=1 cor ctr    k=2 cor ctr  
## 1 |   05 |  834  996   52 |  -55 359  28 |   73 636 135 |
## 2 | 1015 |  113  959  200 |  -13   1   0 | -479 958 785 |
## 3 |   15 |   33  257   96 |  166  70  10 | -270 187  73 |
## 4 |  510 |   20  999  652 | 2099 996 962 |   99   2   6 |

## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df86ca,main = "对外卖理想的价格VS每周平均定外卖的次数")

lin_shichangana_2.R

daitu

Wed Dec 21 09:20:46 2016