## 使用R分析市场调查数据
# 更改工作空间####
setwd("/Users/daitu/R/市场调查")
getwd()
## [1] "/Users/daitu/R/市场调查"
## 加载包####
library(readxl)
## Warning: package 'readxl' was built under R version 3.3.2
library(stringr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(plotly)
## Warning: package 'plotly' was built under R version 3.3.2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(gplots)
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
library(ca)
## Warning: package 'ca' was built under R version 3.3.2
library(psych) # 因子分析包
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## 导入需要的函数
source("Lin_scneed_fun.R")
## 加载R格式的数据表格 ####
# 数据已经经过筛选,选出的数据为答题时间在40~300(s)之间,城市为山西
load("usedata.RData")
# 查看数据
summary(usedata)
## use_time sheng q1 q2 q3a q3b
## Min. : 43.0 山西 :302 男:146 大一 : 55 0:157 0:241
## 1st Qu.: 79.0 北京 : 0 女:156 大二 : 97 1:145 1: 61
## Median :102.0 重庆 : 0 大三 :142
## Mean :111.1 福建 : 0 大四 : 7
## 3rd Qu.:130.5 广东 : 0 研究生: 1
## Max. :299.0 海南 : 0
## (Other): 0
## q3c q3d q3e q4 q5 q6
## 0:246 0:147 0:249 500元以下 : 27 非常满意: 10 0—5次 :252
## 1: 56 1:155 1: 53 500—900 :128 比较满意: 70 5—10次 : 34
## 900—1300元:122 满意 : 47 10—15次: 10
## 1300元以上 : 25 一般 :145 15次以上: 6
## 差 : 30
##
##
## q7a q7b q7c q7d q7e q7f q8
## 0:233 0:252 0:121 0:196 0:175 0:154 6元以下 : 42
## 1: 69 1: 50 1:181 1:106 1:127 1:148 6—10元 :219
## 10—15元: 38
## 15-20元: 1
## 20元以上: 2
##
##
## q9zhiliang q9anquan q9stime q9jiage
## Min. :1.00 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:6.00 1st Qu.:6.000 1st Qu.:5.000 1st Qu.:5.000
## Median :8.00 Median :8.000 Median :7.000 Median :7.000
## Mean :7.07 Mean :6.997 Mean :6.447 Mean :6.205
## 3rd Qu.:8.00 3rd Qu.:8.000 3rd Qu.:8.000 3rd Qu.:8.000
## Max. :8.00 Max. :8.000 Max. :8.000 Max. :8.000
##
## q9baozhuang q9kouwei q9xiaoliang q9kefu
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:5.000 1st Qu.:6.000 1st Qu.:5.000 1st Qu.:6.000
## Median :6.000 Median :7.000 Median :6.000 Median :7.000
## Mean :5.748 Mean :6.599 Mean :5.791 Mean :6.517
## 3rd Qu.:7.000 3rd Qu.:8.000 3rd Qu.:7.750 3rd Qu.:8.000
## Max. :8.000 Max. :8.000 Max. :8.000 Max. :8.000
##
## q10 q10a1 q10a2
## 使用过 :186 未填 : 0 跳过 :116
## 听说过,但未使用过:101 跳过 :116 30—60分钟 : 95
## 没听说过 : 15 空 : 0 10—30分钟 : 70
## 0—5次 :161 60—90分钟 : 10
## 5—10次 : 20 10分钟内 : 7
## 10—15次: 3 一个半小时以上: 2
## 15次以上: 2 (Other) : 2
## q10a3_1 q10a3_2 q10a3_3 q10a3_4
## Min. :-3.000 Min. :-3.000 Min. :-3.0000 Min. :-3.0000
## 1st Qu.:-3.000 1st Qu.:-3.000 1st Qu.:-3.0000 1st Qu.:-3.0000
## Median : 3.000 Median : 3.000 Median : 3.0000 Median : 3.0000
## Mean : 1.007 Mean : 1.219 Mean : 0.9735 Mean : 0.9702
## 3rd Qu.: 4.000 3rd Qu.: 4.000 3rd Qu.: 4.0000 3rd Qu.: 4.0000
## Max. : 5.000 Max. : 5.000 Max. : 5.0000 Max. : 5.0000
##
## q10a3_5 q10a3_6 q10a4_1 q10a4_2
## Min. :-3.0000 Min. :-3.000 Min. :-3.000 Min. :-3.000
## 1st Qu.:-3.0000 1st Qu.:-3.000 1st Qu.:-3.000 1st Qu.:-3.000
## Median : 2.0000 Median : 3.000 Median : 3.000 Median : 3.000
## Mean : 0.8874 Mean : 0.957 Mean : 1.199 Mean : 1.079
## 3rd Qu.: 4.0000 3rd Qu.: 4.000 3rd Qu.: 4.000 3rd Qu.: 4.000
## Max. : 5.0000 Max. : 5.000 Max. : 5.000 Max. : 5.000
##
## q10a4_3 q10a4_4 q10a5text
## Min. :-3.000 Min. :-3.00 Length:302
## 1st Qu.:-3.000 1st Qu.:-3.00 Class :character
## Median : 3.000 Median : 3.00 Mode :character
## Mean : 1.126 Mean : 1.03
## 3rd Qu.: 4.000 3rd Qu.: 4.00
## Max. : 5.000 Max. : 5.00
##
## q10b1 score
## 未填 : 0 Length:302
## 跳过 :201 Class :character
## 空 : 0 Mode :character
## 我在使用美团 : 24
## 我有喜欢的外卖电话: 16
## 其他 : 61
##
## 单选题的频数统计和分析,数据可视化可以使用饼图等####
## 4 您每个月的生活费大概是:( )
# A 500元以下 B500—900元 C900—1300元 D1300元以上
q4t <- table(usedata$q4)
q4t
##
## 500元以下 500—900 900—1300元 1300元以上
## 27 128 122 25
q4dt = data.frame(q4t)
myLabel = as.vector(q4dt$Var1) ## 转成向量,否则图例的标签可能与实际顺序不一致
myLabel = paste(myLabel, "(", round(q4dt$Freq / sum(q4dt$Freq) * 100, 2), "%)", sep = "") ## 用 round() 对结果保留两位小数
p4 = ggplot(q4dt, aes(x = "", y =Freq, fill = Var1)) +
geom_bar(stat = "identity", width = 2) +
theme_grey(base_family = "STKaiti") +
coord_polar(theta = "y") + labs(x = "", y = "", title = "生活费") + ## 将标签设为空
theme(axis.ticks = element_blank(),plot.title = element_text(hjust = 0.5 )) +
theme(legend.title = element_blank(), legend.position = "bottom") + ## 将图例标题设为空,并把土方放在上
scale_fill_discrete(breaks = q4dt$Var1,labels = myLabel) ## 将原来的图例标签换成现在的myLabel
p4

## 中空饼图
p4 = ggplot(q4dt, aes(x = "", y =Freq, fill = Var1)) +
geom_bar(stat = "identity", width = 1) +
theme_minimal(base_family = "STKaiti") +
annotate("text",x = 0,y = 0,label = "") +
coord_polar(theta = "y") + labs(x = "", y = "", title = "生活费") +
theme(axis.title.x = element_blank(),axis.title.y = element_blank(),
axis.ticks=element_blank(),panel.grid=element_blank(),
axis.text = element_blank(),plot.title = element_text(hjust = 0.5 )) +
theme(legend.title = element_blank(), legend.position = "right") +
scale_fill_discrete(breaks = q4dt$Var1,labels = myLabel)
p4

## 频数条形图
## 2您的年级:( ) A大一 B大二 C大三 D大四 E研究生
q2t <- table(usedata$q2)
q2tdf = data.frame(q2t)
p2 <- ggplot(q2tdf,aes(x = reorder(Var1,Freq),y = Freq)) +
theme_bw(base_family = "STKaiti") +
geom_bar(stat = "identity",fill = "lightblue") +
coord_flip() +
labs(x = "年级",title = "接受调查的人员分布",
y = "频数")+
theme(plot.title = element_text(hjust = 0.5)) +
scale_y_continuous(labels = function(x) paste(x , "人",sep = "")) +
geom_text(aes(x = reorder(Var1,Freq),y = Freq+5,
label = Freq))
p2

##==========================================================
## 多选题的简单频数分析 ####
## 7在您从未接触过外卖APP的情况下,以下何种因素会促使您使用(多选):( )
# A知名度 B广告宣传 C有折扣 D 快餐食品店种类多 E速度快
# F好友推荐
## 1:求多选题每个选项频率的函数####
my_fre <- function(x) {
x <- as.numeric(x)
return(sum(x)/length(x))
}
q7dt <- data.frame(question = c("知名度","广告宣传","有折扣 ",
"快餐食品店种类多","速度快","好友推荐"),
Fre=apply(usedata[,13:18], 2, my_fre))
q7dt
## question Fre
## q7a 知名度 0.2284768
## q7b 广告宣传 0.1655629
## q7c 有折扣 0.5993377
## q7d 快餐食品店种类多 0.3509934
## q7e 速度快 0.4205298
## q7f 好友推荐 0.4900662
## 构建条形图查看数据
p7 <- ggplot(q7dt,mapping = aes(x=question,y=Fre)) +
geom_bar(stat = 'identity',fill = 'steelblue', colour = 'darkred',width=0.7) +
theme_grey(base_family = "STKaiti") + labs(x="",y="频率",title="为何使用APP") +
theme(axis.title.y=element_text(),plot.title = element_text(hjust = 0.5)) +
theme(axis.text.x=element_text(size=10,angle = 30),axis.text.y=element_text(size=10))
p7

##==========================================================
## 对量表题进行分析####
# 9如果您订购外卖,请您为以下因素打分(满分10分,越重要分越高):
## 使用单因素方差分析进行分析 --------
q9dt <- usedata[,20:27]
A <- factor(rep(1:dim(q9dt)[2],each=dim(q9dt)[1]),
labels = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度"))
length(A)
## [1] 2416
q9dt <- unlist(q9dt)
head(q9dt)
## q9zhiliang1 q9zhiliang2 q9zhiliang3 q9zhiliang4 q9zhiliang5 q9zhiliang6
## 8 8 6 7 8 8
## 单因素方差分析
q9data <- data.frame(q9dt,A)
a <- aov(q9dt~A,q9data)
anova(a)
## Analysis of Variance Table
##
## Response: q9dt
## Df Sum Sq Mean Sq F value Pr(>F)
## A 7 510.1 72.869 22.539 < 2.2e-16 ***
## Residuals 2408 7785.1 3.233
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 方差分析结果具有差异
## 打分的箱线图
xtext = c("质量","安全","送餐时间","价格","快餐包装","餐厅口味","餐厅的销量","客服态度")
p9 <- ggplot(q9data,aes(x=A,y=q9dt)) + theme_grey(base_family = "STKaiti") +
geom_boxplot(fill = "white", colour = "#3366FF",outlier.colour = "red", outlier.shape = 1) +
labs(x = "因素",y = "得分",title = "影响订购外卖的因素") +
theme(axis.text.x=element_text(,angle = 30),axis.text.y=element_text()) +
theme(plot.title = element_text(hjust = 0.5)) +
scale_x_discrete(labels = xtext) # 设置x轴的标签
p9

## 各总体均值的可视化
# 计算每个因数的平均的分
q9me <- apply(usedata[,20:27], 2,mean)
q9me
## q9zhiliang q9anquan q9stime q9jiage q9baozhuang q9kouwei
## 7.069536 6.996689 6.447020 6.205298 5.748344 6.599338
## q9xiaoliang q9kefu
## 5.791391 6.516556
par(family='STKaiti',cex.axis = 0.75)
plotmeans(q9dt~A,q9data,xlab = "因素",ylab = "得分",col = "red",
main = "“影响订购外卖的因素")
text(1:length(q9me),q9me-0.1,labels = round(q9me,3))

## 进行因子分析:如果您订购外卖,请您为以下因素打分(满分8分,越重要分越高)
q9yinzi <- usedata[,20:27]
## 判断需提取的公共因子数 碎石图
fa.parallel(q9yinzi,fa="fa")

## Parallel analysis suggests that the number of factors = 3 and the number of components = NA
## 提取2个公共因子
q9yinzi_fa <- fa(cov(q9yinzi),nfactors=2)
## Loading required namespace: GPArotation
q9yinzi_fa
## Factor Analysis using method = minres
## Call: fa(r = cov(q9yinzi), nfactors = 2)
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## q9zhiliang 0.99 -0.08 0.90 0.10 1.0
## q9anquan 0.96 -0.08 0.84 0.16 1.0
## q9stime 0.72 0.11 0.62 0.38 1.0
## q9jiage 0.54 0.33 0.60 0.40 1.7
## q9baozhuang 0.27 0.53 0.52 0.48 1.5
## q9kouwei 0.61 0.27 0.63 0.37 1.4
## q9xiaoliang -0.03 0.88 0.75 0.25 1.0
## q9kefu 0.56 0.35 0.65 0.35 1.7
##
## MR1 MR2
## SS loadings 3.79 1.72
## Proportion Var 0.47 0.21
## Cumulative Var 0.47 0.69
## Proportion Explained 0.69 0.31
## Cumulative Proportion 0.69 1.00
##
## With factor correlations of
## MR1 MR2
## MR1 1.00 0.57
## MR2 0.57 1.00
##
## Mean item complexity = 1.3
## Test of the hypothesis that 2 factors are sufficient.
##
## The degrees of freedom for the null model are 28 and the objective function was 5.95
## The degrees of freedom for the model are 13 and the objective function was 0.21
##
## The root mean square of the residuals (RMSR) is 0.03
## The df corrected root mean square of the residuals is 0.05
##
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.98 0.92
## Multiple R square of scores with factors 0.95 0.84
## Minimum correlation of possible factor scores 0.90 0.68
##
## Factor scores estimated using the regression method have correlations of
## MR1 MR2
## Correlation of scores with factors 0.59 0.50
## Multiple R square of scores with factors 0.34 0.25
## Minimum correlation of possible factor scores -0.31 -0.50
## 列连表分析 对应分析 ####
# 4 您每个月的生活费大概是:( )yu 6您每周平均定外卖的次数?:( )
t46 <- table(usedata$q4,usedata$q6)
t46
##
## 0—5次 5—10次 10—15次 15次以上
## 500元以下 24 1 0 2
## 500—900 116 9 3 0
## 900—1300元 93 21 6 2
## 1300元以上 19 3 1 2
# 卡方检验 有显著性关系
chisq.test(t46)
## Warning in chisq.test(t46): Chi-squared approximation may be incorrect
##
## Pearson's Chi-squared test
##
## data: t46
## X-squared = 22.472, df = 9, p-value = 0.007497
## 对应分析
df46 <- tab2df(usedata$q4,usedata$q6)
df46ca <- ca(df46)
df46ca
##
## Principal inertias (eigenvalues):
## 1 2 3
## Value 0.037846 0.036262 0.000303
## Percentage 50.86% 48.73% 0.41%
##
##
## Rows:
## 900—1300元 500—900 1300元以上 500元以下
## Mass 0.089404 0.423841 0.403974 0.082781
## ChiDist 0.485032 0.211532 0.214798 0.436535
## Inertia 0.021033 0.018965 0.018639 0.015775
## Dim. 1 2.255129 -0.563764 -0.337629 2.098563
## Dim. 2 1.070890 0.949244 -1.073143 -0.779755
##
##
## Columns:
## 0—5次 10—15次 15次以上 5—10次
## Mass 0.834437 0.112583 0.033113 0.019868
## ChiDist 0.084842 0.461934 0.473594 1.363842
## Inertia 0.006006 0.024023 0.007427 0.036955
## Dim. 1 -0.057122 -0.546273 -0.831959 6.881256
## Dim. 2 0.441702 -2.357133 -2.295323 -1.368879
# 查看卡方检验值
summary(df46ca)
##
## Principal inertias (eigenvalues):
##
## dim value % cum% scree plot
## 1 0.037846 50.9 50.9 *************
## 2 0.036262 48.7 99.6 ************
## 3 0.000303 0.4 100.0
## -------- -----
## Total: 0.074411 100.0
##
##
## Rows:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 9001 | 89 995 283 | 439 818 455 | 204 177 103 |
## 2 | 5009 | 424 999 255 | -110 269 135 | 181 730 382 |
## 3 | 1300 | 404 999 250 | -66 94 46 | -204 905 465 |
## 4 | 500 | 83 990 212 | 408 875 365 | -148 116 50 |
##
## Columns:
## name mass qlt inr k=1 cor ctr k=2 cor ctr
## 1 | 05 | 834 1000 81 | -11 17 3 | 84 983 163 |
## 2 | 1015 | 113 997 323 | -106 53 34 | -449 944 626 |
## 3 | 15 | 33 969 100 | -162 117 23 | -437 852 174 |
## 4 | 510 | 20 1000 497 | 1339 963 941 | -261 37 37 |
## 对应分析图
par(family = "STKaiti",mfrow = c(1,1))
plot.ca(df46ca,main = "生活费VS每周平均定外卖的次数")
plot(df46ca,main = "生活费VS每周平均定外卖的次数")
