# 对各个指标计算各种描述统计量。
# Load the data set and rename its columns to Y, X1, X2, X3.
# NOTE(review): assumes the CSV has exactly four numeric columns — confirm.
a <- read.csv("运营商数据.csv", header = TRUE)
names(a) <- c("Y", "X1", "X2", "X3")

# Per-column descriptive statistics. vapply() pins every result to exactly
# one number per column, so an unexpected column type fails loudly instead
# of silently changing the shape of the summary.
N <- vapply(a, length, integer(1))    # number of observations
MU <- vapply(a, mean, numeric(1))     # mean
SD <- vapply(a, sd, numeric(1))       # standard deviation (not variance)
MIN <- vapply(a, min, numeric(1))     # minimum
MED <- vapply(a, median, numeric(1))  # median
MAX <- vapply(a, max, numeric(1))     # maximum

# One row per variable, one column per statistic; print the table.
result <- cbind(N, MU, SD, MIN, MED, MAX)
result
## N MU SD MIN MED MAX
## Y 1123 0.1930176 0.1324444 -0.49801016 0.1870535 0.9925818
## X1 1123 2.5804381 0.4083604 0.77815125 2.5831988 3.6009729
## X2 1123 0.8457207 0.1538765 0.09867452 0.8983452 1.0000000
## X3 1123 0.2521307 0.2112832 0.00000000 0.1983730 0.9734904
# 绘制箱线图,初步查看异常值。
# Split observations at the median of Y: rows strictly above the median get
# the "高" label, all others "低". "高" is the first level, so it is drawn
# first in each plot.
a$cat <- factor(
  a$Y > median(a$Y),
  levels = c(TRUE, FALSE),
  labels = c("高", "低")
)

# Three panels side by side, one box plot per explanatory variable,
# each grouped by the median-split factor.
par(mfrow = c(1, 3))
invisible(Map(
  function(column, y_label) {
    boxplot(
      reformulate("cat", response = column),
      data = a,
      xlab = "间接价值",
      ylab = y_label
    )
  },
  c("X1", "X2", "X3"),
  c("通话时长(百分钟)", "大网占比(%)", "小网占比(%)")
))
