多元第二章作业

consumer（scatter plot, bar plot, box plot and ridge plot）

# Read the GBK-encoded consumer CSV into `Consumer` and attach it so its
# columns are reachable by bare name for the rest of the script.
Consumer <- read.csv(
  file = "C://Users//zhy//Desktop//作业//consumer2018.csv",
  fileEncoding = "GBK"
)
attach(Consumer)

Plots

低级绘图函数（食品烟酒~衣着回归分析，诊断图）

# Annotated base-graphics scatter plot of the simple linear regression y ~ x
# from Income1.csv: points, mean point, mean reference lines, fitted line,
# residual segments and formula annotations.
par(mai = c(0.7, 0.7, 0.4, 0.4), cex = 0.8)
d <- read.csv("C://Users//zhy//Desktop//作业//Income1.csv", header = TRUE)

# Fit the model once and reuse it for the line, the fitted values and the
# residuals (the fit was previously computed twice).
fit <- lm(y ~ x, data = d)
d$predicted <- predict(fit)      # fitted values
d$residuals <- residuals(fit)    # residuals

plot(d$x, d$y, xlab = "受教育年限", ylab = "收入", pch = 19, cex = 1.3, col = "red")
grid(col = "grey60")                                             # grid lines
axis(side = 4, lty = 1)                                          # extra axis on the right
points(mean(d$x), mean(d$y), pch = 19, cex = 4, col = "black")   # point of means
abline(v = mean(d$x), h = mean(d$y), lty = 2, col = "gray30")    # mean reference lines
abline(fit, lwd = 2, col = "blue")                               # fitted regression line
segments(d$x, d$y, d$x, d$predicted)                             # observed-to-fitted segments

# Arrow and label for the fitted line.
arrows(17, 55, 18.5, 46, code = 2, angle = 25, length = 0.06, col = "blue", lwd = 2)
text(19, 45, expression(hat(y) == hat(beta)[0] + hat(beta)[1] * x))

# Arrow and label for a residual. The residual is y_i minus the whole fitted
# value, so both coefficient terms are subtracted (the slope term was
# previously added).
arrows(17.9, 65, 15, 70, code = 2, angle = 25, length = 0.06, col = "black", lwd = 2)
text(13.7, 71, expression(hat(epsilon[i]) == y[i] - hat(beta)[0] - hat(beta)[1] * x[i]))

# Single-entry legend, so a single line type is supplied.
legend("topleft", legend = "回归直线", lty = 1, col = "blue",
       cex = 0.95, fill = "blue", box.col = "grey60", ncol = 1,
       inset = 0.01, x.intersp = 0.3)
box(col = 1, lwd = 2)

轮廓图(outline plot)

# Profile (outline) plot drawn with DescTools::PlotLinesA: columns 4:11 of
# Consumer are transposed so that each row of Consumer (labelled by column 1)
# becomes one line across the consumption items.
# This chunk had been flattened onto a single comment line with a stray "in"
# token; it is restored here as runnable statements.
library(DescTools)
par(mai = c(0.6, 0.6, 0.6, 0.6), cex = 0.7, cex.main = 1, font.main = 1)
Consumer <- read.csv("C://Users//zhy//Desktop//作业//consumer2018.csv", fileEncoding = 'GBK')
# attach(Consumer) is not repeated here: Consumer is already attached at the
# top of the file and every call below receives the data explicitly.
data.m <- as.matrix(Consumer[, 4:11])
rownames(data.m) <- Consumer[, 1]
PlotLinesA(t(data.m), xlab = "消费项目", ylab = "支出金额(单位：元)",
           args.legend = NA,                  # suppress the built-in legend
           col = rainbow(31), pch = 21,
           pch.col = 1, pch.bg = "white", pch.cex = 1)
legend(x = "topright", legend = Consumer[, 1], lty = 1, col = rainbow(31),
       box.col = "grey1", inset = 0.01, ncol = 4, cex = 0.8)

library(ggplot2)
library(GGally)

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

# Parallel-coordinates plot of columns 4:11 of Consumer, grouped (coloured) by
# column 1, with all variables on one shared min-max scale.
ggparcoord(Consumer, columns = 4:11, groupColumn = 1,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +                                      # plot theme
  theme(legend.text = element_text(size = 10),      # legend font size (numeric, not the string "10")
        axis.text = element_text(size = 10)) +      # axis font size
  labs(x = "消费项目", y = "支出金额(单位：元)")

library(plotrix)
# Ladder plot (plotrix) of columns 4:11 of Consumer, one coloured line per
# row, with a four-column legend built from column 1.
par(mai = rep(0.5, 4), cex = 1)
data.m <- as.matrix(Consumer[, 4:11])
rownames(data.m) <- Consumer[, 1]
ladderplot(data.m, col = rainbow(31), pch = 21, lty = 1)
legend(
  x = "topright",
  legend = Consumer[, 1],
  lty = 1,
  col = rainbow(31),
  box.col = "grey1",
  inset = 0.01,
  ncol = 4,
  cex = 0.8
)

雷达图(radar chart)

library(ggplot2); library(ggiraphExtra)
# Radar chart of Consumer with one outline per value of the 地区 column;
# alpha = 0 leaves the polygons unfilled.
ggRadar(data = Consumer, aes(group = 地区), alpha = 0) +
  theme(axis.text = element_text(size = 10),
        legend.position = "right",                  # legend placement
        legend.text = element_text(size = 10))      # legend font size (numeric, not the string "10")

星图(star plot)

# Star plot (segment diagrams) of columns 4:11 of Consumer, one star per row,
# labelled with column 1.
data.m <- as.matrix(Consumer[, 4:11])
rownames(data.m) <- Consumer[, 1]
stars(data.m,
      draw.segments = TRUE,        # draw segment diagrams (TRUE instead of the reassignable T)
      key.loc = c(10.5, 1.8),      # (x, y) position of the unit key; stars() takes exactly two coordinates
      cex = 1.3,                   # label font size
      mar = c(0.85, 0.1, 0.1, 1))  # plot margins

脸谱图(face plot)

library(aplpack)
# Chernoff faces for the matrix data.m built in the star-plot section, one
# face per row, labelled with column 1 of Consumer.
rownames(data.m) <- Consumer[, 1]
faces(
  data.m,
  face.type = 1,   # coloured faces
  scale = TRUE     # standardise the variables first
)

## effect of variables:
##  modified item       Var             
##  "height of face   " "食品烟酒"      
##  "width of face    " "衣着"          
##  "structure of face" "居住"          
##  "height of mouth  " "生活用品及服务"
##  "width of mouth   " "交通通信"      
##  "smiling          " "教育文化娱乐"  
##  "height of eyes   " "医疗保健"      
##  "width of eyes    " "其他用品及服务"
##  "height of hair   " "食品烟酒"      
##  "width of hair   "  "衣着"          
##  "style of hair   "  "居住"          
##  "height of nose  "  "生活用品及服务"
##  "width of nose   "  "交通通信"      
##  "width of ear    "  "教育文化娱乐"  
##  "height of ear   "  "医疗保健"

散点图(scatter plot)

library(ggpubr)
# Scatter plot of 居住 against 衣着 with a fitted regression line, its
# confidence band, the regression equation and the correlation coefficient.
ggscatter(
  data = Consumer, x = "衣着", y = "居住",
  add = "reg.line", conf.int = TRUE                                     # regression line + confidence band
) +
  stat_regline_equation(label.x = 1700, label.y = 10700, size = 6) +    # position of the equation label
  stat_cor(label.x = 1700, label.y = 11400, size = 6) +                 # position of the correlation label
  theme_bw() +                                                          # plot theme
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15)
  )

## `geom_smooth()` using formula 'y ~ x'

矩阵散点图 (matrix scatter plot)

library(GGally)
# Scatter-plot matrix of columns 4:11 of Consumer.
ggpairs(Consumer, columns = 4:11)

三维(3D)散点图

library(MASS); library(ade4); library(scatterplot3d)
data(iris)

par(mar = c(0, 0, 0, 0))

# Panel function for pairs(): draws one ade4 class scatter (points, stars and
# ellipses per species) for the pair of variables passed in as X and Y.
# Argument names are written out in full instead of relying on partial
# matching, and TRUE/FALSE replace the reassignable T/F.
pane1 <- function(X, Y) {
  XY <- cbind.data.frame(X, Y)
  s.class(XY, iris$Species,
          include.origin = FALSE,   # do not force the origin into the panel
          add.plot = TRUE,          # draw into the panel opened by pairs()
          clabel = 1.5,
          col = c("blue", "black", "red"),
          cpoint = 2, cstar = 0.5)
}
pairs(iris[, 1:4], panel = pane1)

detach(package:ade4)

# Four 3D scatter plots of iris on a 2 x 2 grid. Panel k uses columns
# k, k+1, k+2 (wrapping around after column 4) as the x, y and z axes,
# with points coloured by species.
par(mfrow = c(2, 2))
mar0 <- c(3, 3, 1, 3)
invisible(lapply(1:4, function(first) {
  axis_names <- c("萼片长度", "萼片宽度", "花瓣长度", "花瓣宽度")
  v <- (first + 0:2 - 1) %% 4 + 1   # rotating column triple: 1-2-3, 2-3-4, 3-4-1, 4-1-2
  scatterplot3d(iris[, v[1]], iris[, v[2]], iris[, v[3]], mar = mar0,
                color = c("blue", "black", "red")[iris$Species], pch = 19,
                xlab = axis_names[v[1]],
                ylab = axis_names[v[2]],
                zlab = axis_names[v[3]])
}))

习题

1.什么是数据可视化?举几个数据可视化应用的例子. 答：数据可视化是指将数据以视觉形式来呈现，如图表或地图，以帮助人们了解这些数据的意义。以上的示例就是数据可视化的例子。

# Draw two independent normal samples (sd 1000 and sd 10), compute their
# Pearson correlation and plot one against the other.
x <- rnorm(n = 100, mean = 0, sd = 1000)
y <- rnorm(n = 100, mean = 0, sd = 10)
s <- cor(x, y, method = "pearson")
summary(s)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.1665 -0.1665 -0.1665 -0.1665 -0.1665 -0.1665

plot(x = x, y = y)

此时负相关

# Same experiment with a smaller spread for x (sd 100 instead of 1000):
# two independent normal samples, their Pearson correlation, and a plot.
x <- rnorm(n = 100, mean = 0, sd = 100)
y <- rnorm(n = 100, mean = 0, sd = 10)
s <- cor(x, y, method = "pearson")
summary(s)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -0.2196 -0.2196 -0.2196 -0.2196 -0.2196 -0.2196

plot(x = x, y = y)

此时仍为负相关（样本相关系数约为 -0.2196）

##第三题

# Exercise 3: scatter plot of Height against Weight from student.csv with a
# fitted regression line, confidence band, equation and correlation labels.
student <- read.csv(file = "C://Users//zhy//Desktop//作业//student.csv")

library(ggpubr)
ggscatter(
  data = student, x = "Weight", y = "Height",
  add = "reg.line", conf.int = TRUE                               # regression line + confidence band
) +
  stat_regline_equation(label.x = 80, label.y = 50, size = 6) +   # position of the equation label
  stat_cor(label.x = 80, label.y = 55, size = 6) +                # position of the correlation label
  theme_bw() +                                                    # plot theme
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15)
  )

## `geom_smooth()` using formula 'y ~ x'

## 第四题（1）（2）已在轮廓图与雷达图中给出，（3）已在上述矩阵散点图中给出

第五题

第一组

# Group 1: parallel-coordinates plot of the four measurements for the first
# block of 50 rows of iris.csv.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
# Rows 1:50 form the first group (the other groups use 51:100 and 101:150);
# the previous 2:50 silently dropped the first observation.
# NOTE(review): groupColumn = 1 colours lines by Sepal.Length, which is also
# one of the plotted columns - confirm this is intended rather than Species.
ggparcoord(data = iris[1:50, ], columns = 1:4, groupColumn = 1,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +                                      # plot theme
  theme(legend.text = element_text(size = 10),      # legend font size (numeric, not the string "10")
        axis.text = element_text(size = 10)) +      # axis font size
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽（cm）")

第二组

# Group 2: parallel-coordinates plot of the four measurements for rows
# 51:100 of iris.csv.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
ggparcoord(data = iris[51:100, ], columns = 1:4, groupColumn = 1,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +                                      # plot theme
  theme(legend.text = element_text(size = 10),      # legend font size (numeric, not the string "10")
        axis.text = element_text(size = 10)) +      # axis font size
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽（cm）")

第三组

# Group 3: parallel-coordinates plot of the four measurements for rows
# 101:150 of iris.csv.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
ggparcoord(data = iris[101:150, ], columns = 1:4, groupColumn = 1,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +                                      # plot theme
  theme(legend.text = element_text(size = 10),      # legend font size (numeric, not the string "10")
        axis.text = element_text(size = 10)) +      # axis font size
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽（cm）")

雷达图

# Radar chart of iris.csv with one unfilled outline per Species.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(ggiraphExtra)
ggRadar(data = iris, aes(group = Species), alpha = 0) +
  theme(axis.text = element_text(size = 10),
        legend.position = "right",                  # legend placement
        legend.text = element_text(size = 10))      # legend font size (numeric, not the string "10")

气泡图

# Bubble chart of the sepal measurements from iris.csv.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Bubble size must encode a continuous variable; mapping it to the discrete
# Species triggered ggplot2's "Using size for a discrete variable is not
# advised" warning. Size now shows Petal.Length and Species moves to fill.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width,
                 size = Petal.Length, fill = Species)) +
  geom_point(shape = 21, colour = "black")

# Bubble chart of the petal measurements from iris.csv.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
# Bubble size must encode a continuous variable; mapping it to the discrete
# Species triggered ggplot2's "Using size for a discrete variable is not
# advised" warning. Size now shows Sepal.Length and Species moves to fill.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width,
                 size = Sepal.Length, fill = Species)) +
  geom_point(shape = 21, colour = "black")

星图

# Star plot (segment diagrams) of the four iris measurements, one star per
# row, labelled by species.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

data.m <- as.matrix(iris[, 1:4])
rownames(data.m) <- iris[, 5]
# The argument was misspelled "draw.segmnts", so stars() ignored it, passed it
# on as a graphical parameter (three warnings) and drew plain stars instead of
# segment diagrams.
stars(data.m,
      draw.segments = TRUE,        # draw segment diagrams
      key.loc = c(20.5, 0.8),      # (x, y) position of the unit key; stars() takes exactly two coordinates
      cex = 0.4,                   # label font size
      mar = c(1, 0.1, 0.1, 1))     # plot margins

脸谱图

# Chernoff faces of the four iris measurements, one face per row.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

library(aplpack)
# Build the matrix here rather than relying on data.m left over from the
# star-plot chunk, and label each face by species (column 5) as the star plot
# does; column 1 is Sepal.Length, which is a plotted variable, not a label.
data.m <- as.matrix(iris[, 1:4])
rownames(data.m) <- iris[, 5]
faces(data.m,
      face.type = 1,              # coloured faces
      scale = TRUE)               # standardise the variables first

## effect of variables:
##  modified item       Var           
##  "height of face   " "Sepal.Length"
##  "width of face    " "Sepal.Width" 
##  "structure of face" "Petal.Length"
##  "height of mouth  " "Petal.Width" 
##  "width of mouth   " "Sepal.Length"
##  "smiling          " "Sepal.Width" 
##  "height of eyes   " "Petal.Length"
##  "width of eyes    " "Petal.Width" 
##  "height of hair   " "Sepal.Length"
##  "width of hair   "  "Sepal.Width" 
##  "style of hair   "  "Petal.Length"
##  "height of nose  "  "Petal.Width" 
##  "width of nose   "  "Sepal.Length"
##  "width of ear    "  "Sepal.Width" 
##  "height of ear   "  "Petal.Length"

第六题

# Exercise 6 (1): scatter-plot matrix of the mtcars data read from CSV.
mtcars <- read.csv("C://Users//zhy//Desktop//作业//mtcars.csv")
# The CSV header is shifted and split: the car names were read under "mpg",
# the mpg values under "cyl", "drat" arrived as "dr"/"at" and "am gear carb"
# as "am.ge"/"ar.car"/"b". Restore the proper names so that every panel is
# labelled with the variable it actually shows.
stopifnot(ncol(mtcars) == 12)
names(mtcars) <- c("model", "mpg", "cyl", "disp", "hp", "drat",
                   "wt", "qsec", "vs", "am", "gear", "carb")
library(GGally)
head(mtcars)

##               model  mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1         Mazda RX4 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4
## 2     Mazda RX4 Wag 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4
## 3        Datsun 710 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1
## 4    Hornet 4 Drive 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1
## 5 Hornet Sportabout 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2
## 6           Valiant 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1

# NOTE(review): columns 2:11 cover mpg..gear and leave out carb (column 12);
# use 2:12 if all eleven variables are wanted.
ggpairs(data = mtcars, columns = 2:11)

（2）绘制该数据集中任意三个变量的三维散点图和气泡图.

# This chunk had been flattened onto the heading line; it is restored here as
# runnable statements. It addresses the data by the standard mtcars layout
# (columns 3-5 = disp, hp, drat; names wt, drat, vs, hp), so it uses the
# built-in dataset, whose layout is guaranteed, instead of the CSV copy.
library(scatterplot3d); library(ggplot2)
cars <- datasets::mtcars

# 3D scatter plot of disp, hp and drat (mar0 is the margin vector defined for
# the iris 3D plots earlier in the file).
scatterplot3d(cars[, 3], cars[, 4], cars[, 5], mar = mar0,
              color = c("red"), pch = 19,
              xlab = "disp", ylab = "hp", zlab = "drat")

# Bubble chart: wt against drat, bubble size = hp, colour = vs.
p1 <- ggplot(cars, aes(wt, drat, fill = vs)) +
  geom_point(aes(size = hp, color = vs))
p1