# Load the consumer-expenditure table (the CSV is read with GBK encoding).
Consumer <- read.csv("C://Users//zhy//Desktop//作业//consumer2018.csv", fileEncoding = 'GBK')
# NOTE(review): nothing in this block reads Consumer's columns unqualified;
# attach() is retained only so the search path stays as before for any later
# code that may rely on it.
attach(Consumer)
par(mai = c(0.7, 0.7, 0.4, 0.4), cex = 0.8)

# Income data; the columns `x` and `y` are the two variables plotted below.
d <- read.csv("C://Users//zhy//Desktop//作业//Income1.csv", header = TRUE)

# Fit the simple linear regression once and reuse it for the fitted line,
# the fitted values and the residuals.
fit <- lm(y ~ x, data = d)
d$predicted <- predict(fit)    # fitted values
d$residuals <- residuals(fit)  # residuals

# Scatter plot with grid lines and an extra axis on the right-hand side.
plot(d$x, d$y, xlab = "受教育年限", ylab = "收入", pch = 19, cex = 1.3, col = "red")
grid(col = "grey60")
axis(side = 4, lty = 1)

# Mark the point of means and draw dashed reference lines through it.
points(mean(d$x), mean(d$y), pch = 19, cex = 4, col = "black")
abline(v = mean(d$x), h = mean(d$y), lty = 2, col = "gray30")

# Regression line plus vertical segments joining each observation to its
# fitted value.
abline(fit, lwd = 2, col = "blue")
segments(d$x, d$y, d$x, d$predicted)

# Annotation for the fitted line.
arrows(17, 55, 18.5, 46, code = 2, angle = 25, length = 0.06, col = "blue", lwd = 2)
text(19, 45, expression(hat(y) == hat(beta)[0] + hat(beta)[1] * x))

# Annotation for a residual: e_i = y_i - (b0 + b1 * x_i), so BOTH coefficient
# terms are subtracted from y_i.
arrows(17.9, 65, 15, 70, code = 2, angle = 25, length = 0.06, col = "black", lwd = 2)
text(13.7, 71, expression(hat(epsilon[i]) == y[i] - hat(beta)[0] - hat(beta)[1] * x[i]))

# Legend for the single regression line, then a heavier frame around the plot.
legend("topleft", legend = "回归直线", lty = 1, col = "blue",
       cex = 0.95, fill = "blue", box.col = "grey60", ncol = 1, inset = 0.01,
       x.intersp = 0.3)
box(col = 1, lwd = 2)
# {r}
# library(DescTools)
# par(mai=c(0.6,0.6,0.6,0.6),cex=0.7,cex.main=1,font.main=1)
# Consumer = read.csv("C://Users//zhy//Desktop//作业//consumer2018.csv",fileEncoding = 'GBK')
# attach(Consumer)
# data.m = as.matrix(Consumer[,4:11])
# rownames(data.m) = Consumer[,1]
# PlotLinesA(t(data.m), xlab="消费项目", ylab="支出金额(单位:元)", args.legend=NA, col=rainbow(31), pch=21, pch.col=1, pch.bg="white", pch.cex=1)
# legend(x="topright", legend=Consumer[,1], lty=1, col=rainbow(31), box.col="grey1", inset=0.01, ncol=4, cex=0.8)
library(ggplot2)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Parallel-coordinates (profile) plot of columns 4:11 of Consumer, with the
# lines grouped/coloured by column 1. "globalminmax" keeps the raw values on
# one common scale instead of rescaling each column separately.
ggparcoord(Consumer, columns = 4:11, groupColumn = 1,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +                                      # black-and-white theme
  theme(
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10),          # legend font size
    axis.text = element_text(size = 10)             # axis tick-label font size
  ) +
  labs(x = "消费项目", y = "支出金额(单位:元)")
library(plotrix)
# Ladder plot of columns 4:11 of Consumer, one line per row, with a legend
# keyed by the first column.
par(mai = rep(0.5, 4), cex = 1)

region_names <- Consumer[, 1]
region_cols <- rainbow(31)   # one colour per legend entry

data.m <- as.matrix(Consumer[, 4:11])
rownames(data.m) <- region_names

ladderplot(data.m, col = region_cols, pch = 21, lty = 1)
legend(
  x = "topright", legend = region_names, lty = 1, col = region_cols,
  box.col = "grey1", inset = 0.01, ncol = 4, cex = 0.8
)
library(ggplot2); library(ggiraphExtra)
# Radar chart of Consumer with one polygon per value of the grouping column;
# alpha = 0 leaves the polygons unfilled.
ggRadar(data = Consumer, aes(group = 地区), alpha = 0) +
  theme(
    axis.text = element_text(size = 10),
    legend.position = "right",               # legend placement
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10)    # legend font size
  )
# Star (segment) plot of columns 4:11 of Consumer, one star per row, labelled
# with the first column.
data.m <- as.matrix(Consumer[, 4:11])
rownames(data.m) <- Consumer[, 1]
stars(data.m,
      draw.segments = TRUE,         # segment diagrams (TRUE, not the reassignable T)
      key.loc = c(10.5, 1.8, 5),    # position of the key
      cex = 1.3,                    # label font size
      mar = c(0.85, 0.1, 0.1, 1))   # figure margins
library(aplpack)

# Chernoff faces for the rows of data.m; each face is labelled with the
# matching entry of Consumer's first column.
face_labels <- Consumer[, 1]
rownames(data.m) <- face_labels
faces(
  data.m,
  face.type = 1,   # coloured faces
  scale = TRUE     # standardise the variables before drawing
)
## effect of variables:
## modified item Var
## "height of face " "食品烟酒"
## "width of face " "衣着"
## "structure of face" "居住"
## "height of mouth " "生活用品及服务"
## "width of mouth " "交通通信"
## "smiling " "教育文化娱乐"
## "height of eyes " "医疗保健"
## "width of eyes " "其他用品及服务"
## "height of hair " "食品烟酒"
## "width of hair " "衣着"
## "style of hair " "居住"
## "height of nose " "生活用品及服务"
## "width of nose " "交通通信"
## "width of ear " "教育文化娱乐"
## "height of ear " "医疗保健"
library(ggpubr)
# Scatter plot of the "衣着" and "居住" columns with a fitted regression line,
# its confidence band, the regression equation and the correlation coefficient.
ggscatter(
  data = Consumer, x = "衣着", y = "居住",
  add = "reg.line", conf.int = TRUE                  # regression line + confidence band
) +
  stat_regline_equation(label.x = 1700, label.y = 10700, size = 6) +  # equation position
  stat_cor(label.x = 1700, label.y = 11400, size = 6) +               # correlation position
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15)
  )
## `geom_smooth()` using formula 'y ~ x'
library(GGally)
# Scatter-plot matrix of columns 4 through 11 of Consumer.
ggpairs(Consumer, columns = seq(4, 11))
library(MASS); library(ade4); library(scatterplot3d)
# Load the built-in iris data set and remove all figure margins for the
# plots that follow.
data("iris")
par(mar = rep(0, 4))
# Panel function for pairs(): combines the panel's two coordinate vectors into
# a data frame and hands it to ade4's s.class(), grouped by iris$Species, drawn
# on top of the existing panel (add.plot = TRUE).
#
# X, Y: the coordinate vectors that pairs() passes to each panel.
# Reads `iris` from the calling environment and needs ade4 to be attached.
# Returns whatever s.class() returns.
pane1 <- function(X, Y) {
  XY <- cbind.data.frame(X, Y)
  # Argument names are spelled out in full (no partial matching) and logical
  # constants are written as TRUE/FALSE rather than the reassignable T/F.
  s.class(XY, iris$Species,
          include.origin = FALSE, add.plot = TRUE, clabel = 1.5,
          col = c("blue", "black", "red"), cpoint = 2, cstar = 0.5)
}
# Scatter-plot matrix of the four iris measurements, using pane1 for every panel.
pairs(iris[, 1:4], panel = pane1)
detach(package:ade4)

# Four 3-D scatter plots on a 2 x 2 grid. The previous mfrow setting is saved
# and restored afterwards so that later base-graphics plots are not squeezed
# into a quarter of the device.
old_par <- par(mfrow = c(2, 2))
mar0 <- c(3, 3, 1, 3)

# One colour per observation, picked by species.
species_cols <- c("blue", "black", "red")[iris$Species]
# Axis labels for columns 1..4 of iris, in column order.
axis_labels <- c("萼片长度", "萼片宽度", "花瓣长度", "花瓣宽度")
# Column triples (x, y, z) for the four plots.
column_triples <- list(c(1, 2, 3), c(2, 3, 4), c(3, 4, 1), c(4, 1, 2))

for (idx in column_triples) {
  scatterplot3d(iris[, idx[1]], iris[, idx[2]], iris[, idx[3]],
                mar = mar0, color = species_cols, pch = 19,
                xlab = axis_labels[idx[1]],
                ylab = axis_labels[idx[2]],
                zlab = axis_labels[idx[3]])
}

par(old_par)
1.什么是数据可视化?举几个数据可视化应用的例子. 答:数据可视化是指将数据以视觉形式(如图表或地图)呈现出来,以帮助人们理解数据的含义。上文绘制的散点图与回归直线、轮廓图、雷达图、星图、脸谱图、矩阵散点图和三维散点图,都是数据可视化的例子。
# Draw two normal samples of size 100 (sd 1000 and sd 10), compute their
# Pearson correlation and plot one against the other.
x <- rnorm(n = 100, mean = 0, sd = 1000)
y <- rnorm(n = 100, mean = 0, sd = 10)
s <- cor(x, y, method = "pearson")
summary(s)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.1665 -0.1665 -0.1665 -0.1665 -0.1665 -0.1665
plot(x, y)
此时样本相关系数为负(约 -0.17),呈弱负相关;但 x 与 y 是相互独立生成的随机数,这一相关性只是抽样波动的结果。
# Same experiment with a smaller spread for x (sd 100 instead of 1000):
# two normal samples of size 100, their Pearson correlation, and a scatter plot.
x <- rnorm(n = 100, mean = 0, sd = 100)
y <- rnorm(n = 100, mean = 0, sd = 10)
s <- cor(x, y, method = "pearson")
summary(s)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.2196 -0.2196 -0.2196 -0.2196 -0.2196 -0.2196
plot(x, y)
此时样本相关系数仍为负(约 -0.22),呈弱负相关,而不是正相关;由于每次运行生成的随机数不同,相关系数的符号和大小也会随之变化。
##第三题
# Scatter plot of the Weight and Height columns of student.csv with a fitted
# regression line, its confidence band, the regression equation and the
# correlation coefficient.
student <- read.csv(file = "C://Users//zhy//Desktop//作业//student.csv")
library(ggpubr)
ggscatter(
  data = student, x = "Weight", y = "Height",
  add = "reg.line", conf.int = TRUE                 # regression line + confidence band
) +
  stat_regline_equation(label.x = 80, label.y = 50, size = 6) +  # equation position
  stat_cor(label.x = 80, label.y = 55, size = 6) +               # correlation position
  theme_bw() +
  theme(
    axis.title.x = element_text(size = 15),
    axis.title.y = element_text(size = 15)
  )
## `geom_smooth()` using formula 'y ~ x'
## 第四题 (1)(2)已在轮廓图与雷达图中给出
(3)已在上述矩阵散点图中给出
第一组
# Profile plot for the first group of 50 rows of iris.csv.
# NOTE(review): assumes the file holds 50 consecutive rows per species - confirm.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
# Rows 1:50 (the original 2:50 silently dropped the first observation).
# Lines are grouped by column 5 (Species) rather than column 1, which is
# Sepal.Length and is itself one of the plotted axes.
ggparcoord(data = iris[1:50, ], columns = 1:4, groupColumn = 5,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +
  theme(
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10),   # legend font size
    axis.text = element_text(size = 10)      # axis tick-label font size
  ) +
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽(cm)")
第二组
# Profile plot for the second group of 50 rows (51:100) of iris.csv.
# NOTE(review): assumes the file holds 50 consecutive rows per species - confirm.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
# Lines are grouped by column 5 (Species) rather than column 1, which is
# Sepal.Length and is itself one of the plotted axes.
ggparcoord(data = iris[51:100, ], columns = 1:4, groupColumn = 5,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +
  theme(
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10),   # legend font size
    axis.text = element_text(size = 10)      # axis tick-label font size
  ) +
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽(cm)")
第三组
# Profile plot for the third group of 50 rows (101:150) of iris.csv.
# NOTE(review): assumes the file holds 50 consecutive rows per species - confirm.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(GGally)
# Lines are grouped by column 5 (Species) rather than column 1, which is
# Sepal.Length and is itself one of the plotted axes.
ggparcoord(data = iris[101:150, ], columns = 1:4, groupColumn = 5,
           scale = "globalminmax", showPoints = TRUE) +
  theme_bw() +
  theme(
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10),   # legend font size
    axis.text = element_text(size = 10)      # axis tick-label font size
  ) +
  labs(x = "鸢尾花特征", y = "鸢尾花的长宽(cm)")
雷达图
# Radar chart of iris.csv with one polygon per Species; alpha = 0 leaves the
# polygons unfilled.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2); library(ggiraphExtra)
ggRadar(data = iris, aes(group = Species), alpha = 0) +
  theme(
    axis.text = element_text(size = 10),
    legend.position = "right",               # legend placement
    # Font sizes are numeric; a quoted "10" is not a valid size.
    legend.text = element_text(size = 10)    # legend font size
  )
气泡图
# Bubble chart of sepal length against sepal width.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Bubble size must encode a numeric variable: mapping it to the discrete
# Species column triggers ggplot2's "Using size for a discrete variable is not
# advised" warning. Petal.Length is used as the third variable here.
ggplot(iris, aes(x = Sepal.Length, y = Sepal.Width, size = Petal.Length)) +
  geom_point(shape = 21, colour = 'black', fill = 'cornsilk')
# Bubble chart of petal length against petal width.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
# Bubble size must encode a numeric variable: mapping it to the discrete
# Species column triggers ggplot2's "Using size for a discrete variable is not
# advised" warning. Sepal.Length is used as the third variable here.
ggplot(iris, aes(x = Petal.Length, y = Petal.Width, size = Sepal.Length)) +
  geom_point(shape = 21, colour = 'black', fill = 'cornsilk')
星图
# Star (segment) plot of the four iris measurements, one star per row,
# labelled with the species name.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
data.m <- as.matrix(iris[, 1:4])
rownames(data.m) <- iris[, 5]
# The argument is `draw.segments`; the misspelled `draw.segmnts` was passed
# through to the low-level plotting calls, which only warned that it is not a
# graphical parameter, so segment diagrams were never drawn.
stars(data.m,
      draw.segments = TRUE,         # segment diagrams
      key.loc = c(20.5, 0.8, 5),    # position of the key
      cex = 0.4,                    # label font size
      mar = c(1, 0.1, 0.1, 1))      # figure margins
脸谱图
# Chernoff faces for the four iris measurements, one face per row.
iris <- read.csv("C://Users//zhy//Desktop//作业//iris.csv")
library(ggplot2)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
library(aplpack)
# Rebuild the measurement matrix here so this block does not depend on a
# data.m left over from earlier code.
data.m <- as.matrix(iris[, 1:4])
# Label each face with its species (column 5). Column 1 is Sepal.Length, a
# measurement, not an identifier.
rownames(data.m) <- iris[, 5]
faces(data.m,
      face.type = 1,   # coloured faces
      scale = TRUE)    # standardise the variables before drawing
## effect of variables:
## modified item Var
## "height of face " "Sepal.Length"
## "width of face " "Sepal.Width"
## "structure of face" "Petal.Length"
## "height of mouth " "Petal.Width"
## "width of mouth " "Sepal.Length"
## "smiling " "Sepal.Width"
## "height of eyes " "Petal.Length"
## "width of eyes " "Petal.Width"
## "height of hair " "Sepal.Length"
## "width of hair " "Sepal.Width"
## "style of hair " "Petal.Length"
## "height of nose " "Petal.Width"
## "width of nose " "Sepal.Length"
## "width of ear " "Sepal.Width"
## "height of ear " "Petal.Length"
# Read mtcars from CSV, preview the first rows, then draw a scatter-plot
# matrix of columns 2 through 11.
mtcars <- read.csv(file = "C://Users//zhy//Desktop//作业//mtcars.csv")
library(GGally)
head(mtcars)
## mpg cyl disp hp dr at wt qsec vs am.ge ar.car b
## 1 Mazda RX4 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4
## 2 Mazda RX4 Wag 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4
## 3 Datsun 710 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1
## 4 Hornet 4 Drive 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1
## 5 Hornet Sportabout 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2
## 6 Valiant 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1
ggpairs(mtcars, columns = seq(2, 11))
(2)绘制该数据集中任意三个变量的三维散点图和气泡图.
# {r}
# 散点图
# scatterplot3d(mtcars[, 3], mtcars[, 4], mtcars[, 5], mar = mar0, color = c("red"), pch = 19,xlab = "disp",ylab = "hp",zlab = "drat")
# 气泡图
# p1<-ggplot(mtcars,aes(wt,drat,fill=vs))+ geom_point(aes(size=hp,color=vs))
# col<-c("#000000", "#be0027", "#cf8d2e","#e4e932","#2c9f45")
# p1