作者:带土;Daitu; Adam 邮箱:2505131775@qq.com
1、可以快速帮助研究者检查数据的缺失值,及数据的分布情况,帮助数据清洗的快速完成。
## Loading required package: colorspace
## Loading required package: grid
## Loading required package: data.table
## VIM is ready to use.
## Since version 4.0.0 the GUI is in its own package VIMGUI.
##
## Please use the package to use the new (and old) GUI.
## Suggestions and bug-reports can be submitted at: https://github.com/alexkowa/VIM/issues
##
## Attaching package: 'VIM'
## The following object is masked from 'package:datasets':
##
## sleep
## BodyWgt BrainWgt NonD Dream Sleep Span Gest Pred Exp Danger
## 1 6654.000 5712.0 NA NA 3.3 38.6 645 3 5 3
## 2 1.000 6.6 6.3 2.0 8.3 4.5 42 3 1 3
## 3 3.385 44.5 NA NA 12.5 14.0 60 1 1 1
## 4 0.920 5.7 NA NA 16.5 NA 25 5 2 3
## 5 2547.000 4603.0 2.1 1.8 3.9 69.0 624 3 5 4
## 6 10.550 179.5 9.1 0.7 9.8 27.0 180 4 4 4
## 可视化缺失数据的分布
par(mfrow = c(2,2))
hist(sleep$NonD,breaks = 12, col = "lightblue")
hist(sleep$Dream,breaks = 12, col = "lightblue")
hist(sleep$Span,breaks = 12, col = "lightblue")
hist(sleep$Gest,breaks = 12, col = "lightblue")2、通过对数据进行可视化,可以发现数据中是否存在异常值,或者对数据选择合适的模型。
## x1 y1 x2 y2 x3 y3 x4 y4
## 1 10 8.04 10 9.14 10 7.46 8 6.58
## 2 8 6.95 8 8.14 8 6.77 8 5.76
## 3 13 7.58 13 8.74 13 12.74 8 7.71
## 4 9 8.81 9 8.77 9 7.11 8 8.84
## 5 11 8.33 11 9.26 11 7.81 8 8.47
## 6 14 9.96 14 8.10 14 8.84 8 7.04
## 可视化数据的走势,使用合适的处理方法
p1 <- ggplot(lmdata4,aes(x=x1,y=y1))+
geom_point()+geom_smooth(method = "lm")
p2 <- ggplot(lmdata4,aes(x=x2,y=y2))+
geom_point()+geom_smooth(method = "lm")
p3 <- ggplot(lmdata4,aes(x=x3,y=y3))+
geom_point()+geom_smooth(method = "lm")
p4 <- ggplot(lmdata4,aes(x=x4,y=y4))+
geom_point()+geom_smooth(method = "lm")
gridExtra::grid.arrange(p1,p2,p3,p4,nrow=2)## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## 可视化适合Kmeans算法的图像
kdata <- data.frame(rbind(matrix(rnorm(100, sd = 0.3), ncol = 2),
matrix(rnorm(100, mean = 2, sd = 0.4), ncol = 2)))
colnames(kdata) <- c("X1", "X2")
p1 <- ggplot(kdata,aes(x = X1,y = X2))+geom_point()
## 可视化双月图像,适合密度聚类
moons <- read.csv("data/moonsdatas.csv")
head(moons)## X1 X2 Y
## 1 0.7424201 0.58556710 0
## 2 1.7444393 0.03909624 1
## 3 1.6934791 -0.19061851 1
## 4 0.7395695 0.63927458 0
## 5 -0.3780247 0.97481407 0
## 6 0.8943966 0.26841801 0
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## mpg cyl disp hp drat wt qsec vs am gear carb
## mpg 1.00 -0.85 -0.85 -0.78 0.68 -0.87 0.42 0.66 0.60 0.48 -0.55
## cyl -0.85 1.00 0.90 0.83 -0.70 0.78 -0.59 -0.81 -0.52 -0.49 0.53
## disp -0.85 0.90 1.00 0.79 -0.71 0.89 -0.43 -0.71 -0.59 -0.56 0.39
## hp -0.78 0.83 0.79 1.00 -0.45 0.66 -0.71 -0.72 -0.24 -0.13 0.75
## drat 0.68 -0.70 -0.71 -0.45 1.00 -0.71 0.09 0.44 0.71 0.70 -0.09
## wt -0.87 0.78 0.89 0.66 -0.71 1.00 -0.17 -0.55 -0.69 -0.58 0.43
## qsec 0.42 -0.59 -0.43 -0.71 0.09 -0.17 1.00 0.74 -0.23 -0.21 -0.66
## vs 0.66 -0.81 -0.71 -0.72 0.44 -0.55 0.74 1.00 0.17 0.21 -0.57
## am 0.60 -0.52 -0.59 -0.24 0.71 -0.69 -0.23 0.17 1.00 0.79 0.06
## gear 0.48 -0.49 -0.56 -0.13 0.70 -0.58 -0.21 0.21 0.79 1.00 0.27
## carb -0.55 0.53 0.39 0.75 -0.09 0.43 -0.66 -0.57 0.06 0.27 1.00
ggcorrplot(corr,method = "circle",type = "upper",title = "corr heatmap",
show.diag = TRUE,ggtheme = ggplot2::theme_bw,lab = TRUE,
lab_size = 2.5)##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(stringr)
library(RColorBrewer)
## 数据准备
data(AirPassengers)
AirPas <- as.data.frame(AirPassengers)
AirPas$x <- as.numeric(AirPas$x)
AirPas$yearmonth <- yearmon(time(AirPassengers))
AirPas$year <- format(AirPas$yearmonth,format = "%Y")
AirPas$month <- format(AirPas$yearmonth,"%B")
## 热力图分析数据的变化
AirPas$month <- factor(AirPas$month,levels = c("一月","二月","三月",
"四月","五月", "六月","七月",
"八月","九月", "十月","十一月",
"十二月"))
head(AirPas)## x yearmonth year month
## 1 112 1 1949 1949 一月
## 2 118 2 1949 1949 二月
## 3 132 3 1949 1949 三月
## 4 129 4 1949 1949 四月
## 5 121 5 1949 1949 五月
## 6 135 6 1949 1949 六月
ggplot(AirPas,aes(x=year,y=month))+
geom_tile(aes(fill = x))+
scale_fill_gradientn(colours=rev(brewer.pal(10,"RdYlGn")))+
geom_text(aes(label=x),size=2.5)+
ggtitle("飞机乘客数量变化")## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 数据kmeans聚类为两类
k2 <- kmeans(iris[,1:4],centers = 2)
k3 <- kmeans(iris[,1:4],centers = 3)
k4 <- kmeans(iris[,1:4],centers = 4)
k5 <- kmeans(iris[,1:4],centers = 5)
p2 <- fviz_cluster(k2,iris[,1:4],geom = "point",main = "2 clusters")
p3 <- fviz_cluster(k3,iris[,1:4],geom = "point",main = "3 clusters")
p4 <- fviz_cluster(k4,iris[,1:4],geom = "point",main = "4 clusters")
p5 <- fviz_cluster(k5,iris[,1:4],geom = "point",main = "5 clusters")
grid.arrange(p2,p3,p4,p5,nrow=2)## Loading required package: Rcpp
## Loading required package: rlang
##
## Attaching package: 'rlang'
## The following object is masked from 'package:data.table':
##
## :=
## Loading required package: timeDate
##
## Attaching package: 'timeSeries'
## The following object is masked from 'package:zoo':
##
## time<-
## 数据准备
data(AirPassengers)
AirPas <- as.data.frame(AirPassengers)
colnames(AirPas) <- "y"
AirPas$ds <- as.yearmon(time(AirPassengers))
head(AirPas)## y ds
## 1 112 1 1949
## 2 118 2 1949
## 3 132 3 1949
## 4 129 4 1949
## 5 121 5 1949
## 6 135 6 1949
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
## 预测后面两年的数据,并将预测结果可视化
future1 <- make_future_dataframe(mode1, periods = 24,freq = "month")
forecast1 <- predict(mode1, future1)
p1 <- plot(mode1, forecast1)+labs(title = "model 1",x = "")
## 建立不同的模型,模型2
mode2 <- prophet(AirPas,growth = "linear",
yearly.seasonality = TRUE,weekly.seasonality = FALSE,
daily.seasonality = FALSE,seasonality.mode = "multiplicative")
## 预测后面两年的数据,并将预测结果可视化
future2 <- make_future_dataframe(mode2, periods = 24,freq = "month")
forecast2 <- predict(mode2, future2)
p2 <- plot(mode2, forecast2)+labs(title = "model 2",x = "")
grid.arrange(p1,p2,nrow=2)## pclass survived sex age sibsp parch
## 1 1st survived female 29.0000 0 0
## 2 1st survived male 0.9167 1 2
## 3 1st died female 2.0000 1 2
## 4 1st died male 30.0000 1 2
## 5 1st died female 25.0000 1 2
## 6 1st survived male 48.0000 0 0
## n= 1309
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 1309 500 died (0.6180290 0.3819710)
## 2) sex=male 843 161 died (0.8090154 0.1909846)
## 4) age>=9.5 796 136 died (0.8291457 0.1708543) *
## 5) age< 9.5 47 22 survived (0.4680851 0.5319149)
## 10) sibsp>=2.5 20 1 died (0.9500000 0.0500000) *
## 11) sibsp< 2.5 27 3 survived (0.1111111 0.8888889) *
## 3) sex=female 466 127 survived (0.2725322 0.7274678)
## 6) pclass=3rd 216 106 died (0.5092593 0.4907407)
## 12) sibsp>=2.5 21 3 died (0.8571429 0.1428571) *
## 13) sibsp< 2.5 195 92 survived (0.4717949 0.5282051)
## 26) age>=16.5 162 79 died (0.5123457 0.4876543)
## 52) parch>=3.5 9 1 died (0.8888889 0.1111111) *
## 53) parch< 3.5 153 75 survived (0.4901961 0.5098039)
## 106) age>=27.5 44 17 died (0.6136364 0.3863636) *
## 107) age< 27.5 109 48 survived (0.4403670 0.5596330)
## 214) age< 21.5 28 11 died (0.6071429 0.3928571) *
## 215) age>=21.5 81 31 survived (0.3827160 0.6172840) *
## 27) age< 16.5 33 9 survived (0.2727273 0.7272727) *
## 7) pclass=1st,2nd 250 17 survived (0.0680000 0.9320000) *
## 可视化神将网络的模型
library(neuralnet)
library(NeuralNetTools)
data("iris")
iris$Species <- as.integer(iris$Species )
head(iris)## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 1
## 2 4.9 3.0 1.4 0.2 1
## 3 4.7 3.2 1.3 0.2 1
## 4 4.6 3.1 1.5 0.2 1
## 5 5.0 3.6 1.4 0.2 1
## 6 5.4 3.9 1.7 0.4 1
mlpfit <- neuralnet(Species~Sepal.Length+Sepal.Width+Petal.Length+Petal.Width,
data = iris,hidden = c(5,8,5),linear.output = FALSE)
par(cex = 0.6)
plotnet(mlpfit,pos_col = "red", neg_col = "grey")## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 1
## 2 4.9 3.0 1.4 0.2 1
## 3 4.7 3.2 1.3 0.2 1
## 4 4.6 3.1 1.5 0.2 1
## 5 5.0 3.6 1.4 0.2 1
## 6 5.4 3.9 1.7 0.4 1
iris$Species <- as.factor(iris$Species)
ggplot(iris,aes(x = Sepal.Length,y = Sepal.Width))+
theme_dark(base_family = "STKaiti")+
geom_point(aes(shape = Species,colour = Species))+
labs(x = "X轴",y = "Y轴",
title = "图像名称",subtitle = "可视化图子名称",
shape = "图例",colour = "图例")+
theme(plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
legend.title = element_text(hjust = 0.5),
legend.position = c(0.1,0.8))+
geom_text(aes(x=6,y = 3,label = "绘图区"),
family= "STKaiti",size = 20,colour = "lightblue",
alpha = 0.6)