This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
workbook <- "c:/r.csv"
mydatafram0 <- read.csv(workbook,1)
plot(mydatafram0$date,mydatafram0$PM2.5,main = "每日PM2.5值分布散点图",xlab = "日期",ylab = "PM2.5的值")
library(ggplot2)
ggplot(mydatafram0,aes(x = PM2.5, y = PM10)) +geom_line()
#可以看出PM2.5和PM10成线性关系
barplot(table(mydatafram0$weather),main = "所有的天气状况分布统计柱状图",col = 7)
pie(table(mydatafram0$qua),main = "所有的天气质量分布饼图")
boxplot(mydatafram0$AQI,main = "AQI箱线图")
boxplot(mydatafram0$PM2.5,main = "PM2.5箱线图")
stars(mydatafram0[c("PM10","AQI","PM2.5")],main = "关于date,AQI,PM2.5属性的星相图")
library(scatterplot3d)
scatterplot3d(mydatafram0[c("AQI","PM2.5","PM10")])
#PM2.5和PM10的值越高,AQI的值越高,空气质量越差
library(plotrix)
pie3D(table(mydatafram0$qua))
pie3D(table(mydatafram0$qua),main = "所有的天气质量分布三维饼状图",cex = 3)
library(Matrix)
library(arules)
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(grid)
library(arulesViz)
windqua <- data.frame(mydatafram0$wind,mydatafram0$qua) #生成新的数据框windqua
rules0 = apriori(windqua,parameter = list(support = 0.005,confidence = 0.5))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.5 0.1 1 none FALSE TRUE 0.005 1 10
## target ext
## rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 0
## Warning in apriori(windqua, parameter = list(support = 0.005, confidence = 0.5)): You chose a very low absolute support count of 0. You might run out of memory! Increase minimum support.
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[40 item(s), 175 transaction(s)] done [0.00s].
## sorting and recoding items ... [40 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [27 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
rules0 #显示rules0中生成关联规则条数
## set of 27 rules
Sys.setlocale(category = "LC_ALL", locale = "us")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
inspect(rules0) #查看生成的关联规则
## lhs rhs support confidence lift
## 1 {mydatafram0.wind=ÎÞ³ÖÐø·çÏò ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 2 {mydatafram0.wind=±±·ç 4-5¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 3 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 4 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 5 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 6 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0000000 15.909091
## 7 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÓÅ} 0.005714286 1.0000000 21.875000
## 8 {mydatafram0.wind=±±·ç 3-4¼¶ /Î÷±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 9 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 10 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷±±·ç 3-4¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0000000 15.909091
## 11 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 12 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 13 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /¶«·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 14 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 15 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 16 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖжÈÎÛȾ} 0.005714286 1.0000000 5.833333
## 17 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.011428571 1.0000000 3.888889
## 18 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=ÑÏÖØÎÛȾ} 0.005714286 0.5000000 8.750000
## 19 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 0.5000000 1.232394
## 20 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.011428571 1.0000000 2.464789
## 21 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.011428571 0.6666667 1.643192
## 22 {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖжÈÎÛȾ} 0.011428571 0.5000000 2.916667
## 23 {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.011428571 0.5000000 1.232394
## 24 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /Î÷ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.017142857 0.7500000 1.848592
## 25 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.034285714 1.0000000 2.464789
## 26 {mydatafram0.qua=ÑÏÖØÎÛȾ} => {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} 0.034285714 0.6000000 1.981132
## 27 {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /¶«·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.040000000 0.5384615 1.327194
rules1 = apriori(windqua,parameter = list(support = 0.005,confidence = 0.60))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport support minlen maxlen
## 0.6 0.1 1 none FALSE TRUE 0.005 1 10
## target ext
## rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 0
## Warning in apriori(windqua, parameter = list(support = 0.005, confidence = 0.6)): You chose a very low absolute support count of 0. You might run out of memory! Increase minimum support.
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[40 item(s), 175 transaction(s)] done [0.00s].
## sorting and recoding items ... [40 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [22 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
rules1 #显示rules0中生成关联规则条数
## set of 22 rules
Sys.setlocale(category = "LC_ALL", locale = "us")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
inspect(rules1) #查看生成的关联规则
## lhs rhs support confidence lift
## 1 {mydatafram0.wind=ÎÞ³ÖÐø·çÏò ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 2 {mydatafram0.wind=±±·ç 4-5¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 3 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 4 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 5 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 6 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0000000 15.909091
## 7 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÓÅ} 0.005714286 1.0000000 21.875000
## 8 {mydatafram0.wind=±±·ç 3-4¼¶ /Î÷±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 9 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 10 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷±±·ç 3-4¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0000000 15.909091
## 11 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 12 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 13 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /¶«·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 14 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1.0000000 3.888889
## 15 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1.0000000 2.464789
## 16 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖжÈÎÛȾ} 0.005714286 1.0000000 5.833333
## 17 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.011428571 1.0000000 3.888889
## 18 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.011428571 1.0000000 2.464789
## 19 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.011428571 0.6666667 1.643192
## 20 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /Î÷ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.017142857 0.7500000 1.848592
## 21 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.034285714 1.0000000 2.464789
## 22 {mydatafram0.qua=ÑÏÖØÎÛȾ} => {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} 0.034285714 0.6000000 1.981132
rules.sorted_sup = sort(rules0,by="support")
inspect(rules.sorted_sup [1:5])
## lhs rhs support confidence lift
## 1 {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /¶«·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.04000000 0.5384615 1.327194
## 2 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.03428571 1.0000000 2.464789
## 3 {mydatafram0.qua=ÑÏÖØÎÛȾ} => {mydatafram0.wind=¶«±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} 0.03428571 0.6000000 1.981132
## 4 {mydatafram0.wind=¶«·ç ¡Ü3¼¶ /Î÷ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.01714286 0.7500000 1.848592
## 5 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /ÄÏ·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.01142857 1.0000000 3.888889
rules.sorted_on = sort(rules0,by = "confidence")
inspect(rules.sorted_on [1:5])
## lhs rhs support confidence lift
## 1 {mydatafram0.wind=ÎÞ³ÖÐø·çÏò ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1 2.464789
## 2 {mydatafram0.wind=±±·ç 4-5¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1 2.464789
## 3 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /±±·ç 3-4¼¶} => {mydatafram0.qua=Á¼} 0.005714286 1 2.464789
## 4 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1 3.888889
## 5 {mydatafram0.wind=ÄÏ·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=Çá¶ÈÎÛȾ} 0.005714286 1 3.888889
rules.sorted_lift = sort(rules0,by = "lift")
inspect(rules.sorted_lift [1:5])#提升度可以说是筛选关联规则最可靠的指标
## lhs rhs support confidence lift
## 1 {mydatafram0.wind=Î÷ÄÏ·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÓÅ} 0.005714286 1.0 21.875000
## 2 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /Î÷±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0 15.909091
## 3 {mydatafram0.wind=Î÷·ç ¡Ü3¼¶ /Î÷±±·ç 3-4¼¶} => {mydatafram0.qua=ÖØ¶ÈÎÛȾ} 0.005714286 1.0 15.909091
## 4 {mydatafram0.wind=Î÷±±·ç ¡Ü3¼¶ /Î÷·ç ¡Ü3¼¶} => {mydatafram0.qua=ÑÏÖØÎÛȾ} 0.005714286 0.5 8.750000
## 5 {mydatafram0.wind=±±·ç ¡Ü3¼¶ /¶«±±·ç ¡Ü3¼¶} => {mydatafram0.qua=ÖжÈÎÛȾ} 0.005714286 1.0 5.833333
plot(rules0) #对rules0作散点图
#plot(rules0,interactive = TRUE) #绘制互动散点图
plot(rules.sorted_sup,method = "grouped") #绘制关联规则分组图
setwd("C:/")
date=read.table("date.csv",header=F,sep=",")#读取数据集,并命名为date
dim(date)#获取数据维度
## [1] 175 3
names(date)=c("date","PM2.5","AQI")#设置变量名为date,PM2.5,AQI
var=date$date#设置变量名为date,PM2.5,AQI
var=as.character(var)#将var转换为字符型
for(i in 1:175)row.names(date)[i]=var[i]#将数据集date的行名命名为相应的日期
plot(date$PM2.5,date$AQI)#画出所有日期的样本点
c=which(date$date=="2016/3/1")#选出2016年3月1日那天的数据的位置
points(date[c(c),-1],pch=16,col="blue")#获取2016/3/1的位置为蓝色点
d=which(date$date=="2016/3/6")
points(date[c(d),-1],pch=16,col="red")#获取2016/3/6的位置为红色点
legend(date$AQI[c],date$PM2.5[c],"2016/3/1",bty="n",xjust=0.5,cex=0.8)
legend(date$AQI[c],date$PM2.5[c],"2016/3/6",bty="n",xjust=0.5,cex=0.8)
legend(date$AQI[c],date$PM2.5[c],"2015/11/2",bty="n",xjust=0.5,cex=0.8)#标出各个点的日期
fit_km1=kmeans(date[,-1],center=10)#用kmeans算法对date数据集进行聚类
print(fit_km1)
## K-means clustering with 10 clusters of sizes 6, 17, 26, 16, 21, 11, 12, 24, 22, 20
##
## Cluster means:
## PM2.5 AQI
## 1 18.83333 70.0000
## 2 39.29412 291.2353
## 3 43.76923 211.9231
## 4 35.25000 149.0625
## 5 75.04762 326.4286
## 6 218.90909 335.0000
## 7 119.08333 344.8333
## 8 57.58333 254.0417
## 9 95.04545 285.0000
## 10 150.00000 329.3000
##
## Clustering vector:
## 2016/3/1 2016/3/2 2016/3/3 2016/3/4 2016/3/5 2016/3/6
## 3 3 3 9 8 5
## 2016/3/7 2016/3/8 2016/3/9 2016/3/10 2016/3/11 2016/3/12
## 5 7 5 2 2 7
## 2016/3/13 2016/3/14 2016/3/15 2016/3/16 2016/3/17 2016/3/18
## 8 8 8 10 10 9
## 2016/3/19 2016/3/20 2016/3/21 2016/3/22 2016/3/23 2016/2/1
## 9 5 5 7 7 4
## 2016/2/2 2016/2/3 2016/2/4 2016/2/5 2016/2/6 2016/2/7
## 4 9 9 3 1 4
## 2016/2/8 2016/2/9 2016/2/10 2016/2/11 2016/2/12 2016/2/13
## 6 7 10 10 9 2
## 2016/2/14 2016/2/15 2016/2/16 2016/2/17 2016/2/18 2016/2/19
## 2 2 8 3 2 5
## 2016/2/20 2016/2/21 2016/2/22 2016/2/23 2016/2/24 2016/2/25
## 5 8 9 8 5 9
## 2016/2/26 2016/2/27 2016/2/28 2016/2/29 2016/1/1 2016/1/2
## 8 8 3 4 10 6
## 2016/1/3 2016/1/4 2016/1/5 2016/1/6 2016/1/7 2016/1/8
## 6 6 6 6 10 10
## 2016/1/9 2016/1/10 2016/1/11 2016/1/12 2016/1/13 2016/1/14
## 10 10 7 7 7 3
## 2016/1/15 2016/1/16 2016/1/17 2016/1/18 2016/1/19 2016/1/20
## 3 9 8 3 4 3
## 2016/1/21 2016/1/22 2016/1/23 2016/1/24 2016/1/25 2016/1/26
## 5 3 2 3 3 8
## 2016/1/27 2016/1/28 2016/1/29 2016/1/30 2016/1/31 2015/12/1
## 9 9 10 10 3 9
## 2015/12/2 2015/12/3 2015/12/4 2015/12/5 2015/12/6 2015/12/7
## 2 2 2 9 7 10
## 2015/12/8 2015/12/9 2015/12/10 2015/12/11 2015/12/12 2015/12/13
## 10 6 10 9 8 4
## 2015/12/14 2015/12/15 2015/12/16 2015/12/17 2015/12/18 2015/12/19
## 4 3 3 2 8 9
## 2015/12/20 2015/12/21 2015/12/22 2015/12/23 2015/12/24 2015/12/25
## 10 6 6 6 9 4
## 2015/12/26 2015/12/27 2015/12/28 2015/12/29 2015/12/30 2015/12/31
## 4 5 10 10 9 10
## 2015/11/1 2015/11/2 2015/11/3 2015/11/4 2015/11/5 2015/11/6
## 4 3 8 9 9 4
## 2015/11/7 2015/11/8 2015/11/9 2015/11/10 2015/11/11 2015/11/12
## 1 1 3 8 4 3
## 2015/11/13 2015/11/14 2015/11/15 2015/11/16 2015/11/17 2015/11/18
## 8 8 3 5 5 8
## 2015/11/19 2015/11/20 2015/11/21 2015/11/22 2015/11/23 2015/11/24
## 2 2 3 3 4 1
## 2015/11/25 2015/11/26 2015/11/27 2015/11/28 2015/11/29 2015/11/30
## 4 3 2 7 10 6
## 2015/10/1 2015/10/2 2015/10/3 2015/10/4 2015/10/5 2015/10/6
## 5 5 5 5 5 5
## 2015/10/7 2015/10/8 2015/10/9 2015/10/10 2015/10/11 2015/10/12
## 5 2 2 3 8 8
## 2015/10/13 2015/10/14 2015/10/15 2015/10/16 2015/10/17 2015/10/18
## 5 9 8 9 9 5
## 2015/10/19 2015/10/20 2015/10/21 2015/10/22 2015/10/23 2015/10/24
## 7 10 7 8 3 8
## 2015/10/25 2015/10/26 2015/10/27 2015/10/28 2015/10/29 2015/10/30
## 4 1 3 8 2 4
## 2015/10/31
## 1
##
## Within cluster sum of squares by cluster:
## [1] 2840.833 5888.588 11788.462 7601.938 9930.095 9604.909 3010.583
## [8] 7794.792 8774.955 7468.200
## (between_SS / total_SS = 94.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss"
## [5] "tot.withinss" "betweenss" "size" "iter"
## [9] "ifault"
fit_km1$centers #获取各类别中心坐标
## PM2.5 AQI
## 1 18.83333 70.0000
## 2 39.29412 291.2353
## 3 43.76923 211.9231
## 4 35.25000 149.0625
## 5 75.04762 326.4286
## 6 218.90909 335.0000
## 7 119.08333 344.8333
## 8 57.58333 254.0417
## 9 95.04545 285.0000
## 10 150.00000 329.3000
plot(date[,-1],pch=(fit_km1$cluster-1))#以10种不同的形态展示
points(fit_km1$centers,pch=8,col="purple")#将10个类别的中心以紫色星号表示
legend(fit_km1$centers[1,1],fit_km1$centers[1,2],"Center_1",bty="n",xjust=1,yjust=0,cex=0.8,text.col = "red")
#添加标注Center-1
legend(fit_km1$centers[2,1]-2,fit_km1$centers[2,2],"Center_2",bty="n",xjust=0,yjust=0,cex=0.8,text.col = "blue")
legend(fit_km1$centers[3,1],fit_km1$centers[3,2],"Center_3",bty="n",xjust=0.5,cex=0.8,text.col = "green")
result=rep(0,174)
fit_km2=kmeans(date[,-1],center=10)#取类别center=10
cluster201631=fit_km2$cluster[which(date$date=="2016/3/1")]#获取2016/3/1
which(fit_km2$cluster==cluster201631)#选择出与2016/3/1同类别的日期
## 2016/3/1 2016/3/2 2016/3/3 2016/2/5 2016/2/17 2016/2/28
## 1 2 3 28 40 51
## 2016/1/14 2016/1/15 2016/1/18 2016/1/20 2016/1/22 2016/1/25
## 66 67 70 72 74 77
## 2016/1/31 2015/12/15 2015/12/16 2015/12/26 2015/11/2 2015/11/9
## 83 98 99 109 116 123
## 2015/11/12 2015/11/15 2015/11/21 2015/11/22 2015/11/23 2015/11/25
## 126 129 135 136 137 139
## 2015/11/26 2015/10/10 2015/10/23 2015/10/27 2015/10/30
## 140 154 167 171 174
f=which(fit_km2$cluster==cluster201631)#选择出与2016/3/1同类别的日期
points(date[c(f),-1],pch=16,col="blue")#获取2016/3/1同类别的点
for(k in 1:174)
{fit_km1=kmeans(date[,-1],center=k)#取类别k进行均值聚类
result[k]=fit_km1$betweenss/fit_km1$totss#计算类别为K时的聚类优度,存入result
}
round(result,2)
## [1] 0.00 0.55 0.75 0.83 0.87 0.90 0.91 0.93 0.93 0.94 0.95 0.95 0.96 0.96
## [15] 0.96 0.96 0.97 0.97 0.97 0.97 0.98 0.97 0.97 0.98 0.98 0.98 0.98 0.98
## [29] 0.98 0.98 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99
## [43] 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99 0.99
## [57] 0.99 0.99 0.99 1.00 0.99 1.00 0.99 1.00 0.99 0.99 0.99 1.00 1.00 1.00
## [71] 1.00 1.00 0.99 0.99 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [85] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [99] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [113] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [127] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [141] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [155] 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00
## [169] 1.00 1.00 1.00 1.00 1.00 1.00
plot(1:174,result,type="b",main="choosing",xlab="number:1 to 174",ylab="betweenss/totss")#对result进行制图
points(10,result[10],pch=16,col="red")#将类别数为10的点用实心红点标出
legend(10,result[10],paste("(10,",sprintf("%.1f%%",result[10]*100),")",sep=""),bty="n",xjust=0.3,cex=0.8)
fit_km2=kmeans(date[,-1],center=10)#取类别center=10
cluster201631=fit_km2$cluster[which(date$date=="2016/3/1")]#获取2016/3/1
which(fit_km2$cluster==cluster201631)#选择出与2016/3/1同类别的日期
## 2016/3/1 2016/3/2 2016/3/3 2016/2/5 2016/2/17 2016/2/28
## 1 2 3 28 40 51
## 2016/1/14 2016/1/15 2016/1/18 2016/1/20 2016/1/22 2016/1/24
## 66 67 70 72 74 76
## 2016/1/25 2016/1/31 2015/12/15 2015/12/16 2015/12/26 2015/11/2
## 77 83 98 99 109 116
## 2015/11/9 2015/11/12 2015/11/15 2015/11/21 2015/11/22 2015/11/23
## 123 126 129 135 136 137
## 2015/11/25 2015/11/26 2015/10/10 2015/10/23 2015/10/27 2015/10/30
## 139 140 154 167 171 174