# Attach the built-in datasets package and load the US state arrest data.
library(datasets)
data("USArrests", package = "datasets")
# Per-column quartiles, extremes and mean.
summary(object = USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Partition the 50 states into k = 3 clusters using all four numeric columns.
# - USArrests has only four columns, so the former `[, -5]` index (left over
#   from an iris example, where column 5 is the Species label) dropped nothing.
# - `centers` is spelled out in full instead of relying on partial matching of
#   `center`.
# - k-means starts from randomly chosen centers: the seed is fixed so the fit
#   is reproducible, and `nstart = 25` keeps the best of 25 random starts so a
#   single unlucky start cannot yield a poor local optimum.
# Cluster numbers are arbitrary labels and may be permuted relative to output
# recorded from an earlier, unseeded run.
set.seed(123)
ir3 <- kmeans(USArrests, centers = 3, iter.max = 200, nstart = 25)
ir3
## K-means clustering with 3 clusters of sizes 20, 14, 16
##
## Cluster means:
## Murder Assault UrbanPop Rape
## 1 4.270000 87.5500 59.75000 14.39000
## 2 8.214286 173.2857 70.64286 22.84286
## 3 11.812500 272.5625 68.31250 28.37500
##
## Clustering vector:
## Alabama Alaska Arizona Arkansas California
## 3 3 3 2 3
## Colorado Connecticut Delaware Florida Georgia
## 2 1 3 3 2
## Hawaii Idaho Illinois Indiana Iowa
## 1 1 3 1 1
## Kansas Kentucky Louisiana Maine Maryland
## 1 1 3 1 3
## Massachusetts Michigan Minnesota Mississippi Missouri
## 2 3 1 3 2
## Montana Nebraska Nevada New Hampshire New Jersey
## 1 1 3 1 2
## New Mexico New York North Carolina North Dakota Ohio
## 3 3 3 1 1
## Oklahoma Oregon Pennsylvania Rhode Island South Carolina
## 2 2 1 2 3
## South Dakota Tennessee Texas Utah Vermont
## 1 2 2 1 1
## Virginia Washington West Virginia Wisconsin Wyoming
## 2 2 1 1 2
##
## Within cluster sum of squares by cluster:
## [1] 19263.760 9136.643 19563.863
## (between_SS / total_SS = 86.5 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
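## Disabled: carried over from an iris example. USArrests has no `Species`
## column (there are no ground-truth class labels), so the confusion matrix
## and misclassification rate below cannot be computed for this data set.
##table(ir3$cluster, USArrests$Species)
##cm<-table(ir3$cluster,USArrests$Species)
##1-sum(diag(cm))/sum(cm)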
# Compact overview of the data frame: dimensions, column types, first values.
str(object = USArrests)
## 'data.frame': 50 obs. of 4 variables:
## $ Murder : num 13.2 10 8.1 8.8 9 7.9 3.3 5.9 15.4 17.4 ...
## $ Assault : int 236 263 294 190 276 204 110 238 335 211 ...
## $ UrbanPop: int 58 48 80 50 91 78 77 72 80 60 ...
## $ Rape : num 21.2 44.5 31 19.5 40.6 38.7 11.1 15.8 31.9 25.8 ...
# The observations are identified by their row names (one per state).
rownames(USArrests)
## [1] "Alabama" "Alaska" "Arizona" "Arkansas"
## [5] "California" "Colorado" "Connecticut" "Delaware"
## [9] "Florida" "Georgia" "Hawaii" "Idaho"
## [13] "Illinois" "Indiana" "Iowa" "Kansas"
## [17] "Kentucky" "Louisiana" "Maine" "Maryland"
## [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
## [25] "Missouri" "Montana" "Nebraska" "Nevada"
## [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
## [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
## [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
## [41] "South Dakota" "Tennessee" "Texas" "Utah"
## [45] "Vermont" "Virginia" "Washington" "West Virginia"
## [49] "Wisconsin" "Wyoming"
## Data Preprocess
# Number of rows that contain at least one missing value.
nrow(USArrests) - sum(complete.cases(USArrests))
## [1] 0
## Remove or impute missing objects
# Keeps only fully observed rows; nothing is dropped when the count above is 0.
df <- stats::na.omit(object = USArrests)
## Rescale (or normalization, etc.)
# Standardisation is currently disabled, so `df` keeps the original
# measurement scale of every column.
##df <-scale(df,center=T,scale = T)
# Column-wise summary of the cleaned data.
summary(object = df)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Column-wise standard deviation and mean of the raw data.
# vapply() iterates over the data frame's columns directly and enforces exactly
# one numeric value per column, whereas apply() first coerces the whole data
# frame to a matrix. The result is the same named numeric vector.
vapply(USArrests, sd, numeric(1))
## Murder Assault UrbanPop Rape
## 4.355510 83.337661 14.474763 9.366385
vapply(USArrests, mean, numeric(1))
## Murder Assault UrbanPop Rape
## 7.788 170.760 65.540 21.232
# Column-wise standard deviation of the cleaned data. apply() over MARGIN = 2
# is kept on purpose: it works for a data frame and also for the matrix that
# `df` would become if the commented-out scale() step were re-enabled.
apply(X = df, MARGIN = 2, FUN = sd)
## Murder Assault UrbanPop Rape
## 4.355510 83.337661 14.474763 9.366385
library(factoextra)
## Warning: package 'factoextra' was built under R version 4.5.1
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Pairwise distances between the rows of `df`; get_dist()'s default method is
# named explicitly.
# NOTE(review): `df` is not rescaled, so the column with the largest spread
# (Assault) presumably dominates these distances. Confirm this is intended.
distance <- get_dist(x = df, method = "euclidean")
library(mlr3)
## Warning: package 'mlr3' was built under R version 4.5.1
library(mlr3cluster)
## Warning: package 'mlr3cluster' was built under R version 4.5.1
library(mlr3viz)
## Warning: package 'mlr3viz' was built under R version 4.5.1
library(devtools)
## Warning: package 'devtools' was built under R version 4.5.1
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.5.1
# Heat map of the distance matrix, coloured along a low / mid / high gradient.
fviz_dist(
  dist.obj = distance,
  gradient = list(low = "#00AFBB", mid = "white", high = "green")
)
## R Markdown
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```