# Clustering Analysis

# The Weather Dataset

library(rattle)
## Warning: package 'rattle' was built under R version 4.1.3
## Loading required package: tibble
## Loading required package: bitops
## Rattle: A free graphical interface for data science with R.
## Version 5.5.1 Copyright (c) 2006-2021 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
# Report the size of the weather data set (rows, columns).
dim(weather)
## [1] 366  24
# Fix the RNG seed so the random row selection below is reproducible.
set.seed(42)
# Draw 5 row indices at random. seq_len() is used instead of 1:nrow(), which
# would silently produce c(1, 0) if the table had no rows.
obs1 <- sample(seq_len(nrow(weather)), 5)
# Variables kept for the small example clusters.
vars <- c("MinTemp", "Rainfall", "Evaporation")
# First example cluster: the sampled rows restricted to the chosen variables.
cluster1 <- weather[obs1, vars]
str(cluster1)
## tibble [5 x 3] (S3: tbl_df/tbl/data.frame)
##  $ MinTemp    : num [1:5] 12.8 3.9 3.5 17 2.1
##  $ Rainfall   : num [1:5] 0 3.4 0 2 0
##  $ Evaporation: num [1:5] 6.4 6.6 6 9 2.6
# Per-variable means of the first cluster. `cluster1` is a stand-alone tibble,
# not a column of `weather`, so `weather$cluster1` is NULL and averaging it
# can only give NaN; take the column means of `cluster1` itself instead.
clus1 <- colMeans(cluster1)
clus1
##     MinTemp    Rainfall Evaporation 
##        7.86        1.08        6.12 
# Summarise the sunshine and wind-gust variables (columns 7 to 9 of `weather`),
# selected by name so the call does not depend on column order.
summary(weather[c("Sunshine", "WindGustDir", "WindGustSpeed")])
##     Sunshine       WindGustDir  WindGustSpeed  
##  Min.   : 0.000   NW     : 73   Min.   :13.00  
##  1st Qu.: 5.950   NNW    : 44   1st Qu.:31.00  
##  Median : 8.600   E      : 37   Median :39.00  
##  Mean   : 7.909   WNW    : 35   Mean   :39.84  
##  3rd Qu.:10.500   ENE    : 30   3rd Qu.:46.00  
##  Max.   :13.600   (Other):144   Max.   :98.00  
##  NA's   :3        NA's   :  3   NA's   :2
# Draw 20 random row indices and drop any that already belong to cluster1, so
# the two clusters never share an observation. Because of the setdiff(), the
# second cluster can end up with fewer than 20 rows (19 in this run).
# seq_len() replaces 1:nrow(), which would yield c(1, 0) on an empty table.
obs2 <- setdiff(sample(seq_len(nrow(weather)), 20), obs1)
# Second example cluster, restricted to the same variables as the first.
cluster2 <- weather[obs2, vars]
str(cluster2)
## tibble [19 x 3] (S3: tbl_df/tbl/data.frame)
##  $ MinTemp    : num [1:19] 13.1 4.4 10.8 -0.2 12 3.2 9 15.3 7.1 10.8 ...
##  $ Rainfall   : num [1:19] 6.4 0 0 0 0.4 0 0 0 2 0 ...
##  $ Evaporation: num [1:19] 2.8 6.2 8.4 3.4 1.2 6.8 9 9.4 3.2 6.8 ...
# Per-variable means of the second cluster. `cluster2` is a stand-alone tibble,
# not a column of `weather`, so `weather$cluster2` is NULL and averaging it
# can only give NaN; take the column means of `cluster2` itself instead.
clus2 <- colMeans(cluster2)
# Print the three means (MinTemp, Rainfall, Evaporation); re-run the script to
# regenerate the recorded output for this step.
clus2
# Rounded minimum and maximum temperatures of the first two observations.
x <- round(weather[["MinTemp"]][seq_len(2)])
y <- round(weather[["MaxTemp"]][seq_len(2)])

# Plot the two observations (plotting symbol 4, heavy line width) with no box
# drawn around the plot region.
plot(
  x, y,
  xlab = "MinTemp", ylab = "MaxTemp",
  ylim = c(23, 29), pch = 4, lwd = 5, bty = "n"
)

# x and y were rounded when they were created, so they are printed as they are.
print(x)
## [1]  8 14
print(y)
## [1] 24 27