# Use the CRAN cloud mirror so install.packages() never prompts for a mirror.
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Install only the packages that are not available yet. Reinstalling on every
# run is slow, and on Windows it fails for any package whose DLL is already
# loaded: the recorded run reported "cannot remove prior installation ...
# Permission denied" for both cluster and dplyr.
required_packages <- c(
  "ggplot2", "cluster", "factoextra", "dendextend", "dplyr"
)
is_available <- vapply(
  required_packages, requireNamespace, logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_available]
if (length(missing_packages) > 0) {
  install.packages(missing_packages)
}
# Attach the analysis packages, in this order.
# Notes from the recorded run (R 4.2 library, packages built under R 4.2.3):
#   * dendextend (1.17.1) masks stats::cutree.
#   * dplyr masks stats::filter and stats::lag, and base::intersect,
#     base::setdiff, base::setequal and base::union.
for (pkg in c("ggplot2", "cluster", "factoextra", "dendextend", "dplyr")) {
  library(pkg, character.only = TRUE)
}
# Read the rainfall table from disk and preview the first rows.
rainfall_data <- read.csv("C:/Users/DELL/Desktop/rainfal india/rainfaLLIndia.csv")
head(rainfall_data)

# Count missing cells (the recorded run reported 0), then drop any row that
# contains an NA.
# NOTE(review): the recorded summary showed a JUN.SEP minimum of -99.9, which
# looks like a missing-value sentinel that na.omit() does not catch - confirm.
sum(is.na(rainfall_data))
rainfall_data_clean <- na.omit(rainfall_data)

# Standardise the monthly columns JUN..SEP to mean 0 and sd 1.
# scale() returns a one-column matrix; storing that directly produced matrix
# columns (shown as JUN.V1, JUL.V1, ... in summary()). Converting to a plain
# numeric vector keeps the same values in ordinary columns.
rainfall_data_scaled <- rainfall_data_clean %>%
  mutate(across(JUN:SEP, function(x) as.numeric(scale(x))))
head(rainfall_data_scaled)
summary(rainfall_data_scaled)
# June rainfall over time. The table holds many rows per year (one per
# subdivision), so the lines are grouped by subdivision; without a group,
# geom_line() joins every subdivision into a single zigzag path.
ggplot(rainfall_data_clean, aes(x = YEAR, y = JUN, group = subdivision)) +
  geom_line() +
  ggtitle("Rainfall in June over Years")

# July rainfall over time, one line per subdivision.
ggplot(rainfall_data_clean, aes(x = YEAR, y = JUL, group = subdivision)) +
  geom_line() +
  ggtitle("Rainfall in July over Years")

# Keep only the standardised monthly columns (JUN..SEP) for clustering.
df <- rainfall_data_scaled %>%
  select(JUN:SEP)

# Euclidean distances over the full data set. The sampled clustering below
# does not use this object.
res.dist <- dist(df, method = "euclidean")

# The original script also fitted hclust(res.dist, method = "ward.D2") here,
# but that result was overwritten before anything read it, so the costly
# full-data fit has been dropped.
# NOTE(review): the dropped fit used "ward.D2" while the sampled fit relied on
# the default linkage ("complete") - confirm which method was intended.

# Cluster a random 10% sample of df; set.seed() makes the draw reproducible.
set.seed(123)
sample_size <- floor(0.1 * nrow(df))
sampled_data <- df[sample(nrow(df), sample_size), ]
res.hc <- hclust(dist(sampled_data), method = "complete")

# Plain dendrogram of the sampled clustering. In the recorded run factoextra
# triggered a ggplot2 deprecation warning about `guides(<scale> = FALSE)`.
fviz_dend(res.hc, cex = 0.5)

# Redraw the dendrogram with its branches coloured by a four-cluster cut.
fviz_dend(
  res.hc,
  cex = 0.5,
  k = 4,
  k_colors = c("blue", "green3", "red", "orange")
)

# Assign each sampled row to one of the four clusters and tabulate the sizes.
nc <- cutree(res.hc, k = 4)
table(nc)
## nc
## 1 2 3 4
## 193 200 39 1
# Install fastcluster only when it is missing. The unconditional reinstall
# failed in the recorded run ("cannot remove prior installation ... Permission
# denied") because the installed copy's DLL could not be replaced.
if (!requireNamespace("fastcluster", quietly = TRUE)) {
  install.packages("fastcluster")
}

# Attaching fastcluster masks stats::hclust, so every hclust() call after this
# line resolves to fastcluster's implementation.
library(fastcluster)
# Compare average linkage with ward.D on a random 100-row subset of df.
# No seed is set in this section, so the draw depends on the RNG state left
# by the code that ran before it.
df_subset <- df[sample(nrow(df), 100), ]
res.dist_subset <- df_subset %>% dist()

# Fit both linkages on the same distance object.
hc_average_subset <- res.dist_subset %>% hclust(method = "average")
hc_wardD_subset <- res.dist_subset %>% hclust(method = "ward.D")

# Convert to dendrograms and draw them side by side.
d1_subset <- hc_average_subset %>% as.dendrogram()
d2_subset <- hc_wardD_subset %>% as.dendrogram()
dend_list_subset <- dendlist(d1_subset, d2_subset)
tanglegram(d1_subset, d2_subset)

# Cophenetic correlation between the two trees (recorded output below).
cor.dendlist(dend_list_subset, method = "cophenetic")
## [,1] [,2]
## [1,] 1.0000000 0.7348767
## [2,] 0.7348767 1.0000000
# Compare seven linkage methods on one reproducible 100-row subset of df.
set.seed(123)
df_subset <- df[sample(nrow(df), 100), ]

# The distance matrix is identical for every method, so compute it once
# instead of once per dendrogram.
subset_dist <- dist(df_subset)

# Fit one linkage method on the shared distances and return its dendrogram.
build_dend <- function(method) {
  as.dendrogram(hclust(subset_dist, method = method))
}

dend1 <- build_dend("complete")
dend2 <- build_dend("average")
dend3 <- build_dend("ward.D")
dend4 <- build_dend("median")
dend5 <- build_dend("single")
dend6 <- build_dend("centroid")
dend7 <- build_dend("ward.D2")

dend_list <- dendlist(
  "WARD.D2" = dend7, "CENTROID" = dend6,
  "SINGLE" = dend5, "MEDIAN" = dend4,
  "WARD.D" = dend3, "AVERAGE" = dend2,
  "COMPLETE" = dend1
)

# Pairwise correlation between the dendrograms, rounded to two decimals and
# labelled in upper case.
corre <- round(cor.dendlist(dend_list), 2)
rownames(corre) <- toupper(rownames(corre))
colnames(corre) <- toupper(colnames(corre))
corre
## WARD.D2 CENTROID SINGLE MEDIAN WARD.D AVERAGE COMPLETE
## WARD.D2 1.00 0.26 0.25 0.27 0.68 0.40 0.58
## CENTROID 0.26 1.00 0.95 0.67 0.15 0.89 0.55
## SINGLE 0.25 0.95 1.00 0.59 0.13 0.87 0.54
## MEDIAN 0.27 0.67 0.59 1.00 0.24 0.65 0.41
## WARD.D 0.68 0.15 0.13 0.24 1.00 0.29 0.59
## AVERAGE 0.40 0.89 0.87 0.65 0.29 1.00 0.60
## COMPLETE 0.58 0.55 0.54 0.41 0.59 0.60 1.00