# A negative warn level makes R ignore all warnings for the rest of the session.
# NOTE(review): this also hides warnings from the data import and the sums below.
options(warn = -1)
council.mth.data <- read.csv("~/Desktop/Project 4/Data/council.mth.data.csv")

# Extract the names of the councils.
# c holds the row index at which each council's block of 204 rows starts;
# the council name is read from column 3 of that first row.
c <- seq(1, 6325, by = 204)
council.list <- as.character(council.mth.data[c, 3])

# Build the 2002-2007 table: per-council totals of applications (column 4),
# assessments (column 5) and population (column 6), plus the assess/app ratio
# in percent, then standardise every column.
# d holds the last row of each council's 2002-2007 window; the matching first
# rows are in c.
d <- seq(69, 6393, by = 204)

# Overwrite the assessment value in row 2456 with the rounded mean of the
# non-missing assessments in rows 2449-2457 (presumably filling a gap in the
# data - TODO confirm).
council.mth.data[2456, 5] <- round(
  mean(council.mth.data[2449:2457, 5], na.rm = TRUE),
  0
)

application.2002.2007 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[c[k]:d[k], 4]),
  numeric(1)
)

assess.2002.2007 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[c[k]:d[k], 5]),
  numeric(1)
)

pop.2002.2007 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[c[k]:d[k], 6]),
  numeric(1)
)

year2002.2007 <- data.frame(
  "applications" = application.2002.2007,
  "assessments" = assess.2002.2007,
  "ratio" = assess.2002.2007 / application.2002.2007 * 100,
  "pop" = pop.2002.2007
)
row.names(year2002.2007) <- council.list
# scale() centres and scales each column and returns a numeric matrix.
year2002.2007 <- scale(year2002.2007)

# Install the packages used below only when they are not already available,
# so that re-running the script does not download and reinstall them each time.
for (pkg in c("tidyverse", "cluster", "factoextra", "dendextend")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, repos = "http://cran.us.r-project.org")
  }
}
library(tidyverse)  # data manipulation
## -- Attaching packages -------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1     <U+221A> purrr   0.3.2
## <U+221A> tibble  2.1.3     <U+221A> dplyr   0.8.3
## <U+221A> tidyr   0.8.3     <U+221A> stringr 1.2.0
## <U+221A> readr   1.1.1     <U+221A> forcats 0.3.0
## -- Conflicts ----------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(cluster)    # clustering algorithms
library(factoextra) # clustering visualization
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(dendextend) # for comparing two dendrograms
## 
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
# Hierarchical clustering, 2002-2007.
# Linkage methods to compare; the names label the entries of the result vector.
m <- c(
  average = "average",
  single = "single",
  complete = "complete",
  ward = "ward"
)

# Agglomerative coefficient of the scaled 2002-2007 data for linkage method x.
ac <- function(x) {
  fit <- agnes(year2002.2007, method = x)
  fit$ac
}

map_dbl(m, ac)
##   average    single  complete      ward 
## 0.8735937 0.8320100 0.8808216 0.8928262
# Plot the Ward-linkage dendrogram for 2002-2007 and box the 4-cluster cut.
hc1 <- agnes(year2002.2007, method = "ward")
pltree(hc1, cex = 0.6, hang = -1, main = "Dendrogram year 2002-2007")
# rect.hclust() derives the top of each rectangle from the tree's height vector
# assuming hclust's ascending order; an agnes object does not store its heights
# sorted that way, so convert it with as.hclust() before drawing the boxes.
rect.hclust(as.hclust(hc1), k = 4, border = 2:5)

#........................................................................................

# Build the 2008-2012 table: per-council totals of applications (column 4),
# assessments (column 5) and population (column 6), plus the assess/app ratio
# in percent, then standardise every column.
# g and h hold the first and last row of each council's 2008-2012 window.
g <- seq(70, 6394, by = 204)
h <- seq(129, 6453, by = 204)

application.2008.2012 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[g[k]:h[k], 4]),
  numeric(1)
)

assess.2008.2012 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[g[k]:h[k], 5]),
  numeric(1)
)

pop.2008.2012 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[g[k]:h[k], 6]),
  numeric(1)
)

year2008.2012 <- data.frame(
  "applications" = application.2008.2012,
  "assessments" = assess.2008.2012,
  "ratio" = assess.2008.2012 / application.2008.2012 * 100,
  "pop" = pop.2008.2012
)
row.names(year2008.2012) <- council.list
# scale() centres and scales each column and returns a numeric matrix.
year2008.2012 <- scale(year2008.2012)

# Hierarchical clustering, 2008-2012.
# Linkage methods to compare; the names label the entries of the result vector.
m <- c(
  average = "average",
  single = "single",
  complete = "complete",
  ward = "ward"
)

# Agglomerative coefficient of the scaled 2008-2012 data for linkage method x.
ac <- function(x) {
  fit <- agnes(year2008.2012, method = x)
  fit$ac
}

map_dbl(m, ac)
##   average    single  complete      ward 
## 0.8502960 0.8499669 0.8753117 0.9010640
# Plot the Ward-linkage dendrogram for 2008-2012 and box the 4-cluster cut.
hc2 <- agnes(year2008.2012, method = "ward")
pltree(hc2, cex = 0.6, hang = -1, main = "Dendrogram year 2008-2012")
# rect.hclust() derives the top of each rectangle from the tree's height vector
# assuming hclust's ascending order; an agnes object does not store its heights
# sorted that way, so convert it with as.hclust() before drawing the boxes.
rect.hclust(as.hclust(hc2), k = 4, border = 2:5)

#........................................................................................

# Build the 2013-2018 table: per-council totals of applications (column 4),
# assessments (column 5) and population (column 6), plus the assess/app ratio
# in percent, then standardise every column.
# v and w hold the first and last row of each council's 2013-2018 window.
v <- seq(130, 6454, by = 204)
w <- seq(201, 6525, by = 204)

application.2013.2018 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[v[k]:w[k], 4]),
  numeric(1)
)

assess.2013.2018 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[v[k]:w[k], 5]),
  numeric(1)
)

pop.2013.2018 <- vapply(
  seq_len(32),
  function(k) sum(council.mth.data[v[k]:w[k], 6]),
  numeric(1)
)

year2013.2018 <- data.frame(
  "applications" = application.2013.2018,
  "assessments" = assess.2013.2018,
  "ratio" = assess.2013.2018 / application.2013.2018 * 100,
  "pop" = pop.2013.2018
)
row.names(year2013.2018) <- council.list
# scale() centres and scales each column and returns a numeric matrix.
year2013.2018 <- scale(year2013.2018)

# Hierarchical clustering, 2013-2018.
# Linkage methods to compare; the names label the entries of the result vector.
m <- c(
  average = "average",
  single = "single",
  complete = "complete",
  ward = "ward"
)

# Agglomerative coefficient of the scaled 2013-2018 data for linkage method x.
ac <- function(x) {
  fit <- agnes(year2013.2018, method = x)
  fit$ac
}

map_dbl(m, ac)
##   average    single  complete      ward 
## 0.8334566 0.8151024 0.8755650 0.9086807
# Plot the Ward-linkage dendrogram for 2013-2018 and box the 4-cluster cut.
hc3 <- agnes(year2013.2018, method = "ward")
pltree(hc3, cex = 0.6, hang = -1, main = "Dendrogram year 2013-2018")
# rect.hclust() derives the top of each rectangle from the tree's height vector
# assuming hclust's ascending order; an agnes object does not store its heights
# sorted that way, so convert it with as.hclust() before drawing the boxes.
rect.hclust(as.hclust(hc3), k = 4, border = 2:5)