Load all the libraries or functions that you will use for the rest of the assignment. It is helpful to define your libraries and functions at the top of a report, so that others can know what they need for the report to compile correctly.
##r chunk
library(cluster)
library(pvclust)
library(reticulate)
py_config()
## python: /usr/bin/python3
## libpython: /usr/lib/python3.8/config-3.8-x86_64-linux-gnu/libpython3.8.so
## pythonhome: //usr://usr
## version: 3.8.5 (default, Jul 28 2020, 12:59:40) [GCC 9.3.0]
## numpy: /usr/lib/python3/dist-packages/numpy
## numpy_version: 1.17.4
The data for this assignment is tweets from US governors at the beginning of the pandemic. The dataset for this assignment includes basic info about each governor (state, name, party) and whether or not they used 1000 specific words in their tweets about COVID. Columns 4 to 1004 represent these words and are coded 0 (was not used in tweets) or 1 (was used in tweets). The goal of the current assignment is to explore the clusters of terms used in governors’ tweets about COVID.
Transpose the data with t(), as the clustering variables should be rows in the dataframe.
##r chunk
# Read the governors' tweet indicators and label every row with its state code.
GovTweets <- read.csv("Gov Tweets.csv")
rownames(GovTweets) <- GovTweets$State
# Drop the first three columns (the governor info), keeping only word columns.
GovTweets <- GovTweets[, -(1:3)]
# Take word columns 7 to 57 and transpose so that each word becomes a row,
# which is the orientation dist()/hclust() cluster on.
df <- t(GovTweets[, 7:57])
print(df)
## GA CA TX AR TN ND KY NH AK HI UT ME IN KS WI MD VT NM RI NJ NE FL
## industry 1 1 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0
## sector 1 1 0 1 0 1 0 1 1 1 0 0 0 1 0 0 0 0 0 1 1 0
## affect 0 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 1 0 0
## crisis 0 0 0 1 1 1 0 1 0 1 0 0 1 1 1 1 1 1 1 1 0 0
## important 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 1 1 0
## continue 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## support 1 0 1 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1 1 1
## protect 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## people 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## threat 0 0 1 1 1 0 0 0 0 1 0 0 0 0 0 1 0 1 0 1 1 0
## longer 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 1 0 0
## imminent 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## urllink 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## live 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## governor 0 0 1 1 1 1 1 1 0 1 1 0 0 1 1 1 0 1 0 1 1 0
## provide 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## update 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1
## media 0 0 0 1 1 1 1 0 0 0 1 0 0 1 1 1 0 0 1 0 1 0
## hold 0 0 1 1 0 1 0 1 1 0 0 1 1 1 0 1 1 1 0 1 1 0
## news 0 0 0 1 1 1 1 0 1 1 1 0 0 1 1 1 1 1 1 1 1 1
## conference 0 0 0 1 0 1 1 1 0 1 0 1 1 1 0 1 1 1 0 1 1 1
## X1.30 0 0 0 1 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 1 0 0
## pm 0 0 0 1 1 0 1 1 1 1 0 0 0 0 1 0 0 1 1 1 1 1
## today 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## response 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## watch 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## faith 0 0 0 1 1 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0
## community 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## effort 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 1
## fight 1 1 0 1 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 1 1
## in.person 0 0 1 1 0 0 1 0 0 0 0 0 0 0 1 0 1 1 0 0 0 0
## fellowship 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## church 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## meet 1 1 1 1 1 1 0 1 0 1 0 1 0 0 0 1 1 1 0 1 1 0
## remotely 0 0 0 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0
## present 0 0 0 1 0 1 1 0 0 0 0 1 0 0 1 0 0 1 0 0 0 0
## guidance 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 0
## give 0 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 0 1 1 0
## house 0 1 1 1 1 1 1 1 0 0 1 1 0 0 0 1 1 1 0 1 0 0
## worship 0 0 1 1 1 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 1 0
## option 0 0 1 1 1 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 0 0
## congregation 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## thank 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1
## X.walmart 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
## quest 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## diagnostic 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## opening 0 0 1 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 1
## drive.thru 0 0 0 1 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 1 1 1
## test 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1
## site 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 1 1 1 0 1 0 1
## central 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## ID CT MO MI MN OH WY NV MT IA OK CO IL PA MS NY AL VA SD LA NC DE
## industry 0 1 1 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 1
## sector 0 0 1 0 1 1 0 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0
## affect 0 1 1 1 0 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 0 1
## crisis 0 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1
## important 1 1 1 1 1 1 1 1 1 0 1 1 0 1 0 1 1 1 1 1 1 1
## continue 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## support 0 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 1 1 1
## protect 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1
## people 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1 1 1 0
## threat 0 0 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 0 0 1 0 1
## longer 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## imminent 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## urllink 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## live 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## governor 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 1 1 1 0 1 0 0
## provide 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## update 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## media 0 1 1 0 0 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0 0
## hold 0 1 1 1 1 1 1 1 0 1 0 1 0 0 0 0 1 1 1 1 0 0
## news 1 1 1 1 1 1 1 1 1 0 1 0 0 1 1 0 1 1 1 1 1 1
## conference 1 1 1 0 0 1 1 1 0 1 1 1 0 1 1 0 1 1 1 1 0 0
## X1.30 1 0 0 0 0 0 0 1 0 1 1 1 0 0 1 0 0 0 1 0 0 0
## pm 1 1 1 1 0 1 1 1 0 0 1 1 0 0 1 0 1 1 1 1 1 1
## today 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## response 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1
## watch 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1 1 1 1 1 1 1
## faith 0 0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0
## community 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## effort 1 1 1 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 1 1
## fight 0 1 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 1 1 1 1 1
## in.person 0 1 0 0 0 1 0 0 0 0 0 1 0 1 0 1 0 1 0 0 0 0
## fellowship 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
## church 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
## meet 1 1 1 1 0 1 0 0 1 0 1 0 0 1 1 1 0 1 0 1 0 0
## remotely 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## present 0 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## guidance 1 1 1 0 1 1 1 1 0 0 1 1 0 1 0 1 0 1 0 1 0 1
## give 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1
## house 0 1 1 0 1 1 0 1 1 0 1 1 0 1 1 1 0 1 0 1 0 0
## worship 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0
## option 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0
## congregation 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
## thank 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## X.walmart 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0
## quest 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## diagnostic 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0
## opening 0 1 0 0 0 1 0 1 0 1 0 0 0 0 0 1 1 0 0 1 1 0
## drive.thru 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0
## test 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## site 1 1 1 1 1 0 0 1 0 0 1 1 0 0 0 1 1 0 0 0 1 1
## central 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0
## SC WV OR AZ MA
## industry 0 0 0 1 0
## sector 0 0 0 1 0
## affect 0 1 1 1 0
## crisis 1 1 1 1 1
## important 0 1 1 1 1
## continue 1 1 1 1 1
## support 1 1 1 1 1
## protect 0 1 1 1 1
## people 0 1 1 1 1
## threat 0 1 0 1 0
## longer 0 0 0 0 0
## imminent 0 0 0 0 0
## urllink 1 1 1 1 1
## live 1 1 1 1 1
## governor 1 1 0 1 0
## provide 1 1 1 1 1
## update 1 1 1 1 1
## media 1 1 0 0 0
## hold 0 1 1 1 0
## news 1 1 0 1 1
## conference 0 1 1 1 0
## X1.30 0 0 0 0 0
## pm 0 0 0 1 1
## today 0 1 1 1 1
## response 1 1 1 1 1
## watch 0 1 1 1 1
## faith 0 0 1 1 0
## community 1 1 1 1 1
## effort 1 1 1 1 1
## fight 1 1 1 1 1
## in.person 0 0 0 1 1
## fellowship 0 0 0 0 0
## church 0 0 0 0 0
## meet 0 0 0 1 1
## remotely 0 0 1 0 1
## present 0 0 0 0 0
## guidance 0 1 1 1 1
## give 0 0 0 1 1
## house 0 1 1 1 1
## worship 0 0 0 0 0
## option 0 0 0 1 1
## congregation 0 0 0 0 0
## thank 1 1 1 1 1
## X.walmart 0 0 0 0 0
## quest 0 0 0 1 1
## diagnostic 0 0 0 1 0
## opening 0 0 0 1 0
## drive.thru 0 0 0 0 1
## test 0 1 1 1 1
## site 1 0 1 1 1
## central 0 1 0 1 0
While the data set includes popular distance measures, we still need to figure out how these distance measures are related to each other. Create distance measures in Euclidean or Manhattan distance.
##r chunk
# Pairwise Euclidean distances between the rows of df (one row per word).
EuclideanDistance <- dist(df, method = "euclidean")
print(EuclideanDistance)
## industry sector affect crisis important continue support
## sector 3.464102
## affect 4.472136 5.099020
## crisis 4.898979 5.099020 3.464102
## important 4.795832 5.196152 3.872983 3.605551
## continue 5.567764 5.567764 4.123106 3.605551 2.828427
## support 5.196152 5.385165 3.316625 3.000000 3.162278 2.449490
## protect 5.291503 5.291503 4.000000 3.741657 2.236068 1.732051 3.000000
## people 5.291503 5.099020 4.000000 3.741657 2.236068 2.236068 3.000000
## threat 4.358899 4.358899 4.123106 4.358899 4.690416 5.291503 4.690416
## longer 3.605551 3.872983 5.196152 5.385165 5.830952 6.480741 6.000000
## imminent 4.242641 4.472136 5.291503 5.656854 6.082763 6.708204 6.244998
## urllink 5.567764 5.567764 4.123106 3.605551 2.828427 0.000000 2.449490
## live 5.477226 5.477226 4.000000 3.464102 2.645751 1.000000 2.236068
## governor 4.898979 4.690416 3.464102 4.000000 4.123106 4.358899 3.872983
## provide 5.477226 5.477226 3.741657 3.464102 2.645751 1.732051 2.236068
## update 5.291503 5.291503 4.000000 3.162278 3.000000 1.732051 2.236068
## media 4.690416 4.898979 4.690416 4.690416 5.000000 5.385165 5.196152
## hold 4.795832 4.358899 3.605551 4.123106 4.000000 4.472136 4.242641
## news 5.291503 5.291503 4.472136 3.464102 3.872983 3.316625 3.605551
## conference 5.099020 4.690416 3.464102 4.000000 4.123106 4.123106 3.872983
## X1.30 4.582576 4.123106 5.000000 5.196152 5.656854 6.000000 5.830952
## pm 4.690416 4.690416 4.472136 4.000000 4.582576 4.358899 4.358899
## today 5.567764 5.567764 3.872983 3.605551 2.828427 1.414214 2.828427
## response 5.567764 5.567764 3.872983 3.316625 3.162278 2.000000 2.449490
## watch 5.291503 5.099020 3.464102 3.464102 3.316625 3.316625 3.605551
## faith 3.872983 3.872983 4.582576 5.196152 5.291503 6.000000 5.477226
## community 5.477226 5.477226 4.000000 3.741657 2.645751 1.000000 2.645751
## effort 5.656854 5.656854 3.741657 3.741657 3.316625 2.645751 3.000000
## fight 4.795832 5.000000 4.123106 3.605551 3.162278 2.828427 2.828427
## in.person 4.472136 4.690416 4.898979 5.099020 5.196152 5.916080 5.385165
## fellowship 4.123106 4.358899 5.385165 5.744563 6.164414 6.782330 6.324555
## church 4.242641 4.000000 5.477226 5.830952 5.916080 6.557439 6.082763
## meet 4.242641 4.472136 4.242641 4.690416 4.123106 4.582576 4.358899
## remotely 4.472136 4.242641 5.477226 5.656854 5.916080 6.557439 6.244998
## present 4.123106 4.123106 5.000000 5.567764 5.656854 6.324555 5.830952
## guidance 4.582576 4.582576 3.605551 3.605551 2.828427 3.741657 3.464102
## give 4.472136 4.898979 3.741657 3.741657 3.316625 3.605551 3.605551
## house 4.472136 4.690416 4.000000 4.242641 3.605551 4.358899 4.123106
## worship 4.242641 4.472136 4.898979 5.656854 5.744563 6.244998 5.744563
## option 4.000000 4.690416 4.898979 5.477226 5.385165 6.082763 5.567764
## congregation 4.242641 4.000000 5.830952 5.830952 6.082763 6.708204 6.244998
## thank 5.567764 5.567764 3.872983 3.605551 2.828427 1.414214 2.449490
## X.walmart 4.358899 4.123106 5.567764 5.916080 6.164414 6.782330 6.324555
## quest 4.000000 4.000000 5.477226 5.656854 6.082763 6.708204 6.244998
## diagnostic 3.872983 3.872983 5.196152 5.567764 6.000000 6.633250 6.164414
## opening 4.358899 4.795832 4.358899 5.196152 5.291503 5.656854 5.099020
## drive.thru 4.123106 4.123106 5.196152 5.385165 5.656854 6.164414 5.656854
## test 5.291503 5.477226 3.741657 3.464102 2.645751 1.732051 2.645751
## site 4.690416 4.690416 4.472136 4.242641 4.123106 4.123106 4.123106
## central 3.872983 4.123106 5.000000 5.385165 5.830952 6.480741 6.000000
## protect people threat longer imminent urllink live
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people 1.414214
## threat 5.000000 5.000000
## longer 6.244998 6.082763 4.000000
## imminent 6.480741 6.324555 4.123106 3.000000
## urllink 1.732051 2.236068 5.291503 6.480741 6.708204
## live 2.000000 2.000000 5.196152 6.403124 6.633250 1.000000
## governor 4.242641 4.000000 3.872983 5.196152 5.099020 4.358899 4.242641
## provide 2.000000 2.000000 5.000000 6.244998 6.480741 1.732051 1.414214
## update 2.449490 2.449490 5.000000 6.244998 6.480741 1.732051 1.414214
## media 5.477226 5.291503 4.582576 4.582576 4.000000 5.385165 5.291503
## hold 4.358899 4.123106 4.242641 4.690416 5.196152 4.472136 4.358899
## news 3.464102 3.464102 4.358899 5.744563 5.830952 3.316625 3.162278
## conference 4.000000 3.741657 4.582576 5.385165 5.477226 4.123106 4.000000
## X1.30 5.916080 5.744563 5.099020 3.741657 3.872983 6.000000 5.916080
## pm 4.242641 4.242641 4.358899 5.196152 5.099020 4.358899 4.242641
## today 1.732051 2.236068 5.099020 6.324555 6.557439 1.414214 1.732051
## response 2.645751 2.645751 4.898979 6.164414 6.403124 2.000000 1.732051
## watch 2.828427 2.828427 5.000000 5.916080 5.830952 3.316625 3.162278
## faith 5.744563 5.567764 4.000000 4.000000 3.000000 6.000000 5.916080
## community 1.414214 2.000000 5.196152 6.403124 6.633250 1.000000 1.414214
## effort 2.828427 2.828427 5.385165 6.244998 6.164414 2.645751 2.449490
## fight 3.000000 3.000000 4.898979 5.830952 6.244998 2.828427 2.645751
## in.person 5.656854 5.477226 4.795832 4.123106 4.000000 5.916080 5.830952
## fellowship 6.557439 6.403124 4.242641 2.828427 1.732051 6.782330 6.708204
## church 6.324555 6.164414 4.123106 3.000000 2.449490 6.557439 6.480741
## meet 4.242641 4.000000 4.358899 4.795832 5.291503 4.582576 4.472136
## remotely 6.324555 6.164414 4.795832 3.000000 2.828427 6.557439 6.480741
## present 6.082763 5.916080 4.472136 3.162278 3.316625 6.324555 6.244998
## guidance 3.316625 3.316625 4.242641 5.477226 5.744563 3.741657 3.605551
## give 3.464102 3.464102 4.358899 5.385165 5.656854 3.605551 3.464102
## house 4.000000 3.741657 4.582576 5.000000 5.477226 4.358899 4.242641
## worship 6.000000 5.830952 3.872983 3.605551 3.162278 6.244998 6.164414
## option 5.830952 5.656854 4.358899 3.872983 3.162278 6.082763 6.000000
## congregation 6.480741 6.324555 4.123106 2.645751 2.449490 6.708204 6.633250
## thank 1.732051 2.236068 5.099020 6.324555 6.557439 1.414214 1.732051
## X.walmart 6.557439 6.403124 4.242641 2.828427 1.732051 6.782330 6.708204
## quest 6.480741 6.324555 4.358899 2.645751 2.449490 6.708204 6.633250
## diagnostic 6.403124 6.244998 4.472136 2.828427 2.645751 6.633250 6.557439
## opening 5.567764 5.385165 4.242641 4.000000 3.872983 5.656854 5.567764
## drive.thru 5.916080 5.744563 4.242641 3.162278 3.316625 6.164414 6.082763
## test 2.000000 2.000000 5.196152 6.244998 6.480741 1.732051 1.414214
## site 4.000000 4.000000 4.795832 5.385165 5.477226 4.123106 4.000000
## central 6.244998 6.082763 3.741657 2.828427 3.000000 6.480741 6.403124
## governor provide update media hold news conference
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide 4.000000
## update 4.242641 2.000000
## media 3.741657 5.291503 5.291503
## hold 4.582576 4.123106 4.358899 4.582576
## news 3.741657 3.162278 3.162278 4.242641 4.795832
## conference 4.000000 3.741657 4.000000 4.690416 3.316625 4.242641
## X1.30 5.196152 5.744563 5.744563 5.000000 4.898979 5.744563 4.582576
## pm 4.472136 4.242641 4.000000 4.690416 4.795832 3.464102 4.242641
## today 4.358899 2.236068 2.236068 5.385165 4.242641 3.316625 3.872983
## response 4.123106 1.732051 2.236068 5.000000 4.000000 2.645751 3.605551
## watch 4.000000 2.828427 3.464102 4.690416 4.123106 3.464102 3.162278
## faith 4.358899 5.744563 5.744563 3.872983 4.690416 5.196152 4.582576
## community 4.242641 1.414214 2.000000 5.477226 4.358899 3.464102 4.000000
## effort 4.000000 2.000000 2.828427 5.099020 4.358899 3.741657 3.741657
## fight 4.123106 2.645751 3.000000 5.000000 4.472136 3.605551 3.872983
## in.person 4.690416 5.656854 5.656854 4.690416 5.000000 5.477226 5.099020
## fellowship 5.196152 6.557439 6.557439 4.123106 5.291503 5.916080 5.567764
## church 5.099020 6.324555 6.324555 4.242641 4.795832 5.830952 5.477226
## meet 4.000000 4.472136 4.472136 5.291503 4.795832 4.690416 4.472136
## remotely 5.291503 6.324555 6.480741 4.242641 5.196152 5.830952 5.477226
## present 5.000000 6.082763 6.244998 4.358899 4.898979 5.567764 5.196152
## guidance 3.605551 3.316625 3.605551 4.795832 4.242641 4.123106 3.872983
## give 4.000000 3.464102 3.162278 4.898979 3.872983 3.464102 4.242641
## house 3.741657 4.242641 4.472136 5.099020 4.358899 4.690416 4.242641
## worship 4.690416 6.000000 6.000000 4.472136 5.196152 5.656854 5.099020
## option 5.099020 5.830952 6.000000 4.472136 5.000000 5.656854 5.291503
## congregation 5.291503 6.480741 6.480741 4.472136 5.196152 5.830952 5.477226
## thank 4.123106 1.000000 2.236068 5.385165 4.242641 3.316625 3.872983
## X.walmart 5.196152 6.557439 6.557439 4.123106 5.099020 5.916080 5.385165
## quest 5.291503 6.480741 6.480741 4.472136 5.196152 5.830952 5.477226
## diagnostic 5.000000 6.403124 6.403124 4.358899 5.099020 5.916080 5.385165
## opening 4.358899 5.385165 5.385165 4.358899 4.690416 5.196152 4.795832
## drive.thru 5.000000 5.916080 5.916080 4.582576 4.898979 5.567764 4.795832
## test 4.472136 2.000000 2.000000 5.477226 4.358899 3.464102 4.000000
## site 4.898979 4.000000 3.741657 5.477226 5.000000 4.242641 5.099020
## central 5.000000 6.244998 6.244998 4.582576 4.898979 5.567764 5.196152
## X1.30 pm today response watch faith community
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide
## update
## media
## hold
## news
## conference
## X1.30
## pm 4.795832
## today 5.830952 4.123106
## response 5.656854 3.872983 2.000000
## watch 5.196152 3.464102 3.000000 3.000000
## faith 4.242641 4.582576 5.830952 5.656854 5.000000
## community 5.916080 4.472136 1.732051 2.236068 3.162278 5.916080
## effort 5.567764 4.472136 3.000000 2.645751 2.828427 5.744563 2.449490
## fight 5.477226 4.358899 3.162278 2.828427 3.605551 5.477226 2.645751
## in.person 4.582576 4.898979 5.744563 5.744563 5.291503 4.358899 5.830952
## fellowship 3.741657 5.196152 6.633250 6.480741 5.916080 3.464102 6.708204
## church 4.123106 5.291503 6.403124 6.244998 6.000000 3.316625 6.480741
## meet 5.385165 4.690416 4.582576 4.795832 4.690416 5.000000 4.472136
## remotely 3.872983 5.477226 6.403124 6.244998 5.656854 3.605551 6.480741
## present 4.000000 5.196152 6.164414 6.000000 5.567764 4.242641 6.244998
## guidance 5.291503 4.582576 3.741657 3.741657 3.316625 4.898979 3.605551
## give 5.196152 4.000000 3.316625 3.316625 4.000000 5.000000 3.464102
## house 5.385165 5.099020 4.123106 4.358899 4.242641 5.000000 4.242641
## worship 4.123106 5.099020 6.082763 5.916080 5.477226 3.605551 6.164414
## option 4.582576 4.898979 5.916080 5.744563 5.291503 3.605551 6.000000
## congregation 3.872983 5.477226 6.557439 6.403124 6.164414 3.605551 6.633250
## thank 5.830952 4.358899 2.000000 2.000000 3.000000 5.830952 1.000000
## X.walmart 3.741657 5.196152 6.633250 6.480741 5.916080 3.162278 6.708204
## quest 3.872983 5.099020 6.557439 6.403124 5.830952 3.605551 6.633250
## diagnostic 4.000000 5.196152 6.480741 6.480741 5.744563 3.464102 6.557439
## opening 4.690416 4.795832 5.477226 5.477226 5.000000 4.242641 5.567764
## drive.thru 3.741657 4.795832 6.000000 5.830952 5.196152 3.464102 6.082763
## test 5.744563 4.242641 1.732051 2.236068 3.162278 5.916080 2.000000
## site 5.385165 4.472136 4.358899 4.358899 4.242641 5.000000 4.000000
## central 3.741657 5.000000 6.324555 6.164414 5.744563 3.464102 6.403124
## effort fight in.person fellowship church meet remotely
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide
## update
## media
## hold
## news
## conference
## X1.30
## pm
## today
## response
## watch
## faith
## community
## effort
## fight 3.316625
## in.person 5.291503 5.744563
## fellowship 6.244998 6.164414 3.872983
## church 6.324555 6.082763 3.741657 2.645751
## meet 4.690416 4.582576 4.472136 5.000000 5.099020
## remotely 6.000000 5.916080 3.741657 2.645751 3.162278 5.291503
## present 5.916080 5.656854 3.605551 3.162278 3.316625 5.196152 3.316625
## guidance 3.605551 4.000000 4.582576 5.656854 5.567764 4.123106 5.385165
## give 4.000000 3.605551 5.099020 5.744563 5.477226 4.242641 5.830952
## house 4.242641 4.123106 4.242641 5.196152 5.291503 3.464102 5.099020
## worship 6.000000 5.744563 4.000000 3.000000 3.162278 4.690416 3.741657
## option 5.477226 5.567764 3.741657 3.316625 3.464102 4.898979 3.464102
## congregation 6.480741 6.082763 3.741657 2.236068 2.000000 5.099020 2.828427
## thank 2.236068 2.828427 5.744563 6.633250 6.403124 4.582576 6.403124
## X.walmart 6.403124 6.164414 3.872983 2.000000 1.732051 5.196152 2.645751
## quest 6.164414 6.082763 3.162278 2.236068 2.828427 4.898979 2.000000
## diagnostic 6.082763 6.164414 3.000000 2.449490 2.645751 4.795832 2.645751
## opening 5.000000 5.291503 4.123106 3.741657 4.123106 4.795832 4.358899
## drive.thru 5.916080 5.477226 4.582576 3.464102 3.316625 4.795832 3.605551
## test 2.449490 3.000000 5.656854 6.557439 6.480741 4.472136 6.324555
## site 4.242641 4.358899 4.898979 5.567764 5.477226 4.690416 5.477226
## central 6.244998 5.830952 4.123106 2.449490 3.316625 4.795832 3.316625
## present guidance give house worship option congregation
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide
## update
## media
## hold
## news
## conference
## X1.30
## pm
## today
## response
## watch
## faith
## community
## effort
## fight
## in.person
## fellowship
## church
## meet
## remotely
## present
## guidance 5.291503
## give 5.385165 4.123106
## house 4.795832 3.605551 4.242641
## worship 3.605551 5.000000 5.291503 5.099020
## option 3.605551 5.385165 5.099020 4.898979 3.464102
## congregation 3.000000 5.567764 5.656854 5.291503 3.162278 3.741657
## thank 6.164414 3.464102 3.605551 4.358899 6.082763 5.916080 6.557439
## X.walmart 3.162278 5.830952 5.744563 5.567764 3.000000 3.316625 1.732051
## quest 3.000000 5.567764 5.656854 5.099020 3.464102 2.828427 2.449490
## diagnostic 3.162278 5.477226 5.744563 5.000000 3.605551 3.316625 2.645751
## opening 4.000000 5.099020 4.795832 4.582576 4.123106 3.605551 4.358899
## drive.thru 4.242641 5.291503 5.196152 5.196152 3.316625 3.605551 3.316625
## test 6.082763 3.605551 3.464102 4.000000 6.164414 5.830952 6.633250
## site 5.196152 4.123106 4.472136 4.690416 5.099020 5.099020 5.656854
## central 3.741657 5.477226 5.567764 5.000000 3.605551 3.872983 3.000000
## thank X.walmart quest diagnostic opening drive.thru
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide
## update
## media
## hold
## news
## conference
## X1.30
## pm
## today
## response
## watch
## faith
## community
## effort
## fight
## in.person
## fellowship
## church
## meet
## remotely
## present
## guidance
## give
## house
## worship
## option
## congregation
## thank
## X.walmart 6.633250
## quest 6.557439 2.236068
## diagnostic 6.480741 2.449490 1.732051
## opening 5.477226 4.000000 3.872983 3.741657
## drive.thru 6.000000 2.828427 3.316625 3.741657 4.242641
## test 2.236068 6.708204 6.480741 6.403124 5.385165 6.082763
## site 4.123106 5.567764 5.477226 5.567764 4.582576 5.196152
## central 6.324555 2.828427 2.645751 2.828427 4.000000 3.464102
## test site
## sector
## affect
## crisis
## important
## continue
## support
## protect
## people
## threat
## longer
## imminent
## urllink
## live
## governor
## provide
## update
## media
## hold
## news
## conference
## X1.30
## pm
## today
## response
## watch
## faith
## community
## effort
## fight
## in.person
## fellowship
## church
## meet
## remotely
## present
## guidance
## give
## house
## worship
## option
## congregation
## thank
## X.walmart
## quest
## diagnostic
## opening
## drive.thru
## test
## site 4.000000
## central 6.244998 5.385165
Note: If one variable is alone in a cluster, replace it with another word and: - Rerun the distance and cluster measures. - Create a new plot of the cluster analysis (the branches may be hard to see but they are clearly separating out more).
##r chunk
# Agglomerative clustering of the words with Ward's (D2) linkage.
Euclidean.hc <- hclust(EuclideanDistance, method = "ward.D2")
# hang = -1 draws every leaf label down at the baseline of the dendrogram.
plot(Euclidean.hc, hang = -1)
Use sapply to calculate the average silhouette distances for 2 to n-1 clusters on only the second cluster analysis.
##r chunk
# Average silhouette width of the tree when it is cut into k clusters.
avg_silhouette_width <- function(k) {
  membership <- cutree(Euclidean.hc, k = k)
  summary(silhouette(membership, EuclideanDistance))$avg.width
}
# Score every cut from 2 clusters up to one fewer than the number of words.
sapply(2:(nrow(df) - 1), avg_silhouette_width)
## [1] 0.35498095 0.24293286 0.19612477 0.19736143 0.18220222 0.18961804
## [7] 0.17387179 0.17314486 0.17495210 0.17603970 0.17699510 0.17684124
## [13] 0.17651502 0.16558067 0.16289123 0.16176213 0.13847748 0.08044988
## [19] 0.08256904 0.08395642 0.08533010 0.08613273 0.08273655 0.08038364
## [25] 0.08008986 0.07594975 0.08085534 0.08641132 0.08811044 0.07717495
## [31] 0.07732348 0.07674133 0.07885757 0.07714424 0.07243152 0.07161714
## [37] 0.07381671 0.07261412 0.07694170 0.07684419 0.06756536 0.05866135
## [43] 0.05087750 0.05017757 0.04755062 0.03832563 0.03109385 0.04495869
## [49] 0.03921569
Replot the dendrogram with cluster markers based on the highest silhouette value.
Interpret the results - what topics do these clusters seem to be capturing?
##r chunk
# Redraw the dendrogram and box the two-cluster cut, which is the solution
# with the highest average silhouette width in the sweep above.
{
  plot(Euclidean.hc, hang = -1)
  rect.hclust(Euclidean.hc, k = 2)
}
Make a snake plot of the results by plotting the 50 states. - If you have more than two clusters, pick one pair you find interesting.
Which states appear to be most heavily tied to each cluster? Are there any interesting differences you see given the top and bottom most distinguishing words?
There is a gap between the states that are closer to cluster 1 and those that are closer to cluster 2. Some states are roughly equidistant from both clusters, and I am most interested in those.
CT, LA, NV, HI, DE, PA, MD, WY and OR seem to be tied to cluster 2, while GA, IA, MN, KY, TN, TX, FL, NY and OK seem to be closer to cluster 1.
# Snake plot: for each state, compare how often it used the words of cluster 1
# versus the words of cluster 2, then list the states in order of that gap.
# Every state is plotted (no unseeded random subsample), so the figure is
# reproducible, and df is left untouched for the steps that follow.
clustercut <- cutree(Euclidean.hc, k = 2)
# drop = FALSE keeps a one-word cluster as a matrix so colMeans() still works.
cluster1 <- df[names(clustercut[clustercut == 1]), , drop = FALSE]
cluster2 <- df[names(clustercut[clustercut == 2]), , drop = FALSE]
# Per state: share of cluster-1 words used minus share of cluster-2 words used.
differences <- colMeans(cluster1) - colMeans(cluster2)
ordered_differences <- sort(differences)
{
  # Empty canvas first; the x range is widened by 20% so labels are not clipped.
  plot(ordered_differences * 1.2,
       seq_along(ordered_differences),
       type = "n",
       xlab = "Cluster 2 < -- > Cluster 1",
       yaxt = "n", ylab = "")
  # One state code per row, placed at its difference score.
  text(ordered_differences,
       seq_along(ordered_differences),
       names(ordered_differences),
       cex = 0.8)
}
pvclust to validate your solution on the dataframe.
# pvclust() clusters the COLUMNS of its input and bootstraps the rows, so the
# words must be the columns here. GovTweets[, 7:57] holds the same 51 words as
# df, but with states as rows and words as columns.
# Seed the bootstrap so the reported support values can be reproduced.
set.seed(1234)
pv_cluster <- pvclust(as.matrix(GovTweets[, 7:57]),
                      method.hclust = "ward.D2",
                      method.dist = "euclidean")
## Bootstrap (r = 0.49)... Done.
## Bootstrap (r = 0.59)... Done.
## Bootstrap (r = 0.69)... Done.
## Bootstrap (r = 0.78)... Done.
## Bootstrap (r = 0.88)... Done.
## Bootstrap (r = 1.0)... Done.
## Bootstrap (r = 1.1)... Done.
## Bootstrap (r = 1.2)... Done.
## Bootstrap (r = 1.29)... Done.
## Bootstrap (r = 1.39)... Done.
# Dendrogram of the pvclust result, with leaf labels drawn at the baseline.
plot(pv_cluster, hang = -1)
# Row names of GovTweets (the state codes), exported for the Python chunk,
# which reads them as r.cluster_labels for its dendrogram leaves.
cluster_labels <- row.names(GovTweets)
import scipy.cluster.hierarchy as sch
import matplotlib
from sklearn.cluster import AgglomerativeClustering
from matplotlib import pyplot as plt
from sklearn import metrics
from scipy.cluster.hierarchy import fcluster
matplotlib.use('Agg')
# Exclusive upper bound on the cluster counts scored in the silhouette loop.
max_d = 12
# GovTweets table from the R session (one row per state), read via reticulate.
sim_data = r.GovTweets
# Ward linkage computed over the rows of sim_data.
sim_dist = sch.linkage(sim_data, method='ward')

plt.figure()
plt.title("Hierarchical Clustering Dendrogram")
# Leaves are labelled with r.cluster_labels, i.e. the state codes.
plt.xlabel("State")
plt.ylabel("Distance")
sch.dendrogram(
    sim_dist,
    leaf_rotation=90.,
    leaf_font_size=8.,
    labels=r.cluster_labels,
)
## {'icoord': [[25.0, 25.0, 35.0, 35.0], [15.0, 15.0, 30.0, 30.0], [5.0, 5.0, 22.5, 22.5], [65.0, 65.0, 75.0, 75.0], [55.0, 55.0, 70.0, 70.0], [45.0, 45.0, 62.5, 62.5], [105.0, 105.0, 115.0, 115.0], [95.0, 95.0, 110.0, 110.0], [125.0, 125.0, 135.0, 135.0], [175.0, 175.0, 185.0, 185.0], [165.0, 165.0, 180.0, 180.0], [155.0, 155.0, 172.5, 172.5], [145.0, 145.0, 163.75, 163.75], [205.0, 205.0, 215.0, 215.0], [195.0, 195.0, 210.0, 210.0], [154.375, 154.375, 202.5, 202.5], [130.0, 130.0, 178.4375, 178.4375], [102.5, 102.5, 154.21875, 154.21875], [85.0, 85.0, 128.359375, 128.359375], [53.75, 53.75, 106.6796875, 106.6796875], [13.75, 13.75, 80.21484375, 80.21484375], [245.0, 245.0, 255.0, 255.0], [235.0, 235.0, 250.0, 250.0], [225.0, 225.0, 242.5, 242.5], [265.0, 265.0, 275.0, 275.0], [285.0, 285.0, 295.0, 295.0], [315.0, 315.0, 325.0, 325.0], [305.0, 305.0, 320.0, 320.0], [345.0, 345.0, 355.0, 355.0], [335.0, 335.0, 350.0, 350.0], [312.5, 312.5, 342.5, 342.5], [290.0, 290.0, 327.5, 327.5], [270.0, 270.0, 308.75, 308.75], [375.0, 375.0, 385.0, 385.0], [395.0, 395.0, 405.0, 405.0], [415.0, 415.0, 425.0, 425.0], [400.0, 400.0, 420.0, 420.0], [380.0, 380.0, 410.0, 410.0], [455.0, 455.0, 465.0, 465.0], [445.0, 445.0, 460.0, 460.0], [475.0, 475.0, 485.0, 485.0], [452.5, 452.5, 480.0, 480.0], [435.0, 435.0, 466.25, 466.25], [395.0, 395.0, 450.625, 450.625], [365.0, 365.0, 422.8125, 422.8125], [289.375, 289.375, 393.90625, 393.90625], [233.75, 233.75, 341.640625, 341.640625], [46.982421875, 46.982421875, 287.6953125, 287.6953125]], 'dcoord': [[0.0, 17.029386365926403, 17.029386365926403, 0.0], [0.0, 17.378147196982766, 17.378147196982766, 17.029386365926403], [0.0, 18.303005217723125, 18.303005217723125, 17.378147196982766], [0.0, 9.746794344808963, 9.746794344808963, 0.0], [0.0, 12.635927614016575, 12.635927614016575, 9.746794344808963], [0.0, 12.935738607954836, 12.935738607954836, 12.635927614016575], [0.0, 15.132745950421556, 15.132745950421556, 0.0], [0.0, 
16.299284237863535, 16.299284237863535, 15.132745950421556], [0.0, 16.46207763315433, 16.46207763315433, 0.0], [0.0, 14.45683229480096, 14.45683229480096, 0.0], [0.0, 14.662878298615178, 14.662878298615178, 14.45683229480096], [0.0, 15.149257407543114, 15.149257407543114, 14.662878298615178], [0.0, 15.953056133543816, 15.953056133543816, 15.149257407543114], [0.0, 15.329709716755891, 15.329709716755891, 0.0], [0.0, 16.0312195418814, 16.0312195418814, 15.329709716755891], [15.953056133543816, 16.830032679706836, 16.830032679706836, 16.0312195418814], [16.46207763315433, 17.39396447047078, 17.39396447047078, 16.830032679706836], [16.299284237863535, 17.696008531550774, 17.696008531550774, 17.39396447047078], [0.0, 18.3213080774908, 18.3213080774908, 17.696008531550774], [12.935738607954836, 19.63395180528015, 19.63395180528015, 18.3213080774908], [18.303005217723125, 21.16052165632211, 21.16052165632211, 19.63395180528015], [0.0, 17.08800749063506, 17.08800749063506, 0.0], [0.0, 18.036999011291577, 18.036999011291577, 17.08800749063506], [0.0, 21.740898478827106, 21.740898478827106, 18.036999011291577], [0.0, 18.466185312619388, 18.466185312619388, 0.0], [0.0, 17.46424919657298, 17.46424919657298, 0.0], [0.0, 16.401219466856727, 16.401219466856727, 0.0], [0.0, 17.019596548292988, 17.019596548292988, 16.401219466856727], [0.0, 16.881943016134134, 16.881943016134134, 0.0], [0.0, 17.175564037317667, 17.175564037317667, 16.881943016134134], [17.019596548292988, 17.62573875520305, 17.62573875520305, 17.175564037317667], [17.46424919657298, 18.732769861039415, 18.732769861039415, 17.62573875520305], [18.466185312619388, 19.487175269905073, 19.487175269905073, 18.732769861039415], [0.0, 17.46424919657298, 17.46424919657298, 0.0], [0.0, 17.0, 17.0, 0.0], [0.0, 17.175564037317667, 17.175564037317667, 0.0], [17.0, 17.916472867168913, 17.916472867168913, 17.175564037317667], [17.46424919657298, 18.357559750685823, 18.357559750685823, 17.916472867168913], [0.0, 
17.578395831246947, 17.578395831246947, 0.0], [0.0, 17.74823934929885, 17.74823934929885, 17.578395831246947], [0.0, 17.776388834631177, 17.776388834631177, 0.0], [17.74823934929885, 18.395651660107074, 18.395651660107074, 17.776388834631177], [0.0, 18.68867749913478, 18.68867749913478, 18.395651660107074], [18.357559750685823, 19.45079261452687, 19.45079261452687, 18.68867749913478], [0.0, 20.790160394541676, 20.790160394541676, 19.45079261452687], [19.487175269905073, 21.83902799606038, 21.83902799606038, 20.790160394541676], [21.740898478827106, 25.125834681419104, 25.125834681419104, 21.83902799606038], [21.16052165632211, 42.157789154817436, 42.157789154817436, 25.125834681419104]], 'ivl': ['CA', 'NY', 'UT', 'OR', 'IA', 'SC', 'RI', 'IL', 'GA', 'MS', 'FL', 'SD', 'HI', 'WY', 'MT', 'DE', 'CO', 'ME', 'ID', 'VT', 'AK', 'NC', 'AR', 'NJ', 'MO', 'OH', 'ND', 'NH', 'NE', 'WV', 'AL', 'IN', 'KS', 'MI', 'WI', 'MN', 'TX', 'NV', 'LA', 'TN', 'OK', 'KY', 'PA', 'MD', 'NM', 'VA', 'MA', 'CT', 'AZ'], 'leaves': [1, 37, 10, 46, 31, 44, 18, 34, 0, 36, 21, 40, 9, 28, 30, 43, 33, 11, 22, 16, 8, 42, 3, 19, 24, 27, 5, 7, 20, 45, 38, 12, 13, 25, 14, 26, 2, 29, 41, 4, 32, 6, 35, 15, 17, 39, 48, 23, 47], 'color_list': ['g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'g', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'r', 'b']}
plt.show()
# Silhouette score for each flat clustering of the linkage tree, from 2 up to
# max_d - 1 clusters. The loop body is indented so that it parses.
for i in range(2, max_d):
    sil = metrics.silhouette_score(
        sim_data,
        fcluster(sim_dist, i, criterion='maxclust'),
        metric='euclidean',
    )
    print(i, ":", sil)
## 2 : 0.07908269749204867
## 3 : 0.06578709521762185
## 4 : 0.04594118774987207
## 5 : 0.048471531939069695
## 6 : 0.039189103960027005
## 7 : 0.04004797410109079
## 8 : 0.00725201780298204
## 9 : 0.007337152809636061
## 10 : 0.007217797727869243
## 11 : 0.007236429985985678