Assignment 4

library(igraph)

## 
## Attaching package: 'igraph'

## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum

## The following object is masked from 'package:base':
## 
##     union

library(data.table)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following objects are masked from 'package:data.table':
## 
##     dcast, melt

library(sf)

## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::as_data_frame() masks tibble::as_data_frame(), igraph::as_data_frame()
## x dplyr::between()       masks data.table::between()
## x purrr::compose()       masks igraph::compose()
## x tidyr::crossing()      masks igraph::crossing()
## x dplyr::filter()        masks stats::filter()
## x dplyr::first()         masks data.table::first()
## x dplyr::groups()        masks igraph::groups()
## x dplyr::lag()           masks stats::lag()
## x dplyr::last()          masks data.table::last()
## x purrr::simplify()      masks igraph::simplify()
## x purrr::transpose()     masks data.table::transpose()

library(tmap)

Run_Communities <- function(g,isSimp,algorithmCom){
  cc <- algorithmCom
  f <- as.data.frame(cbind(V(g)$name,cc$membership))
  colnames(f)<- c("names", "community")
  f$type <- i
  f$method <- algorithm(cc)
  f$n_clusters <- length(cc)
  f$modulary <- modularity(cc)
  print(head(f))
  write.table(f, "MOD.csv", row.names = FALSE, col.names = FALSE,  append=TRUE, sep=",")
}

setwd("/Users/chaneumpark/Dropbox (GaTech)/2021-Fall/Urban Analytics/R lab session/Assignment4 CDR")

all2types <- read.csv("all2types.csv") #source=origin, target=destination, weight=# of movements !NOTE: not directed
CountyNames <- read.csv("CountyNames.csv")

OrigNames <- merge(all2types, CountyNames, by.x = 'source', by.y = 'FIPS')
OrigDestNames <- merge(OrigNames, CountyNames, by.x = 'target', by.y = 'FIPS')
all2types <- OrigDestNames  
rm(OrigNames, OrigDestNames)

### Q1 & Q2 ###
filter(all2types, type =="Commutes") %>% filter(weight == max(weight))

##   target source  weight     type                 Name.x                 Name.y
## 1   6037   6037 4181968 Commutes Los Angeles_California Los Angeles_California

filter(all2types, type =="Migrants") %>% filter(weight == max(weight))

##   target source weight     type                 Name.x                 Name.y
## 1   6037   6037 851215 Migrants Los Angeles_California Los Angeles_California

### Q3 & Q4 ###
filter(all2types, source != target, type =="Commutes") %>% filter(weight == max(weight))

##   target source weight     type         Name.x            Name.y
## 1  36061  36047 457281 Commutes Kings_New York New York_New York

filter(all2types, source != target, type =="Migrants") %>% filter(weight == max(weight))

##   target source weight     type                 Name.x            Name.y
## 1   6059   6037  66604 Migrants Los Angeles_California Orange_California

### Q5 ###
if (file.exists("MOD.csv")) {file.remove("MOD.csv")}

## [1] TRUE

types <- unique(all2types$type)

###cluster_louvain
for (i in types){ 
net <- filter(all2types, type==i)
g <- graph_from_data_frame(net, directed = FALSE)
 Run_Communities(g,0,cluster_louvain(g))
}

##   names community     type      method n_clusters  modulary
## 1  1001         1 Migrants multi level         28 0.8003269
## 2  1003         1 Migrants multi level         28 0.8003269
## 3  1005         1 Migrants multi level         28 0.8003269
## 4  1007         1 Migrants multi level         28 0.8003269
## 5  1009         1 Migrants multi level         28 0.8003269
## 6  1011         1 Migrants multi level         28 0.8003269
##   names community     type      method n_clusters  modulary
## 1  1001         1 Commutes multi level         73 0.9541115
## 2  1003         2 Commutes multi level         73 0.9541115
## 3  1005         1 Commutes multi level         73 0.9541115
## 4  1007         1 Commutes multi level         73 0.9541115
## 5  1009         1 Commutes multi level         73 0.9541115
## 6  1011         1 Commutes multi level         73 0.9541115

#cluster_fast_greedy
Run_Communities <- function(g,isSimp,algorithmCom){
  cc <- algorithmCom
  f <- as.data.frame(cbind(V(g)$name,cc$membership))
  colnames(f)<- c("names", "community")
  f$type <- i
  f$method <- algorithm(cc)
  f$n_clusters <- length(cc)
  f$modulary <- modularity(cc)
  print(head(f))
  write.table(f, "MOD_fastgreedy.csv", row.names = FALSE, col.names = FALSE,  append=TRUE, sep=",")
}

for (i in types){ 
  net <- filter(all2types, type==i)
  g <- graph_from_data_frame(net, directed = FALSE)
  Run_Communities(g,0,cluster_fast_greedy(g))
}

##   names community     type      method n_clusters modulary
## 1  1001        21 Migrants fast greedy         29 0.800249
## 2  1003        21 Migrants fast greedy         29 0.800249
## 3  1005        21 Migrants fast greedy         29 0.800249
## 4  1007        21 Migrants fast greedy         29 0.800249
## 5  1009        21 Migrants fast greedy         29 0.800249
## 6  1011        21 Migrants fast greedy         29 0.800249
##   names community     type      method n_clusters  modulary
## 1  1001        45 Commutes fast greedy         74 0.9541703
## 2  1003        21 Commutes fast greedy         74 0.9541703
## 3  1005        35 Commutes fast greedy         74 0.9541703
## 4  1007        45 Commutes fast greedy         74 0.9541703
## 5  1009        45 Commutes fast greedy         74 0.9541703
## 6  1011        45 Commutes fast greedy         74 0.9541703

#cluster_walktrap 
Run_Communities <- function(g,isSimp,algorithmCom){
  cc <- algorithmCom
  f <- as.data.frame(cbind(V(g)$name,cc$membership))
  colnames(f)<- c("names", "community")
  f$type <- i
  f$method <- algorithm(cc)
  f$n_clusters <- length(cc)
  f$modulary <- modularity(cc)
  print(head(f))
  write.table(f, "MOD_walktrap.csv", row.names = FALSE, col.names = FALSE,  append=TRUE, sep=",")
}

for (i in types){ 
  net <- filter(all2types, type==i)
  g <- graph_from_data_frame(net, directed = FALSE)
  Run_Communities(g,0,cluster_walktrap(g))
}

##   names community     type   method n_clusters  modulary
## 1  1001        13 Migrants walktrap        136 0.2451867
## 2  1003        13 Migrants walktrap        136 0.2451867
## 3  1005        13 Migrants walktrap        136 0.2451867
## 4  1007        13 Migrants walktrap        136 0.2451867
## 5  1009        13 Migrants walktrap        136 0.2451867
## 6  1011        13 Migrants walktrap        136 0.2451867
##   names community     type   method n_clusters  modulary
## 1  1001       452 Commutes walktrap       1351 0.2106047
## 2  1003        11 Commutes walktrap       1351 0.2106047
## 3  1005       469 Commutes walktrap       1351 0.2106047
## 4  1007       192 Commutes walktrap       1351 0.2106047
## 5  1009        17 Commutes walktrap       1351 0.2106047
## 6  1011       483 Commutes walktrap       1351 0.2106047

#cluster_infomap (takes a while, you may have to wait)
Run_Communities <- function(g,isSimp,algorithmCom){
  cc <- algorithmCom
  f <- as.data.frame(cbind(V(g)$name,cc$membership))
  colnames(f)<- c("names", "community")
  f$type <- i
  f$method <- algorithm(cc)
  f$n_clusters <- length(cc)
  f$modulary <- modularity(cc)
  print(head(f))
  write.table(f, "MOD_infomap.csv", row.names = FALSE, col.names = FALSE,  append=TRUE, sep=",")
}

for (i in types){ 
  net <- filter(all2types, type==i)
  g <- graph_from_data_frame(net, directed = FALSE)
  Run_Communities(g,0,cluster_infomap(g))
}

##   names community     type  method n_clusters modulary
## 1  1001         1 Migrants infomap         50 0.794581
## 2  1003         1 Migrants infomap         50 0.794581
## 3  1005         1 Migrants infomap         50 0.794581
## 4  1007         1 Migrants infomap         50 0.794581
## 5  1009         1 Migrants infomap         50 0.794581
## 6  1011         1 Migrants infomap         50 0.794581
##   names community     type  method n_clusters  modulary
## 1  1001         1 Commutes infomap        255 0.9449073
## 2  1003         2 Commutes infomap        255 0.9449073
## 3  1005         3 Commutes infomap        255 0.9449073
## 4  1007         4 Commutes infomap        255 0.9449073
## 5  1009         4 Commutes infomap        255 0.9449073
## 6  1011         3 Commutes infomap        255 0.9449073

MOD <- read.csv("MOD.csv", header=FALSE) 
colnames(MOD) <- c("fips", "community", "type", "method", "n_regions", "modularity")  ###community is just an id for it

MOD$fips <- as.numeric(MOD$fips)

counties <- st_read("counties.shp")

## Reading layer `counties' from data source 
##   `/Users/chaneumpark/Dropbox (GaTech)/2021-Fall/Urban Analytics/R lab session/Assignment4 CDR/counties.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 3142 features and 62 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -19839090 ymin: 2145730 xmax: -7454985 ymax: 11542620
## Projected CRS: WGS 84 / Pseudo-Mercator

st_crs(counties, crs= 42304)

## Coordinate Reference System:
##   User input: WGS 84 / Pseudo-Mercator 
##   wkt:
## PROJCRS["WGS 84 / Pseudo-Mercator",
##     BASEGEOGCRS["WGS 84",
##         DATUM["World Geodetic System 1984",
##             ELLIPSOID["WGS 84",6378137,298.257223563,
##                 LENGTHUNIT["metre",1]]],
##         PRIMEM["Greenwich",0,
##             ANGLEUNIT["degree",0.0174532925199433]],
##         ID["EPSG",4326]],
##     CONVERSION["Popular Visualisation Pseudo-Mercator",
##         METHOD["Popular Visualisation Pseudo Mercator",
##             ID["EPSG",1024]],
##         PARAMETER["Latitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8801]],
##         PARAMETER["Longitude of natural origin",0,
##             ANGLEUNIT["degree",0.0174532925199433],
##             ID["EPSG",8802]],
##         PARAMETER["False easting",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8806]],
##         PARAMETER["False northing",0,
##             LENGTHUNIT["metre",1],
##             ID["EPSG",8807]]],
##     CS[Cartesian,2],
##         AXIS["easting (X)",east,
##             ORDER[1],
##             LENGTHUNIT["metre",1]],
##         AXIS["northing (Y)",north,
##             ORDER[2],
##             LENGTHUNIT["metre",1]],
##     USAGE[
##         SCOPE["Web mapping and visualisation."],
##         AREA["World between 85.06°S and 85.06°N."],
##         BBOX[-85.06,-180,85.06,180]],
##     ID["EPSG",3857]]

counties <- counties[,1:2]

countycommunities <- merge(counties, MOD, by.x = "OBJECTID", by.y = "fips")
countycommunities <- countycommunities[,3:8]

countycommunities$community <- as.character(countycommunities$community)

states <- st_read('Extra shp/States.shp')

## Reading layer `States' from data source 
##   `/Users/chaneumpark/Dropbox (GaTech)/2021-Fall/Urban Analytics/R lab session/Assignment4 CDR/Extra shp/States.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 56 features and 9 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -179.1473 ymin: -14.55255 xmax: 179.7785 ymax: 71.35256
## Geodetic CRS:  NAD83

cities <- st_read('Extra shp/Cities.shp')

## Reading layer `Cities' from data source 
##   `/Users/chaneumpark/Dropbox (GaTech)/2021-Fall/Urban Analytics/R lab session/Assignment4 CDR/Extra shp/Cities.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 98 features and 44 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: -157.8427 ymin: 21.33078 xmax: -69.77768 ymax: 61.2187
## Geodetic CRS:  WGS 84

### map for 'Migrants' ###
tm_shape(filter(countycommunities, type=="Migrants")) + tm_fill(col = 'community') + 
  tm_shape(states) + tm_borders() + 
  tm_shape(cities) + tm_dots() + tm_text("NAME", just = 'center', remove.overlap = T, size=0.5) +
  tm_legend(show=F)

### map for 'Commutes' ###
tm_shape(filter(countycommunities, type=="Commutes")) + tm_fill(col = 'community') + 
  tm_shape(states) + tm_borders() + tm_shape(cities) + tm_dots() + tm_text("NAME", just = 'center', remove.overlap = T, size=0.5) + tm_legend(show=F)

## Warning: Number of levels of the variable "community" is 73, which is
## larger than max.categories (which is 30), so levels are combined. Set
## tmap_options(max.categories = 73) in the layer function to show all levels.