library(tidyverse)
library(sf)
library(mapview)
library(data.table)
library(ggrepel)
library(tidyverse)
library(plotly)
library(tigris)
library(leaflet)

# ---- Load inputs and attach a neighborhood to every listing ----

# Census-tract polygons that carry a neighborhood label ("Neighborho").
# On the author's last run st_read() reported 178 POLYGON features with
# 24 fields in a geodetic NAD83 CRS.
CL_NH_dt <- st_read("D:\\School\\summer 2021\\CT_NH\\NH_CT.shp")

# Craigslist listings. The author's trailing note "149314" is presumably the
# row count at the time -- confirm.
Crgslst <- fread("D:\\School\\summer 2021\\Craigslist_Listings.4.27.2021.csv")

# Tract id is converted to character before it is used as the join key
# (presumably to match the type of GEOID10 -- confirm).
Crgslst$CT_ID_10 <- as.character(Crgslst$CT_ID_10)

# From here on CL_NH_dt is the listings table with the tract's neighborhood
# attached; listings without a matching tract get NA in Neighborho.
tract_hoods <- CL_NH_dt[, c("GEOID10", "Neighborho")]
CL_NH_dt <- left_join(Crgslst, tract_hoods, by = c("CT_ID_10" = "GEOID10"))

# Collapse every label containing "Dorchester" into a single "Dorchester".
# which() skips the NA results str_detect() gives for NA labels.
is_dorchester <- str_detect(CL_NH_dt$Neighborho, "Dorchester")
CL_NH_dt$Neighborho[which(is_dorchester)] <- "Dorchester"

# Posting date assembled from the month and day columns.
# NOTE(review): the year is hard-coded to 2020 although the listings file name
# is dated 4.27.2021 -- confirm no listing belongs to another year.
# NOTE(review): "%B" expects full month names in the session locale -- confirm
# LISTING_MONTH is stored that way.
date_text <- paste("2020", CL_NH_dt$LISTING_MONTH, CL_NH_dt$LISTING_DAY)
CL_NH_dt$post_date <- as.Date(date_text, format = "%Y %B %d")

# Weekly bins: each listing is tagged with the first day of its week, with
# weeks starting on Sunday (start.on.monday = FALSE).
week_bins <- cut(CL_NH_dt$post_date, breaks = "week", start.on.monday = FALSE)
CL_NH_dt$Week <- as.Date(week_bins)

# ---- Mention flags (1 = listing body matches, 0 = no match) ----
# Three indicator columns are derived from the listing text in BODY:
#   COVID_mention   - any COVID-related term
#   virtual_mention - any virtual-tour term
#   all_mention     - any term from either list
# The term lists are written once and the "all" pattern is built from them,
# so the three measures cannot drift apart when a term is added or removed.
#
# Matching is case-insensitive ("(?i)") and unanchored, i.e. a term matches
# anywhere inside a longer word ("flu" also matches "fluent", "party" also
# matches "third party"). str_detect() returns NA for an NA body, so the flag
# is NA for such rows.
covid_terms <- "lockdown|lockd|lockdowns|covid|covid19|boscovid|covidiots|covidiot|for'covid|forcovid|precovid|coronavirus|corona|corona's|coronabirus|coronamasks|coronaviris|corinavirus|cronavirus|coronavirius|masks|mask|unmasked|maskless|masked|facemask|mask's|masking|coronamasks|facemasks|masker|coughing|cough|coughs|coughed|coughlin|fever|vaccine|vaccines|symptomatic|asymptomatic|symptoms|flu|quarantine|quarantining|quarantined|isolation|isolating|isolate|distancing|distances|screening|transmission|pandemic|flattening|epidemic|virus|viruses|party|party's|partying|partygoers|partys|parties|houseparty|gathering|gatherings|guidelines|guideline"
virtual_terms <- "virtual tour|video tour|online tour|video|online|virtual|self-guided|unseen|live tour"

covid_pattern <- paste0("(?i)", covid_terms)
virtual_pattern <- paste0("(?i)", virtual_terms)
# Union of both lists; identical to concatenating the two alternations.
all_pattern <- paste0("(?i)", covid_terms, "|", virtual_terms)

CL_NH_dt$COVID_mention <- as.numeric(str_detect(CL_NH_dt$BODY, covid_pattern))

CL_NH_dt$virtual_mention <-
  as.numeric(str_detect(CL_NH_dt$BODY, virtual_pattern))

CL_NH_dt$all_mention <- as.numeric(str_detect(CL_NH_dt$BODY, all_pattern))

# ---- Weekly summary per neighborhood ----
# For every (Week, Neighborho) pair: the number of listings and the percentage
# of them flagged by each of the three mention indicators.

# Share of flagged listings in percent; n_posts is the group's listing count.
pct_of_posts <- function(flags, n_posts) {
  (sum(flags) / n_posts) * 100
}

# summarise() peels off only the last grouping level, so the result is still
# grouped by Week.
Crgslst_w_hood <- summarise(
  group_by(CL_NH_dt, Week, Neighborho),
  all_posts = n(),
  virtual_mention_pr = pct_of_posts(virtual_mention, all_posts),
  COVID_mention_pr = pct_of_posts(COVID_mention, all_posts),
  all_mentions_pr = pct_of_posts(all_mention, all_posts)
)
# ---- Weekly trend plots, one per neighborhood ----
# Each plot is a line-and-point chart of all_mentions_pr (the weekly
# percentage of listings matching any COVID/virtual-tour term) for a single
# neighborhood. Plots are built one by one to keep each chart readable.

# X-axis ticks: every fourth Sunday from 2019-12-29 through 2021-03-21.
week_breaks <- seq(as.Date("2019-12-29"), as.Date("2021-03-21"), by = "4 weeks")

#' Build the weekly mention-rate plot for one neighborhood.
#'
#' @param hood Value of `Neighborho` whose weeks are plotted.
#' @param label Neighborhood text shown in the title; defaults to `hood`.
#' @param data Weekly summary with columns `Week`, `Neighborho` and
#'   `all_mentions_pr`; defaults to the weekly `Crgslst_w_hood` table.
#' @return A ggplot object.
plot_weekly_mentions <- function(hood, label = hood, data = Crgslst_w_hood) {
  # filter() drops rows whose Neighborho is NA; row-indexing with `==` would
  # instead carry them into the plot data as all-NA rows.
  hood_weeks <- dplyr::filter(data, Neighborho == hood)

  ggplot(hood_weeks, aes(x = Week, y = all_mentions_pr)) +
    geom_line(color = "steelblue") +
    geom_point() +
    xlab("") +
    ylab("") +
    # The y value is a percentage of listings, not a count, so the title
    # says so.
    ggtitle(paste("Weekly percentage of covid-related posts in", label)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    scale_x_date(breaks = week_breaks)
}

p1 <- plot_weekly_mentions("South Boston")
p2 <- plot_weekly_mentions("Dorchester")
# The data label is comma-separated; the title reads better with "and".
p3 <- plot_weekly_mentions(
  "Back Bay, Beacon Hill, Downtown, North End, West End",
  label = "Back Bay, Beacon Hill, Downtown, North End and West End"
)
p4 <- plot_weekly_mentions("Charlestown")
p5 <- plot_weekly_mentions("East Boston")
p6 <- plot_weekly_mentions("South End")
p7 <- plot_weekly_mentions("Roxbury")
p8 <- plot_weekly_mentions("Fenway")
p9 <- plot_weekly_mentions("Hyde Park")
p10 <- plot_weekly_mentions("Mattapan")
p11 <- plot_weekly_mentions("Roslindale")
p12 <- plot_weekly_mentions("West Roxbury")
p13 <- plot_weekly_mentions("Allston/Brighton")
p14 <- plot_weekly_mentions("Jamaica Plain")

# Interactive versions; kept as top-level calls so each widget auto-prints.
ggplotly(p1)
ggplotly(p2)
ggplotly(p3)
ggplotly(p4)
ggplotly(p5)
ggplotly(p6)
ggplotly(p7)
ggplotly(p8)
ggplotly(p9)
ggplotly(p10)
ggplotly(p11)
ggplotly(p12)
ggplotly(p13)
ggplotly(p14)
# ---- Map: share of listings with any COVID/virtual mention ----
# Author's plan: one map of totals across all weeks, then one map of the mean
# of mentions in each neighborhood.

# Per-neighborhood totals over the whole period; listings without a
# neighborhood are dropped after summarising.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    all_mentions_pr = (sum(all_mention) / all_posts) * 100
  ) %>%
  dplyr::filter(!is.na(Neighborho))

# Neighborhood polygons. On the author's last run st_read() reported 15
# MULTIPOLYGON features with 6 fields in the projected CRS
# "NAD83 / Massachusetts Mainland (ftUS)".
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
# Same relabelling as on the listings side so the join keys agree.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"

mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")

# Leaflet draws in WGS84 longitude/latitude, hence the reprojection.
df <- st_transform(mydatamerged, 4326)

# Polygons without any listings show 0% in the popup.
mypopup <- paste0(
  "Percentage of covid-related statements is ",
  ifelse(is.na(df$all_mentions_pr), 0, round(df$all_mentions_pr, 2)),
  "%", " in ", df$Neighborho
)

# Alternative palettes tried by the author: YlOrRd, RdYlGn, YlGnBu, RdYlBu
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$all_mentions_pr
)

# Map centre and the single reference marker.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Full argument name; `highlight =` only worked via partial matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(all_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$all_mentions_pr,
    position = "bottomright",
    title = "Covid-related statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
#    addLayersControl(
#     baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
#     overlayGroups = c("Quakes", "Outline"),
#     options = layersControlOptions(collapsed = FALSE)
#   )

mymap
# ---- Map: share of listings mentioning virtual tours only ----

# Per-neighborhood totals over the whole period; listings without a
# neighborhood are dropped after summarising.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    virtual_mentions_pr = (sum(virtual_mention) / all_posts) * 100
  ) %>%
  dplyr::filter(!is.na(Neighborho))

# Neighborhood polygons. On the author's last run st_read() reported 15
# MULTIPOLYGON features with 6 fields in the projected CRS
# "NAD83 / Massachusetts Mainland (ftUS)".
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
# Same relabelling as on the listings side so the join keys agree.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"

mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")

# Leaflet draws in WGS84 longitude/latitude, hence the reprojection.
df <- st_transform(mydatamerged, 4326)

# Popup wording now names the measure actually mapped here (virtual-visit
# mentions); it previously said "covid-related". Polygons without any
# listings show 0%.
mypopup <- paste0(
  "Percentage of virtual visit statements is ",
  ifelse(is.na(df$virtual_mentions_pr), 0, round(df$virtual_mentions_pr, 2)),
  "%", " in ", df$Neighborho
)

# Alternative palettes tried by the author: YlOrRd, RdYlGn, YlGnBu, RdYlBu
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$virtual_mentions_pr
)

# Map centre and the single reference marker.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Full argument name; `highlight =` only worked via partial matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(virtual_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$virtual_mentions_pr,
    position = "bottomright",
    title = "virtual visit statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
#    addLayersControl(
#     baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
#     overlayGroups = c("Quakes", "Outline"),
#     options = layersControlOptions(collapsed = FALSE)
#   )

mymap
# ---- Map: share of listings with COVID terms (virtual-tour terms excluded) ----

# Per-neighborhood totals over the whole period; listings without a
# neighborhood are dropped after summarising.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    COVID_mentions_pr = (sum(COVID_mention) / all_posts) * 100
  ) %>%
  dplyr::filter(!is.na(Neighborho))

# Neighborhood polygons. On the author's last run st_read() reported 15
# MULTIPOLYGON features with 6 fields in the projected CRS
# "NAD83 / Massachusetts Mainland (ftUS)".
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
# Same relabelling as on the listings side so the join keys agree.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"

mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")

# Leaflet draws in WGS84 longitude/latitude, hence the reprojection.
df <- st_transform(mydatamerged, 4326)

# Polygons without any listings show 0% in the popup.
mypopup <- paste0(
  "Percentage of covid-related statements is ",
  ifelse(is.na(df$COVID_mentions_pr), 0, round(df$COVID_mentions_pr, 2)),
  "%", " in ", df$Neighborho
)

# Alternative palettes tried by the author: YlOrRd, RdYlGn, YlGnBu, RdYlBu
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$COVID_mentions_pr
)

# Map centre and the single reference marker.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Full argument name; `highlight =` only worked via partial matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(COVID_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$COVID_mentions_pr,
    position = "bottomright",
    # Legend names the COVID measure mapped here; it previously carried the
    # virtual-visit title.
    title = "Covid-related statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
#    addLayersControl(
#     baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
#     overlayGroups = c("Quakes", "Outline"),
#     options = layersControlOptions(collapsed = FALSE)
#   )

mymap
# connect covid data