library(tidyverse)
library(sf)
library(mapview)
library(data.table)
library(ggrepel)
library(tidyverse)
library(plotly)
library(tigris)
library(leaflet)
# ---- Load inputs and attach a neighborhood to every listing ----
# Census-tract polygons; the GEOID10 and Neighborho columns are used below.
tract_shapes <- st_read("D:\\School\\summer 2021\\CT_NH\\NH_CT.shp")
## Reading layer `NH_CT' from data source `D:\School\summer 2021\CT_NH\NH_CT.shp' using driver `ESRI Shapefile'
## Simple feature collection with 178 features and 24 fields
## Geometry type: POLYGON
## Dimension: XY
## Bounding box: xmin: -71.19116 ymin: 42.22788 xmax: -70.98471 ymax: 42.40493
## Geodetic CRS: NAD83

# Craigslist listings export (the original note "149314" is presumably the
# row count - confirm).
Crgslst <- fread("D:\\School\\summer 2021\\Craigslist_Listings.4.27.2021.csv")

# The join below matches CT_ID_10 against GEOID10, so store the id as text.
Crgslst[, CT_ID_10 := as.character(CT_ID_10)]

# Tract id -> neighborhood lookup. Subsetting an sf object keeps its geometry
# column, so the geometry travels along into the joined table.
hood_lookup <- tract_shapes[, c("GEOID10", "Neighborho")]
CL_NH_dt <- left_join(Crgslst, hood_lookup, by = c("CT_ID_10" = "GEOID10"))

# Collapse every neighborhood label containing "Dorchester" into one name.
is_dorchester <- str_detect(CL_NH_dt$Neighborho, "Dorchester")
CL_NH_dt$Neighborho[is_dorchester] <- "Dorchester"

# Posting date assembled from the month-name and day columns.
# NOTE(review): the year is hard-coded to 2020, yet the export file is dated
# April 2021 - listings from another year would be dated into 2020. Confirm
# whether the data carries a year column.
listing_date_text <- paste("2020", CL_NH_dt$LISTING_MONTH, CL_NH_dt$LISTING_DAY)
CL_NH_dt$post_date <- as.Date(listing_date_text, format = "%Y %B %d")

# going weekly
# Week = first day of the week containing post_date; weeks start on Sunday
# because start.on.monday = FALSE.
week_bins <- cut(CL_NH_dt$post_date, breaks = "week", start.on.monday = FALSE)
CL_NH_dt$Week <- as.Date(week_bins)
# create the measure
# Each flag is 1 when the listing BODY matches the keyword pattern, 0 when it
# does not, and NA when BODY is NA. "(?i)" makes the match case-insensitive.
# The alternatives are plain substrings with no word boundaries, so they also
# match inside longer words.
# NOTE(review): short terms such as "flu", "party", "online" or "video" will
# therefore hit unrelated text (e.g. "influence"); many alternatives are also
# redundant ("covid" already covers "covid19"). Left unchanged on purpose so
# the measure stays comparable - revisit if the dictionary is ever revised.
covid_pattern <- "(?i)lockdown|lockd|lockdowns|covid|covid19|boscovid|covidiots|covidiot|for'covid|forcovid|precovid|coronavirus|corona|corona's|coronabirus|coronamasks|coronaviris|corinavirus|cronavirus|coronavirius|masks|mask|unmasked|maskless|masked|facemask|mask's|masking|coronamasks|facemasks|masker|coughing|cough|coughs|coughed|coughlin|fever|vaccine|vaccines|symptomatic|asymptomatic|symptoms|flu|quarantine|quarantining|quarantined|isolation|isolating|isolate|distancing|distances|screening|transmission|pandemic|flattening|epidemic|virus|viruses|party|party's|partying|partygoers|partys|parties|houseparty|gathering|gatherings|guidelines|guideline"
virtual_pattern <- "(?i)virtual tour|video tour|online tour|video|online|virtual|self-guided|unseen|live tour"

# Covid-related wording.
CL_NH_dt$COVID_mention <- as.numeric(str_detect(CL_NH_dt$BODY, covid_pattern))

# Virtual / remote viewing wording.
CL_NH_dt$virtual_mention <- as.numeric(str_detect(CL_NH_dt$BODY, virtual_pattern))

# Either kind of mention. The combined keyword list was exactly the covid list
# followed by the virtual list, so the flag is derived from the two flags
# above instead of maintaining (and re-scanning with) a third copy of the list.
CL_NH_dt$all_mention <- as.numeric(
  CL_NH_dt$COVID_mention | CL_NH_dt$virtual_mention
)
# Weekly summary per neighborhood: number of listings and the percentage of
# them flagged as virtual, covid-related, or either.
# .groups = "drop" returns an ungrouped table; without it the result stays
# grouped by Week and summarise() prints a regrouping message.
# NOTE(review): sum() has no na.rm, so a single NA flag makes that
# week/neighborhood percentage NA - confirm that is intended.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Week, Neighborho) %>%
  summarise(
    all_posts = n(),
    virtual_mention_pr = (sum(virtual_mention) / all_posts) * 100,
    COVID_mention_pr = (sum(COVID_mention) / all_posts) * 100,
    all_mentions_pr = (sum(all_mention) / all_posts) * 100,
    .groups = "drop"
  )
# plotting - going one by one to get clear plots
# One weekly line chart (line plus point markers) per neighborhood.

# X-axis ticks shared by every chart: one tick every four weeks from
# 2019-12-29 through 2021-03-21 (the same 17 dates previously typed by hand).
week_breaks <- seq(as.Date("2019-12-29"), as.Date("2021-03-21"), by = "4 weeks")

# Build the weekly chart for one neighborhood.
#   hood   - value of Neighborho to plot
#   label  - neighborhood name as it should read in the title (defaults to hood)
#   weekly - weekly per-neighborhood summary holding Week and all_mentions_pr
# Returns a ggplot object.
plot_weekly_mentions <- function(hood, label = hood, weekly = Crgslst_w_hood) {
  # %in% never yields NA, so groups whose Neighborho is missing are left out
  # instead of becoming all-NA rows (and "removed rows" warnings) the way
  # `==` subsetting produces them.
  hood_weeks <- weekly[weekly$Neighborho %in% hood, ]

  # NOTE(review): the title says "Number of ... posts" but the y value is
  # all_mentions_pr, a percentage that also counts virtual-tour wording.
  # Title text kept as is - confirm the intended wording.
  ggplot(hood_weeks, aes(x = Week, y = all_mentions_pr)) +
    geom_line(color = "steelblue") +
    geom_point() +
    xlab("") +
    ylab("") +
    ggtitle(paste("Number of Weekly covid-related posts in", label)) +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
    scale_x_date(breaks = week_breaks)
}

p1 <- plot_weekly_mentions("South Boston")
p2 <- plot_weekly_mentions("Dorchester")
# The combined downtown area reads better with "and" in the title.
p3 <- plot_weekly_mentions(
  "Back Bay, Beacon Hill, Downtown, North End, West End",
  label = "Back Bay, Beacon Hill, Downtown, North End and West End"
)
p4 <- plot_weekly_mentions("Charlestown")
p5 <- plot_weekly_mentions("East Boston")
p6 <- plot_weekly_mentions("South End")
p7 <- plot_weekly_mentions("Roxbury")
p8 <- plot_weekly_mentions("Fenway")
p9 <- plot_weekly_mentions("Hyde Park")
p10 <- plot_weekly_mentions("Mattapan")
p11 <- plot_weekly_mentions("Roslindale")
p12 <- plot_weekly_mentions("West Roxbury")
p13 <- plot_weekly_mentions("Allston/Brighton")
p14 <- plot_weekly_mentions("Jamaica Plain")

# Render each chart as an interactive plotly widget. Kept as separate
# top-level calls so every widget is printed on its own.
ggplotly(p1)
ggplotly(p2)
ggplotly(p3)
ggplotly(p4)
ggplotly(p5)
ggplotly(p6)
ggplotly(p7)
ggplotly(p8)
ggplotly(p9)
ggplotly(p10)
ggplotly(p11)
ggplotly(p12)
ggplotly(p13)
ggplotly(p14)
# map for all mentions
# Choropleth of the percentage of listings per neighborhood that carry any
# mention (covid-related or virtual wording), pooled over all weeks.
## Original author's note: do a map for total numbers across weeks, and then
## do a map for the mean of mentions in each neighborhood
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    all_mentions_pr = (sum(all_mention) / all_posts) * 100
  )
# Drop the group of listings that have no neighborhood.
Crgslst_w_hood <- Crgslst_w_hood[!is.na(Crgslst_w_hood$Neighborho), ]

# Neighborhood polygons.
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
## Reading layer `NHzip_2020-10-16' from data source `D:\School\summer 2021\BPHC_Nbhd_Shp-20210706T230937Z-001\BPHC_Nbhd_Shp\NHzip_2020-10-16.shp' using driver `ESRI Shapefile'
## Simple feature collection with 15 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 739826.9 ymin: 2908285 xmax: 795020.4 ymax: 2970073
## Projected CRS: NAD83 / Massachusetts Mainland (ftUS)

# Collapse every label containing "Dorchester" into one name so it lines up
# with the listing data, then attach the summary to the polygons.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"
mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")
df <- mydatamerged
# Reproject from the state-plane CRS to EPSG:4326 (lon/lat) for leaflet.
df <- st_transform(df, 4326)

# Popup text per polygon; polygons without a percentage are reported as 0.
mypopup <- paste0(
  "Percentage of covid-related statements is ",
  ifelse(is.na(df$all_mentions_pr), 0, round((df$all_mentions_pr), 2)),
  "%", " in ", df$Neighborho
)
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$all_mentions_pr
)
# YlOrRd, RdYlGn, YlGnBu, RdYlBu

# Map center and the marker placed on it.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Argument spelled out in full; `highlight =` only worked through
    # partial argument matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(all_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$all_mentions_pr,
    position = "bottomright",
    title = "Covid-related statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
# addLayersControl(
# baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
# overlayGroups = c("Quakes", "Outline"),
# options = layersControlOptions(collapsed = FALSE)
# )
mymap
# map for virtual only
# Choropleth of the percentage of listings per neighborhood that mention
# virtual / remote viewing, pooled over all weeks.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    virtual_mentions_pr = (sum(virtual_mention) / all_posts) * 100
  )
# Drop the group of listings that have no neighborhood.
Crgslst_w_hood <- Crgslst_w_hood[!is.na(Crgslst_w_hood$Neighborho), ]

# Neighborhood polygons.
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
## Reading layer `NHzip_2020-10-16' from data source `D:\School\summer 2021\BPHC_Nbhd_Shp-20210706T230937Z-001\BPHC_Nbhd_Shp\NHzip_2020-10-16.shp' using driver `ESRI Shapefile'
## Simple feature collection with 15 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 739826.9 ymin: 2908285 xmax: 795020.4 ymax: 2970073
## Projected CRS: NAD83 / Massachusetts Mainland (ftUS)

# Collapse every label containing "Dorchester" into one name so it lines up
# with the listing data, then attach the summary to the polygons.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"
mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")
df <- mydatamerged
# Reproject from the state-plane CRS to EPSG:4326 (lon/lat) for leaflet.
df <- st_transform(df, 4326)

# Popup text per polygon; polygons without a percentage are reported as 0.
# Wording fixed: this map shows the virtual-visit percentage, but the popup
# used to say "covid-related statements" (copied from the all-mentions map).
mypopup <- paste0(
  "Percentage of virtual visit statements is ",
  ifelse(is.na(df$virtual_mentions_pr), 0, round((df$virtual_mentions_pr), 2)),
  "%", " in ", df$Neighborho
)
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$virtual_mentions_pr
)
# YlOrRd, RdYlGn, YlGnBu, RdYlBu

# Map center and the marker placed on it.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Argument spelled out in full; `highlight =` only worked through
    # partial argument matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(virtual_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$virtual_mentions_pr,
    position = "bottomright",
    title = "virtual visit statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
# addLayersControl(
# baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
# overlayGroups = c("Quakes", "Outline"),
# options = layersControlOptions(collapsed = FALSE)
# )
mymap
# map for only covid-related minus virtual
# Choropleth of the percentage of listings per neighborhood that contain
# covid-related wording (virtual-tour wording not counted), pooled over all
# weeks.
Crgslst_w_hood <- CL_NH_dt %>%
  group_by(Neighborho) %>%
  summarise(
    all_posts = n(),
    COVID_mentions_pr = (sum(COVID_mention) / all_posts) * 100
  )
# Drop the group of listings that have no neighborhood.
Crgslst_w_hood <- Crgslst_w_hood[!is.na(Crgslst_w_hood$Neighborho), ]

# Neighborhood polygons.
NH <- st_read("D:\\School\\summer 2021\\BPHC_Nbhd_Shp-20210706T230937Z-001\\BPHC_Nbhd_Shp\\NHzip_2020-10-16.shp")
## Reading layer `NHzip_2020-10-16' from data source `D:\School\summer 2021\BPHC_Nbhd_Shp-20210706T230937Z-001\BPHC_Nbhd_Shp\NHzip_2020-10-16.shp' using driver `ESRI Shapefile'
## Simple feature collection with 15 features and 6 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 739826.9 ymin: 2908285 xmax: 795020.4 ymax: 2970073
## Projected CRS: NAD83 / Massachusetts Mainland (ftUS)

# Collapse every label containing "Dorchester" into one name so it lines up
# with the listing data, then attach the summary to the polygons.
NH$Neighborho[str_detect(NH$Neighborho, "Dorchester")] <- "Dorchester"
mydatamerged <- geo_join(NH, Crgslst_w_hood, "Neighborho", "Neighborho")
df <- mydatamerged
# Reproject from the state-plane CRS to EPSG:4326 (lon/lat) for leaflet.
df <- st_transform(df, 4326)

# Popup text per polygon; polygons without a percentage are reported as 0.
mypopup <- paste0(
  "Percentage of covid-related statements is ",
  ifelse(is.na(df$COVID_mentions_pr), 0, round((df$COVID_mentions_pr), 2)),
  "%", " in ", df$Neighborho
)
mypal <- colorNumeric(
  palette = "YlGnBu",
  domain = df$COVID_mentions_pr
)
# YlOrRd, RdYlGn, YlGnBu, RdYlBu

# Map center and the marker placed on it.
myLAT <- 42.3398
myLNG <- -71.0892
mycentername <- "NEU"

mymap <- leaflet() %>%
  addProviderTiles("CartoDB.Positron") %>%
  setView(myLNG, myLAT, zoom = 12) %>%
  addMarkers(lat = myLAT, lng = myLNG, popup = mycentername) %>%
  addPolygons(
    data = df,
    # Argument spelled out in full; `highlight =` only worked through
    # partial argument matching.
    highlightOptions = highlightOptions(
      weight = 3,
      color = "red",
      bringToFront = TRUE
    ),
    fillColor = ~mypal(COVID_mentions_pr),
    color = "#000000", # you need to use hex colors - #b2aeae
    fillOpacity = 0.7,
    weight = 1,
    smoothFactor = 0.2,
    popup = mypopup
  ) %>%
  addLegend(
    pal = mypal,
    values = df$COVID_mentions_pr,
    position = "bottomright",
    # Title fixed: the legend shows COVID_mentions_pr but was labelled
    # "virtual visit statements" (copied from the virtual-only map).
    title = "Covid-related statements",
    labFormat = labelFormat(suffix = "%")
  )
# %>%
# addLayersControl(
# baseGroups = c("OSM (default)", "Toner", "Toner Lite"),
# overlayGroups = c("Quakes", "Outline"),
# options = layersControlOptions(collapsed = FALSE)
# )
mymap
# connect covid data