Text here
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.6 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.9
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Directory holding the raw data. Building the path with file.path() instead
# of calling setwd() leaves the session's working directory untouched, so the
# rest of the script does not depend on a hidden global state change.
data_dir <- "C:/Users/rsaidi/Dropbox/Rachel/MontColl/Datasets/Datasets"
# Read the border-crossing records into a tibble.
bc <- read_csv(file.path(data_dir, "border_crossing.csv"))
## Rows: 355511 Columns: 7
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (5): Port Name, State, Border, Date, Measure
## dbl (2): Port Code, Value
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Normalise column names: lower-case them and replace spaces with underscores
# (e.g. "Port Name" becomes "port_name").
names(bc) <- gsub(" ", "_", tolower(names(bc)), fixed = TRUE)
# Parse the month/day/year text in `date` into a Date column.
bc[["date"]] <- as.Date(bc[["date"]], format = "%m/%d/%Y")
# Derive calendar year and month columns from `date`, then preview the first
# six rows.
bc2 <- mutate(bc, year = year(date), month = month(date))
head(bc2, n = 6)
## # A tibble: 6 x 9
## port_name state port_code border date measure value year month
## <chr> <chr> <dbl> <chr> <date> <chr> <dbl> <dbl> <dbl>
## 1 Alcan AK 3104 US-Canada~ 2020-02-01 Person~ 1414 2020 2
## 2 Alcan AK 3104 US-Canada~ 2020-02-01 Person~ 763 2020 2
## 3 Alcan AK 3104 US-Canada~ 2020-02-01 Truck ~ 412 2020 2
## 4 Alcan AK 3104 US-Canada~ 2020-02-01 Truck ~ 122 2020 2
## 5 Alcan AK 3104 US-Canada~ 2020-02-01 Trucks 545 2020 2
## 6 Alexandria Bay NY 708 US-Canada~ 2020-02-01 Bus Pa~ 1174 2020 2
# Stacked bar chart of crossings per state, filled by border.
# `weight = value` makes each bar the sum of the `value` column; without it
# geom_bar() plots the number of rows per state, which contradicts the
# "Total Crossings" axis label.
plot1 <- ggplot(bc2) +
  geom_bar(aes(state, weight = value, fill = border)) +
  ylab("Total Crossings") +
  ggtitle("Total Border Crossings Per State")
plot1
Briefly describe what this plot shows
# Side-by-side bars per border, one bar per measure, with the legend titled
# "Method". Bar height is the number of records (rows), not the sum of `value`.
plot2 <- ggplot(bc2) +
  geom_bar(aes(x = border, fill = measure), position = "dodge") +
  labs(y = "Count", title = "US-Canada vs US-Mexico Border Crossing Count") +
  scale_fill_brewer(palette = "Set3") +
  guides(fill = guide_legend(title = "Method"))
plot2
Briefly describe what this plot shows
# Number of distinct port names in the data.
n_distinct(bc2$port_name)
## [1] 116
# Total `value` per border, date and measure for the three measures of
# interest. Rows are filtered before aggregating so unrelated measures are
# never summed, and `.groups = "drop"` returns an ungrouped tibble: no leftover
# grouping leaks into later verbs and summarise() emits no regrouping message.
new <- bc2 %>%
  filter(
    measure %in% c(
      "Personal Vehicle Passengers",
      "Personal Vehicles",
      "Pedestrians"
    )
  ) %>%
  group_by(border, date, measure) %>%
  summarise(total = sum(value), .groups = "drop")
head(new)
## # A tibble: 6 x 4
## border date measure total
## <chr> <date> <chr> <dbl>
## 1 US-Canada Border 1996-01-01 Pedestrians 18537
## 2 US-Canada Border 1996-01-01 Personal Vehicle Passengers 6136398
## 3 US-Canada Border 1996-01-01 Personal Vehicles 2489564
## 4 US-Canada Border 1996-02-01 Pedestrians 22856
## 5 US-Canada Border 1996-02-01 Personal Vehicle Passengers 6540141
## 6 US-Canada Border 1996-02-01 Personal Vehicles 2567875
# Bias number formatting toward fixed notation so large totals are not shown
# in scientific notation.
options(scipen = 999)
# Scatter plot of totals over time from 2005-01-01 onward, coloured by measure,
# with one panel per border.
plot3 <- new %>%
  filter(date >= as.Date("2005-01-01")) %>%
  ggplot(mapping = aes(x = date, y = total, colour = measure)) +
  geom_point() +
  scale_colour_brewer(palette = "Set1") +
  facet_wrap(vars(border)) +
  theme_bw()
plot3
Text here