library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(readr)
library(knitr)
library(ggplot2)
library(png)
library(grid)
# Import: URL of the raw New_York_Subway_Entrances_.csv file on GitHub.
assignment6a <- "https://raw.githubusercontent.com/RandallThompson/Data607/master/New_York_Subway_Entrances_.csv"
# Read the CSV straight from the URL into a base data.frame.
subways <- read.csv(assignment6a)
# Preview the data as a tibble. as_tibble() replaces the deprecated
# data_frame(), which also nested the whole data frame inside one column
# (hence the "1,928 x 1" header and the `subways$` prefix recorded below).
as_tibble(subways)
## Output recorded from an earlier run that still called data_frame():
## # A tibble: 1,928 x 1
## subways$OBJECTID $URL $NAME $the_geom $LINE
## <int> <fct> <fct> <fct> <fct>
## 1 1734 http://web.m~ Birchall Ave &~ POINT (-73.8683560~ 5-Feb
## 2 1735 http://web.m~ Birchall Ave &~ POINT (-73.8682130~ 5-Feb
## 3 1736 http://web.m~ Morris Park Av~ POINT (-73.8734990~ 5-Feb
## 4 1737 http://web.m~ Morris Park Av~ POINT (-73.8728919~ 5-Feb
## 5 1738 http://web.m~ Boston Rd & 17~ POINT (-73.8796230~ 5-Feb
## 6 1739 http://web.m~ Boston Rd & E ~ POINT (-73.8800050~ 5-Feb
## 7 1740 http://web.m~ Boston Rd & E ~ POINT (-73.8798330~ 5-Feb
## 8 1741 http://web.m~ Boston Rd & 17~ POINT (-73.8795549~ 5-Feb
## 9 1742 http://web.m~ Boston Rd & 17~ POINT (-73.8793970~ 5-Feb
## 10 1743 http://web.m~ Boston Rd & 17~ POINT (-73.8880479~ 5-Feb
## # ... with 1,918 more rows
# Turn date-like LINE values (e.g. "5-Feb") back into numbers by replacing
# each month abbreviation with its month number.
subways$LINE <- sub("Feb", "2", subways$LINE)
subways$LINE <- sub("Mar", "3", subways$LINE)
subways$LINE <- sub("Apr", "4", subways$LINE)
# Drop the first "200" in each value (presumably the leading digits of a
# year such as 2003 left by the same date formatting -- TODO confirm
# against the raw data).
subways$LINE <- sub("200", "", subways$LINE)
# Remove every space, not just the first one, so no stray blank survives
# inside a line name.
subways$LINE <- gsub(" ", "", subways$LINE, fixed = TRUE)
# Normalise case so the same line is never counted under two spellings.
subways$LINE <- toupper(subways$LINE)
# Separate LINE into its own columns: split on "-" or "/" into up to 15
# pieces named "1" ... "15"; rows with fewer pieces are padded with NA
# (which is what the warning recorded below reports).
subways <- separate(
  subways, LINE, as.character(1:15),
  sep = "-|/", extra = "merge"
)
## Warning: Expected 15 pieces. Missing pieces filled with `NA` in 1928
## rows [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
## 20, ...].
# Remove empty columns (those that contain nothing but NA).
subways <- subways[colSums(!is.na(subways)) > 0]
# Make one LINE column, with each line of each entrance on its own row.
# The split columns are picked by their all-digit names rather than by the
# hard-coded positions 5:15, so the reshape stays correct if a different
# number of split columns survives the empty-column removal above.
# Padding NAs and empty strings are then dropped explicitly.
subways <- gather(subways, "index", "LINE", matches("^[0-9]+$")) %>%
  filter(!is.na(LINE), LINE != "")
# Split the_geom on single spaces into three text columns ("point", "X",
# "Y"), then strip the opening parenthesis left on X and the closing
# parenthesis left on Y.
subways <- subways %>%
  separate(the_geom, into = c("point", "X", "Y"), sep = " ") %>%
  mutate(
    X = sub("\\(", "", X),
    Y = sub("\\)", "", Y)
  )
# Graph the entrance points, with each subway line in a different color.
# X and Y are stored as text, so they are converted to numeric inside
# aes(). The columns are referenced by bare name instead of subways$X /
# subways$Y so the aesthetics are always evaluated against the data frame
# handed to ggplot() (using `$` inside aes() bypasses that data masking).
NYCsubway <- ggplot(
  subways,
  aes(x = as.numeric(X), y = as.numeric(Y), color = LINE)
) +
  geom_point(alpha = 0.2) +
  # Plain axis titles instead of the raw "as.numeric(...)" expressions.
  labs(x = "X", y = "Y")
NYCsubway
