# Read the ridership CSV (path is relative to the working directory) and take
# a first look at its dimensions, column names, and leading five columns.
data1 <- read.csv(file = "Charm_City_Circulator_Ridership.csv")
dim(data1)
## [1] 1146 15
colnames(data1)
## [1] "day" "date" "orangeBoardings"
## [4] "orangeAlightings" "orangeAverage" "purpleBoardings"
## [7] "purpleAlightings" "purpleAverage" "greenBoardings"
## [10] "greenAlightings" "greenAverage" "bannerBoardings"
## [13] "bannerAlightings" "bannerAverage" "daily"
head(data1[, 1:5])
## day date orangeBoardings orangeAlightings orangeAverage
## 1 Monday 01/11/2010 877 1027 952.0
## 2 Tuesday 01/12/2010 777 815 796.0
## 3 Wednesday 01/13/2010 1203 1220 1211.5
## 4 Thursday 01/14/2010 1194 1233 1213.5
## 5 Friday 01/15/2010 1645 1643 1644.0
## 6 Saturday 01/16/2010 1457 1524 1490.5
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Orange route: keep the rows of data1 that have a non-missing boarding count,
# retain only that count, and tag every remaining row with the route name.
orange <- data1 %>%
  filter(!is.na(orangeBoardings)) %>%
  select(orangeBoardings)
orange[["boarding"]] <- "orange"
head(orange)
## orangeBoardings boarding
## 1 877 orange
## 2 777 orange
## 3 1203 orange
## 4 1194 orange
## 5 1645 orange
## 6 1457 orange
# One-column frame holding just the route label, one row per retained count.
orange2 <- data.frame(boarding = orange[["boarding"]])
names(orange2)
## [1] "boarding"
head(orange2)
## boarding
## 1 orange
## 2 orange
## 3 orange
## 4 orange
## 5 orange
## 6 orange
# Purple route: keep the rows of data1 that have a non-missing boarding count,
# retain only that count, and tag every remaining row with the route name.
purple <- data1 %>%
  filter(!is.na(purpleBoardings)) %>%
  select(purpleBoardings)
purple[["boarding"]] <- "purple"
head(purple)
## purpleBoardings boarding
## 1 1028 purple
## 2 1327 purple
## 3 1726 purple
## 4 2044 purple
## 5 2230 purple
## 6 1852 purple
# One-column frame holding just the route label, one row per retained count.
purple2 <- data.frame(boarding = purple[["boarding"]])
names(purple2)
## [1] "boarding"
head(purple2)
## boarding
## 1 purple
## 2 purple
## 3 purple
## 4 purple
## 5 purple
## 6 purple
# Green route: keep the rows of data1 that have a non-missing boarding count,
# retain only that count, and tag every remaining row with the route name.
green <- data1 %>%
  filter(!is.na(greenBoardings)) %>%
  select(greenBoardings)
green[["boarding"]] <- "green"
head(green)
## greenBoardings boarding
## 1 887 green
## 2 1057 green
## 3 1142 green
## 4 1357 green
## 5 760 green
## 6 654 green
# One-column frame holding just the route label, one row per retained count.
green2 <- data.frame(boarding = green[["boarding"]])
names(green2)
## [1] "boarding"
head(green2)
## boarding
## 1 green
## 2 green
## 3 green
## 4 green
## 5 green
## 6 green
# Banner route: keep the rows of data1 that have a non-missing boarding count,
# retain only that count, and tag every remaining row with the route name.
banner <- data1 %>%
  filter(!is.na(bannerBoardings)) %>%
  select(bannerBoardings)
banner[["boarding"]] <- "banner"
head(banner)
## bannerBoardings boarding
## 1 520 banner
## 2 574 banner
## 3 630 banner
## 4 670 banner
## 5 847 banner
## 6 987 banner
# One-column frame holding just the route label, one row per retained count.
banner2 <- data.frame(boarding = banner[["boarding"]])
names(banner2)
## [1] "boarding"
head(banner2)
## boarding
## 1 banner
## 2 banner
## 3 banner
## 4 banner
## 5 banner
## 6 banner
# Stack the four single-column label frames; each route contributes one row
# for every row of data1 in which its boarding count is not missing.
cc <- rbind(orange2,
            green2,
            purple2,
            banner2)

# Count rows per route and draw the counts as bars. table() sorts the labels
# of a character column alphabetically (banner, green, orange, purple), which
# would pair the positional `col` vector with the wrong bars. Pinning the
# level order keeps bar i and colour i on the same route (banner is drawn in
# blue).
barplot(table(factor(cc$boarding,
                     levels = c("orange", "green", "purple", "banner"))),
        main = "Bar chart showing the frequency of Boarding",
        xlab = " category of boarding",
        ylab = "frequency of boarding",
        border = "brown",
        col = c("orange", "green", "purple", "blue"))
# 2) Construct boxplots, grouped by day, for orangeBoardings, purpleBoardings, greenBoardings, and bannerBoardings.
library(dplyr)
# Mean boardings per route, one row per distinct value of `day`. Missing
# counts are dropped from each route's mean. `TRUE` is spelled out because
# `T` is an ordinary variable that can be reassigned.
new <- data1 %>%
  group_by(day) %>%
  summarise(
    orange = mean(orangeBoardings, na.rm = TRUE),
    purple = mean(purpleBoardings, na.rm = TRUE),
    green = mean(greenBoardings, na.rm = TRUE),
    banner = mean(bannerBoardings, na.rm = TRUE)
  )
# Orange boardings alongside their day, grouped by day (grouping is kept).
new2 <- data1 %>%
  group_by(day) %>%
  select(day, orangeBoardings)
# Re-apply the route label to the filtered orange frame built earlier.
orange[["boarding"]] <- "orange"
head(orange)
## orangeBoardings boarding
## 1 877 orange
## 2 777 orange
## 3 1203 orange
## 4 1194 orange
## 5 1645 orange
## 6 1457 orange
# One-column frame of orange route labels.
orangebp <- data.frame(boarding = orange[["boarding"]])
names(orangebp)
## [1] "boarding"
head(orangebp)
## boarding
## 1 orange
## 2 orange
## 3 orange
## 4 orange
## 5 orange
## 6 orange
# Purple boardings alongside their day, grouped by day (grouping is kept).
new3 <- data1 %>%
  group_by(day) %>%
  select(day, purpleBoardings)
# Re-apply the route label to the filtered purple frame built earlier.
purple$boarding <- "purple"
head(purple)
## purpleBoardings boarding
## 1 1028 purple
## 2 1327 purple
## 3 1726 purple
## 4 2044 purple
## 5 2230 purple
## 6 1852 purple
# One-column frame of purple route labels. The labels must come from `purple`:
# taking them from `orange` filled this frame with "orange" rows, so the
# purple route was missing from anything built on top of it.
purplebp <- data.frame(boarding = purple$boarding)
names(purplebp)
## [1] "boarding"
head(purplebp)
## boarding
## 1 purple
## 2 purple
## 3 purple
## 4 purple
## 5 purple
## 6 purple
# Banner boardings alongside their day, grouped by day (grouping is kept).
new4 <- data1 %>%
  group_by(day) %>%
  select(day, bannerBoardings)
# Re-apply the route label to the filtered banner frame built earlier.
banner[["boarding"]] <- "banner"
head(banner)
## bannerBoardings boarding
## 1 520 banner
## 2 574 banner
## 3 630 banner
## 4 670 banner
## 5 847 banner
## 6 987 banner
# One-column frame of banner route labels.
bannerbp <- data.frame(boarding = banner[["boarding"]])
names(bannerbp)
## [1] "boarding"
head(bannerbp)
## boarding
## 1 banner
## 2 banner
## 3 banner
## 4 banner
## 5 banner
## 6 banner
# Green boardings alongside their day, grouped by day (grouping is kept).
new5 <- data1 %>%
  group_by(day) %>%
  select(day, greenBoardings)
# Re-apply the route label to the filtered green frame built earlier.
green[["boarding"]] <- "green"
head(green)
## greenBoardings boarding
## 1 887 green
## 2 1057 green
## 3 1142 green
## 4 1357 green
## 5 760 green
## 6 654 green
# One-column frame of green route labels.
greenbp <- data.frame(boarding = green[["boarding"]])
names(greenbp)
## [1] "boarding"
head(greenbp)
## boarding
## 1 green
## 2 green
## 3 green
## 4 green
## 5 green
## 6 green
# Row-bind the per-route label frames, in orange, green, purple, banner order,
# into a single long frame; this replaces any earlier `cc`.
cc <- do.call(rbind, list(orangebp, greenbp, purplebp, bannerbp))
# Mean boardings per route, one row per distinct value of `date`. Missing
# counts are dropped from each route's mean. This reassigns `new`. `TRUE` is
# spelled out because `T` is an ordinary variable that can be reassigned.
new <- data1 %>%
  group_by(date) %>%
  summarise(
    orange = mean(orangeBoardings, na.rm = TRUE),
    purple = mean(purpleBoardings, na.rm = TRUE),
    green = mean(greenBoardings, na.rm = TRUE),
    banner = mean(bannerBoardings, na.rm = TRUE)
  )
library(ggplot2)
# Distribution of orange-route boardings. `new2` is the frame that carries
# `orangeBoardings`; `new4` only holds `day` and `bannerBoardings`, so reading
# the column from it raised "Unknown or uninitialised column" warnings and
# plotted no data.
# NOTE(review): qplot() on a single numeric vector draws a histogram; this
# section's stated goal is a boxplot by day - confirm whether a boxplot
# (e.g. geom = "boxplot" with `day` on the x axis) was intended here.
qplot(new2$orangeBoardings)