library(readr)
library(dplyr)
library(ggplot2)
# Load Bicycle.csv into a data frame; every later step starts from `Bicycle`.
# NOTE(review): the path is absolute and machine-specific.
Bicycle <- read.csv(
  file = "C:/Users/dan/Desktop/a Visualization mod 2/Bicycle.csv"
)
# View(Bicycle)
# Data dictionary for Bicycle.csv:
#   Unique_ID: Self-evident
#   NB_TRAFFIC_SURVEY: Survey Number
#   NB_LOCATION_TRAFFIC_SURVEY: Location survey Number
#   Sort Des: Short Description of the location
#   DS_LOCATION: Location Description
#   DT_ANALYSIS_SUMMARY: Date
#   NB_YEAR: Year data collected
#   NB_MONTH: Month data collected
#   NB_WEEKDAY_NONHOL_QTR: Holiday period indication
#   CT_VOLUME_AMPEAK: Max hour in morning peak
#   CT_VOLUME_PMPEAK: Max hour in evening peak
#   CT_VOLUME_4HOUR_OFFPEAK: 4 hour off peak volume (12:00 to 4:00 PM)
#   CT_VOLUME_12HOUR: 12 hour volume (7:00 AM to 7:00 PM)
#   CT_VOLUME_24HOUR: 24 hour volume
#   DS_HOLIDAY: Holiday description
#   NB_SEASONALITY_PERIOD: Seasonality period indication (1 to 27)
#   NB_TYPE_PERIOD: Seasonality period type indication (1 to 3)
#   Primary: Primary site indication (True / False)
#   weekend: Weekend indication (True / False)
#   Quarter: Number quarter (1 to 4)
#   Season: Weather season
#   Cyclying: Season Cycling season
#   day: Day of the week
# Keep only the columns of interest: month, year and the AM/PM peak-hour
# volumes. The data frame is subset directly rather than cbind()-ing the
# vectors, because cbind() builds a matrix and forces all four columns to one
# common type before they are turned back into a data frame.
Bicycle_1 <- Bicycle[
  ,
  c("NB_MONTH", "NB_YEAR", "CT_VOLUME_AMPEAK", "CT_VOLUME_PMPEAK")
]

# Rename the columns (the CT_ prefix is dropped from the two volume columns).
names(Bicycle_1) <- c("NB_MONTH", "NB_YEAR", "VOLUME_AMPEAK", "VOLUME_PMPEAK")

# Make every column numeric. `Bicycle_1[] <-` keeps the data-frame shape.
Bicycle_1[] <- lapply(Bicycle_1, as.numeric)
# str(Bicycle_1) ## Its now all numeric

# Count missing values. This runs after the numeric conversion so that values
# which failed to convert are counted too. The recorded run printed:
## [1] 0
sum(is.na(Bicycle_1))

# Sort ascending by year, then by month within each year.
Bicycle_2 <- Bicycle_1[order(Bicycle_1$NB_YEAR, Bicycle_1$NB_MONTH), ]
# View(Bicycle_2)
# Yearly totals of the peak-hour volumes, divided by 1000.
# The grouping column keeps the literal name `Bicycle_2$NB_YEAR` because the
# plotting code later in this script looks the year up under that name.

# Morning peak: sum VOLUME_AMPEAK within each year.
Bicycle_3_ampeak <- tally(
  group_by(Bicycle_2, `Bicycle_2$NB_YEAR` = NB_YEAR),
  wt = VOLUME_AMPEAK
)
Bicycle_3_ampeak[["n"]] <- Bicycle_3_ampeak[["n"]] / 1000
# Bicycle_3_ampeak

# Evening peak: sum VOLUME_PMPEAK within each year.
Bicycle_4_pmpeak <- tally(
  group_by(Bicycle_2, `Bicycle_2$NB_YEAR` = NB_YEAR),
  wt = VOLUME_PMPEAK
)
Bicycle_4_pmpeak[["n"]] <- Bicycle_4_pmpeak[["n"]] / 1000
# View(Bicycle_4_pmpeak)
# Palette exploration left by the author (RColorBrewer); the two fill colours
# below were presumably picked from the "Spectral" palette.
# display.brewer.all()
# display.brewer.all(11) ## select some colours
# brewer.pal(n = 11, name = "Spectral")

# Base-graphics settings the author marked as KEEP. The original repeated this
# identical pair before and after every plot; setting it once leaves the
# device in the same state. ggplot2 draws through grid, so these settings are
# not expected to change the ggplot output below.
par(mfrow = c(1, 1)) ## Reset columns to original setting ------- KEEP -------
par(mar = c(5, 8, 4, 2))

# AM peak: yearly totals (in thousands) as a bar chart with value labels.
# aes() refers to the columns of the plot's own data rather than
# `Bicycle_3_ampeak$...`, so the mapping follows the data given to ggplot().
# `p1` itself is only the base plot (data + x/y mapping); layers are added at
# print time so that later code can reuse `p1` with different layers.
p1 <- ggplot(Bicycle_3_ampeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))
p1 +
  geom_col(fill = "#5E4FA2") +
  labs(
    title = "Volume per 1000 Melbourne Cyclists ",
    y = "AM Peak hour ",
    x = "Years"
  ) +
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "blue")

# PM peak: same chart for the evening totals.
p2 <- ggplot(Bicycle_4_pmpeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))
p2 +
  geom_col(fill = "#3288BD") +
  labs(
    title = "Volume per 1000 Melbourne Cyclists",
    y = "PM Peak hour",
    x = "Years"
  ) +
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "red")
library(cowplot)

# Combined figure: AM panel stacked above the PM panel (nrow = 2), vertically
# aligned, each with white bars, value labels and a smoothed trend line.
# The label aesthetics use the data column `n` of each plot's own data instead
# of `Bicycle_3_ampeak$n` / `Bicycle_4_pmpeak$n`.
# geom_smooth() is left on its defaults; the recorded run reported
# method = 'loess' and formula 'y ~ x' for both panels.
plot_grid(
  p1 +
    geom_col(fill = "white") +
    labs(
      title = "Cycle Traffic for Melbourne, 2005-2013 ",
      y = "AM ",
      x = " "
    ) +
    geom_text(aes(label = round(n, 2)), vjust = -0.573, size = 3) + #-0.574
    scale_y_continuous(limits = c(0, 1700)) +
    geom_smooth(colour = "blue"),
  p2 +
    geom_col(fill = "white") +
    labs(
      title = " ",
      y = "PM ",
      x = "Count ( per 1000 )"
    ) +
    geom_text(aes(label = round(n, 2)), vjust = -2, size = 3) +
    scale_y_continuous(limits = c(0, 1700)) +
    geom_smooth(colour = "red"),
  labels = c(" ", " "),
  nrow = 2,
  align = "v"
)
# theme(plot.margin = unit(c(0.2,0.2,0.2,0.2), "cm"))

# Base-graphics settings the author marked as KEEP; the original repeated
# these identical calls, so one pair leaves the same state.
par(mfrow = c(1, 1)) ## Reset columns to original setting ------- KEEP -------
par(mar = c(5, 8, 4, 2))
# Yearly gap between the morning and evening peak totals, as a percentage of
# the morning total: (1 - PM / AM) * 100.

# The element-wise division below pairs rows by position, so both yearly
# tables must list the same years in the same order.
stopifnot(
  identical(
    Bicycle_3_ampeak$`Bicycle_2$NB_YEAR`,
    Bicycle_4_pmpeak$`Bicycle_2$NB_YEAR`
  )
)

# PM / AM for every column (the year column becomes 1 and is not used).
percentage_1 <- Bicycle_4_pmpeak / Bicycle_3_ampeak
# head(percentage_1)

# Fraction of the AM total that is absent from the PM total.
percent_2 <- 1 - percentage_1$n
# head(percent_2)

# The column and the y-axis are labelled as a percentage, so scale the
# fraction by 100; previously values such as 0.12 were plotted under a "%"
# label.
percent_3 <- data.frame(
  Year = Bicycle_3_ampeak$`Bicycle_2$NB_YEAR`,
  Percentage = percent_2 * 100
)
# head(percent_3)
# str(percent_3)

per2 <- ggplot(percent_3, aes(x = Year, y = Percentage))
# str(per2)
# plot(per2)

# Line chart of the yearly percentage with the rounded value printed above
# each point. The title keeps its blank line between the two text lines.
plot_grid(
  per2 +
    geom_line(color = "red") +
    labs(
      title = paste0(
        "Melbourne cyclists who cycle to work\n\n",
        "but don't cycle home after work"
      ),
      y = "% of all Melburnian Cyclists ",
      x = "Years "
    ) +
    geom_text(aes(label = round(Percentage, 2)), vjust = -4, size = 3.2)
)
# geom_smooth(colour = "red")
# p1 + geom_point() + geom_density2d() ## works
# p2 + geom_point() + geom_density2d() ## works