library(readr)
library(dplyr)
library(ggplot2)
# Read the bicycle count CSV from an absolute local path into `Bicycle`.
Bicycle <- read.csv(
  file = "C:/Users/dan/Desktop/a Visualization mod 2/Bicycle.csv"
)
# View(Bicycle)
# Columns of interest:
#   NB_YEAR: Year data collected
#   NB_MONTH: Month data collected
#   CT_VOLUME_AMPEAK: Max hour in morning peak
#   CT_VOLUME_PMPEAK: Max hour in evening peak
# Build the working data set ----
# Pull out the month, year and AM/PM peak-hour volume columns, converting each
# one to numeric as it is extracted. The data frame is constructed directly
# (instead of via cbind(), which goes through a single-typed matrix and drops
# the column names) so every column gets its final name and type in one step.
Bicycle_1 <- data.frame(
  NB_MONTH = as.numeric(Bicycle$NB_MONTH),
  NB_YEAR = as.numeric(Bicycle$NB_YEAR),
  VOLUME_AMPEAK = as.numeric(Bicycle$CT_VOLUME_AMPEAK),
  VOLUME_PMPEAK = as.numeric(Bicycle$CT_VOLUME_PMPEAK)
)
# head(Bicycle_1, n = 20) # show the first 20 rows
# str(Bicycle_1) # confirm every column is numeric

# Count missing values after the numeric conversion, so that any NA introduced
# by as.numeric() is included in the total.
sum(is.na(Bicycle_1))
## [1] 0

# Sort chronologically: by year, then by month within each year.
Bicycle_2 <- Bicycle_1[order(Bicycle_1$NB_YEAR, Bicycle_1$NB_MONTH), ]
# View(Bicycle_2)
#View(Bicycle_2)
# Yearly totals of the peak-hour volumes, divided by 1000 ----
# The grouping column is explicitly named `Bicycle_2$NB_YEAR`, which is the
# name the plotting code further down uses to look it up. tally() with a
# weight column sums that column within each year and stores the result in `n`.
Bicycle_3_ampeak <- Bicycle_2 %>%
  group_by(`Bicycle_2$NB_YEAR` = NB_YEAR) %>%
  tally(wt = VOLUME_AMPEAK) %>%
  mutate(n = n / 1000)
# Bicycle_3_ampeak

Bicycle_4_pmpeak <- Bicycle_2 %>%
  group_by(`Bicycle_2$NB_YEAR` = NB_YEAR) %>%
  tally(wt = VOLUME_PMPEAK) %>%
  mutate(n = n / 1000)
# View(Bicycle_4_pmpeak)
# Palette helpers used while choosing the fill colours (left commented out):
# display.brewer.all()
# display.brewer.all(11)
# brewer.pal(n = 11, name = "Spectral")

# Base-graphics settings: single panel, wider left margin. ---- KEEP ----
par(mfrow = c(1, 1))
par(mar = c(5, 8, 4, 2))

# AM peak: yearly totals (in thousands) as a bar chart ----
# Columns are mapped by bare name so ggplot2 looks them up in the plot's own
# data; writing `Bicycle_3_ampeak$n` inside aes() bypasses that lookup and is
# discouraged by ggplot2.
p1 <- ggplot(Bicycle_3_ampeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))

# geom_col() draws bars whose heights are the y values themselves.
p1 +
  geom_col(fill = "#5E4FA2") +
  labs(
    title = "Volume per 1000 Melbourne Cyclists ",
    y = "AM Peak hour ",
    x = "Years"
  ) +
  # Print each bar's value (2 decimals) just above the bar.
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "blue")
# Base-graphics settings: single panel, wider left margin. ---- KEEP ----
# (Previously issued twice in a row; one pair has the same effect.)
par(mfrow = c(1, 1))
par(mar = c(5, 8, 4, 2))

# PM peak: yearly totals (in thousands) as a bar chart ----
# Columns are mapped by bare name so ggplot2 looks them up in the plot's own
# data; writing `Bicycle_4_pmpeak$n` inside aes() bypasses that lookup and is
# discouraged by ggplot2.
p2 <- ggplot(Bicycle_4_pmpeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))

# geom_col() draws bars whose heights are the y values themselves.
p2 +
  geom_col(fill = "#3288BD") +
  labs(
    title = "Volume per 1000 Melbourne Cyclists",
    y = "PM Peak hour",
    x = "Years"
  ) +
  # Print each bar's value (2 decimals) just above the bar.
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "red")

# Restore the base-graphics settings after plotting. ---- KEEP ----
par(mfrow = c(1, 1))
par(mar = c(5, 8, 4, 2))
# par(mfrow = c(1, 1)) ## Reset columns to original setting ---- KEEP ----
# par(mar = c(5, 8, 4, 2))
library(cowplot)

# AM and PM yearly totals stacked in one figure ----
# Each panel reuses the base plot (p1 / p2) and adds white bars, value labels
# and a smoothed trend line. Labels are mapped by bare column name so each
# geom_text layer reads `n` from its own panel's data rather than from a
# global vector.
plot_grid(
  p1 +
    geom_col(fill = "white") +
    labs(
      title = "Cycle Traffic for Melbourne, 2005-2013 ",
      y = "AM Peak Hr ",
      x = " "
    ) +
    geom_text(aes(label = round(n, 2)), vjust = -0.573, size = 3) + # -0.574
    scale_y_continuous(limits = c(0, 1700)) +
    geom_smooth(colour = "blue"),
  p2 +
    geom_col(fill = "white") +
    labs(
      title = " ",
      y = "PM Peak Hr ",
      # NOTE(review): the x axis shows years; this label appears to describe
      # the plotted values instead - confirm it is intended as a caption.
      x = "Count ( per 1000 )"
    ) +
    geom_text(aes(label = round(n, 2)), vjust = -2, size = 3) +
    scale_y_continuous(limits = c(0, 1700)) +
    geom_smooth(colour = "red"),
  labels = c(" ", " "),
  nrow = 2,
  align = "v"
)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# theme(plot.margin = unit(c(0.2, 0.2, 0.2, 0.2), "cm"))

# Restore the base-graphics settings after plotting. ---- KEEP ----
# (Previously repeated; one pair has the same effect.)
par(mfrow = c(1, 1))
par(mar = c(5, 8, 4, 2))
# 1 - (PM peak / AM peak) per year ----
# Element-wise ratio of the two yearly tables (PM over AM); only its `n`
# column is used afterwards.
percentage_1 <- Bicycle_4_pmpeak / Bicycle_3_ampeak
# head(percentage_1)

# Complement of the PM/AM ratio, one value per year.
percent_2 <- 1 - percentage_1$n
# head(percent_2)

# Pair each year with its value under the final column names.
percent_3 <- data.frame(
  Year = Bicycle_3_ampeak$`Bicycle_2$NB_YEAR`,
  Percentage = percent_2
)
# head(percent_3)
# str(percent_3)

per2 <- ggplot(percent_3, aes(x = Year, y = Percentage))
# str(per2)
# plot(per2)

# Red line over the years, with each point's value (2 decimals) printed above.
# The title string contains an escaped newline followed by a literal one, so
# its continuation line must stay unindented.
plot_grid(
  per2 +
    geom_line(colour = "red") +
    labs(
      title = "Melbourne cyclists %, (1- (pmpeak / ampeak)) , that cycle to work in\n
Peak Hr and don't cycle home after work in Peak Hr",
      y = "% of all Melburnian Cyclists in Peak Hr",
      x = "Years "
    ) +
    geom_text(aes(label = round(Percentage, 2)), vjust = -4, size = 3.2)
)
# geom_smooth(colour = "red")
# p1 + geom_point() + geom_density2d() ## works
# p2 + geom_point() + geom_density2d() ## works