Ex 5 MATH2270 Visualising Open Data

library(readr)
library(dplyr)
library(ggplot2)
# Load the raw bicycle survey data from the local CSV file.
bicycle_csv_path <- "C:/Users/dan/Desktop/a Visualization mod 2/Bicycle.csv"
Bicycle <- read.csv(bicycle_csv_path)
#View(Bicycle)

Column descriptions:

- Unique_ID: Self-evident
- NB_TRAFFIC_SURVEY: Survey number
- NB_LOCATION_TRAFFIC_SURVEY: Location survey number
- Sort Des: Short description of the location
- DS_LOCATION: Location description
- DT_ANALYSIS_SUMMARY: Date
- NB_YEAR: Year data collected
- NB_MONTH: Month data collected
- NB_WEEKDAY_NONHOL_QTR: Holiday period indication
- CT_VOLUME_AMPEAK: Max hour in morning peak
- CT_VOLUME_PMPEAK: Max hour in evening peak
- CT_VOLUME_4HOUR_OFFPEAK: 4 hour off-peak volume (12:00 to 4:00 PM)
- CT_VOLUME_12HOUR: 12 hour volume (7:00 AM to 7:00 PM)
- CT_VOLUME_24HOUR: 24 hour volume
- DS_HOLIDAY: Holiday description
- NB_SEASONALITY_PERIOD: Seasonality period indication (1 to 27)
- NB_TYPE_PERIOD: Seasonality period type indication (1 to 3)
- Primary: Primary site indication (True / False)
- weekend: Weekend indication (True / False)
- Quarter: Number quarter (1 to 4)
- Season: Weather season
- Cyclying: Season Cycling season
- day: Day of the week

# Keep only the columns that interest me: month, year and the two peak volumes

Bicycle_1 <- Bicycle[, c(
  "NB_MONTH", "NB_YEAR", "CT_VOLUME_AMPEAK", "CT_VOLUME_PMPEAK"
)]

#head(Bicycle_1, n=20)  ## Show me the first 20 rows

sum(is.na(Bicycle_1))   ## check for any missing values in my new data set

## [1] 0

#str(Bicycle_1)         ## Check the str

## Rename the columns (the two volume columns lose their "CT_" prefix)
names(Bicycle_1) <- c("NB_MONTH", "NB_YEAR", "VOLUME_AMPEAK", "VOLUME_PMPEAK")

#View(Bicycle_1)
#str(Bicycle_1)

## Change everything to numeric; `[] <-` keeps the data frame and its names
Bicycle_1[] <- lapply(Bicycle_1, as.numeric)

#str(Bicycle_1)  ##  Its now all numeric
#View(Bicycle_1)

# Sort ascending by year, then by month within each year
Bicycle_2 <- Bicycle_1[with(Bicycle_1, order(NB_YEAR, NB_MONTH)), ]
#View(Bicycle_2)

# Yearly totals of the peak-hour volumes, expressed in thousands.
#
# The grouping column is explicitly named `Bicycle_2$NB_YEAR` (backticks are
# required) because the plotting code further down refers to it by that name.

# AM peak: sum VOLUME_AMPEAK within each year, then scale to thousands
Bicycle_3_ampeak <- Bicycle_2 %>%
  group_by(`Bicycle_2$NB_YEAR` = NB_YEAR) %>%
  tally(VOLUME_AMPEAK) %>%
  mutate(n = n / 1000)
#Bicycle_3_ampeak

# PM peak: same aggregation on VOLUME_PMPEAK
Bicycle_4_pmpeak <- Bicycle_2 %>%
  group_by(`Bicycle_2$NB_YEAR` = NB_YEAR) %>%
  tally(VOLUME_PMPEAK) %>%
  mutate(n = n / 1000)
#View(Bicycle_4_pmpeak)

#display.brewer.all()
#display.brewer.all(11)    ##  select some colours
#brewer.pal(n = 11, name = "Spectral")

# AM peak: yearly totals (in thousands) as a bar chart.

# NOTE: par() configures base graphics only; ggplot2 draws with grid, so these
# settings do not change the plots below. Retained because they are marked KEEP.
par(mfrow = c(1, 1))   ##  Reset columns to original setting   ---   KEEP   ---
par(mar = c(5, 8, 4, 2))

# `p1` holds only the data and the x/y mapping, with no layers, so it can be
# reused with different layers later on. Columns are referenced by bare name:
# `df$col` inside aes() bypasses the layer data and is discouraged by ggplot2.
p1 <- ggplot(Bicycle_3_ampeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))

p1 +
  geom_col(fill = "#5E4FA2") +
  labs(
    title = "Volume per 1000  Melbourne Cyclists ",
    y = "AM Peak hour ",
    x = "Years"
  ) +
  # Print each bar's value just above the bar
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "blue")

# PM peak: yearly totals (in thousands) as a bar chart.

# NOTE: par() configures base graphics only; ggplot2 draws with grid, so these
# settings do not change the plots below. Retained because they are marked KEEP
# (the original repeated this pair twice in a row; once is equivalent).
par(mfrow = c(1, 1))   ##  Reset columns to original setting   ---   KEEP   ---
par(mar = c(5, 8, 4, 2))

# `p2` holds only the data and the x/y mapping, with no layers, so it can be
# reused with different layers later on. Columns are referenced by bare name:
# `df$col` inside aes() bypasses the layer data and is discouraged by ggplot2.
p2 <- ggplot(Bicycle_4_pmpeak, aes(x = `Bicycle_2$NB_YEAR`, y = n))

p2 +
  geom_col(fill = "#3288BD") +
  labs(
    title = "Volume per 1000  Melbourne Cyclists",
    y = "PM Peak hour",
    x = "Years"
  ) +
  # Print each bar's value just above the bar
  geom_text(aes(label = round(n, 2)), vjust = -0.5, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) ## + geom_smooth(colour = "red")

par(mfrow = c(1, 1))   ##  Reset columns to original setting   ---   KEEP   ---
par(mar = c(5, 8, 4, 2))

#par(mfrow = c(1,1))   ##  Reset columns to original setting   ---   KEEP   ---
#par(mar=c(5,8,4,2))

library(cowplot)

# Combined figure: AM panel on top, PM panel below, each with white bars,
# value labels and a smoothed trend line.
#
# Labels are mapped with the bare column name `n`; `df$col` inside aes()
# bypasses the layer data and is discouraged by ggplot2.

am_panel <- p1 +
  geom_col(fill = "white") +
  labs(
    title = "Cycle Traffic for Melbourne, 2005-2013 ",
    y = "AM ",
    x = " "
  ) +
  geom_text(aes(label = round(n, 2)), vjust = -0.573, size = 3) + #-0.574
  scale_y_continuous(limits = c(0, 1700)) +
  geom_smooth(colour = "blue")

pm_panel <- p2 +
  geom_col(fill = "white") +
  labs(
    title = " ",
    y = "PM ",
    x = "Count ( per 1000 )"
  ) +
  geom_text(aes(label = round(n, 2)), vjust = -2, size = 3) +
  scale_y_continuous(limits = c(0, 1700)) +
  geom_smooth(colour = "red")

# Stack the two panels and align them vertically
plot_grid(am_panel, pm_panel, labels = c(" ", " "), nrow = 2, align = "v")

## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

          #theme(plot.margin = unit(c(0.2,0.2,0.2,0.2), "cm"))

#par(mfrow = c(1,1))   ##  Reset columns to original setting   ---   KEEP   ---
par(mar = c(5, 8, 4, 2))

I don’t like this. I’ve tried to lift the AM numbers up a little, but the numbers are still obstructed; similarly, the PM plot has an obstructed value. Option 2 was to combine the two into one visual, but because the lines are so similar that is not a good option. Maybe lower the AM graph closer to the PM graph, then lift the numbers?

I’ve added this in because I find it more interesting. According to this plot, some workplaces will have a growing number of bicycles; or is it that the city-provided bicycles are continually relocated to the main areas people might cycle from?

# Yearly shortfall of the PM peak relative to the AM peak, as a percentage.

# NOTE: par() configures base graphics only; ggplot2 draws with grid, so these
# settings do not change the plot below. Retained because they are marked KEEP.
par(mfrow = c(1, 1))   ##  Reset columns to original setting   ---   KEEP   ---
par(mar = c(5, 8, 4, 2))

# Shortfall = 100 * (1 - PM / AM), computed on the `n` columns only. Both
# tables are grouped by the same year column, so their rows are assumed to
# line up year by year. The result is scaled to a true percentage so that the
# values match the "%" axis label (they used to be 0-1 proportions).
percent_2 <- 100 * (1 - Bicycle_4_pmpeak$n / Bicycle_3_ampeak$n)
#head(percent_2)

percent_3 <- data.frame(
  Year = Bicycle_3_ampeak$`Bicycle_2$NB_YEAR`,
  Percentage = percent_2
)
# head(percent_3)
# str(percent_3)

per2 <- ggplot(percent_3, aes(x = Year, y = Percentage))
# str(per2)
# plot(per2)

# The title literal deliberately spans two source lines; it is reproduced
# exactly so the rendered title is unchanged.
plot_grid(
  per2 +
    geom_line(color = "red") +
    labs(title = "Melbourne cyclists who cycle to work\n 
  but don't cycle home after work",
         y = "%  of  all  Melburnian  Cyclists ",
         x = "Years ") +
    # Whole-percent labels (same precision as the former 2-decimal proportion)
    geom_text(aes(label = round(Percentage)), vjust = -4, size = 3.2)
)

            #geom_smooth(colour = "red")

# p1 + geom_point() + geom_density2d()    ## works

# p2 + geom_point() + geom_density2d()    ## works

Ex 5 MATH2270 Visualising Open Data

s3686502 Dan Enoka

26 August 2018

I’ve added this in because I find it more interesting. According to this plot, some workplaces will have a growing number of bicycles; or is it that the city-provided bicycles are continually relocated to the main areas people might cycle from?