library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.3.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(cowplot)
## Warning: package 'cowplot' was built under R version 3.3.3
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(vcd)
## Warning: package 'vcd' was built under R version 3.3.3
## Loading required package: grid
library(GGally)
## Warning: package 'GGally' was built under R version 3.3.3
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.3.3
library(Hmisc)
## Warning: package 'Hmisc' was built under R version 3.3.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.3.3
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.3.3
## Loading required package: Formula
## Warning: package 'Formula' was built under R version 3.3.3
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     combine, src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
library(lmtest)
## Warning: package 'lmtest' was built under R version 3.3.3
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 3.3.3
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(ggmosaic)
## Warning: package 'ggmosaic' was built under R version 3.3.3
## Loading required package: productplots
## Warning: package 'productplots' was built under R version 3.3.3
## 
## Attaching package: 'productplots'
## The following objects are masked from 'package:vcd':
## 
##     mosaic, spine, tile
## 
## Attaching package: 'ggmosaic'
## The following objects are masked from 'package:productplots':
## 
##     ddecker, hspine, mosaic, prodcalc, spine, vspine
## The following objects are masked from 'package:vcd':
## 
##     mosaic, spine
library(readr)
## Warning: package 'readr' was built under R version 3.3.3
library(cowplot)
library(RColorBrewer)
library(vcd)
library(GGally)

Program: Master of Data Science @ RMIT. Subject: Data Visualisation.

# Load the bicycle records; readr guesses the column types (reported below).
Bicycle <- read_csv(file = "C:/RMIT/Data-Visualisation/R/Bicycle.csv")
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   SortDes = col_character(),
##   DS_LOCATION = col_character(),
##   DT_ANALYSIS_SUMMARY = col_character(),
##   DS_HOLIDAY = col_character(),
##   Primary = col_logical(),
##   weekend = col_logical(),
##   Season = col_character(),
##   `Cyclying Season` = col_character(),
##   day = col_character()
## )
## See spec(...) for full column specifications.
# One row per SortDes value: the number of records, plus that number as a
# fraction and as a percentage of all rows in Bicycle.
Bicyclesum <- Bicycle %>%
  group_by(SortDes) %>%
  summarise(count = n()) %>%
  mutate(
    Proportion = count / nrow(Bicycle),
    Percent = count / nrow(Bicycle) * 100
  )

# Turn SortDes into a factor whose levels run from the largest count to the
# smallest, so plots that follow the level order show bars in descending order.
Bicyclesum$SortDes <- with(
  Bicyclesum,
  factor(SortDes, levels = SortDes[order(-count)])
)

Bicyclesum
## # A tibble: 34 x 4
##                                   SortDes count  Proportion   Percent
##                                    <fctr> <int>       <dbl>     <dbl>
##  1 Albert St E Bound Lane, Melbourne City  1056 0.018525341 1.8525341
##  2 Albert St W Bound Lane, Melbourne City  1056 0.018525341 1.8525341
##  3                 Anniversary Trail No.1  2482 0.043541568 4.3541568
##  4             Anniversary Trail No.2 Kew  1980 0.034735014 3.4735014
##  5                  Bay Trail in St Kilda  2496 0.043787169 4.3787169
##  6               Brighton Rd N Bound Lane  1129 0.019805975 1.9805975
##  7               Brighton Rd S Bound Lane  1129 0.019805975 1.9805975
##  8               Cannings Street, Carlton  2314 0.040594355 4.0594355
##  9       Capital City Trail, Princes Hill  2489 0.043664369 4.3664369
## 10          Federation Trail, Hobsons Bay   563 0.009876673 0.9876673
## # ... with 24 more rows
# Bar chart of the pre-computed record count for each SortDes level.
p1 <- ggplot(data = Bicyclesum, mapping = aes(x = SortDes, y = count))
p1 +
  geom_bar(stat = "identity")

# Horizontal bar chart of the number of records per bike path, with the paths
# ordered by that number.
# The mapping uses the bare column name: ggplot2 evaluates aes() inside `data`,
# so `Bicycle$SortDes` is unnecessary and would bypass the plot's data.
# reorder() ranks each level by length(), i.e. by how many rows it has.
d <- ggplot(
  data = Bicycle,
  aes(x = reorder(SortDes, SortDes, FUN = length))
)
d +
  geom_bar(stat = "count", color = "white", fill = "#008040") +
  ggtitle(" Bar plot for the bike path (S3613572)") +
  coord_flip() +
  xlab("Bicycle Path") +
  theme_minimal()

# ggsave() without a plot argument writes the most recently displayed plot.
ggsave("plot5-1.png", width = 18, height = 12, units = "cm")

Q2

# Re-level SortDes in ascending order of count so the largest share is drawn
# at the top of the y axis.
Bicyclesum$SortDes <- factor(
  Bicyclesum$SortDes,
  levels = Bicyclesum$SortDes[order(Bicyclesum$count)]
)

# Dot plot of each path's percentage of all records.
# Mappings use bare column names (not `Bicyclesum$...`) so every layer reads
# from the plot data; the segment layer inherits `y` from the base mapping.
p2 <- ggplot(Bicyclesum, aes(x = Percent, y = SortDes))
p2 +
  geom_point(colour = "dodgerblue3") +
  # Dashed guide line from 0 to each point.
  geom_segment(aes(x = 0, xend = Percent, yend = SortDes), linetype = 2) +
  labs(
    title = "Dot plot for the bike path (S3613572)",
    x = "Percent(%)",
    y = "Bicycle Path" # was misspelled "Bycycle Path"
  ) +
  # Print the rounded percentage just to the right of each point.
  geom_text(aes(label = round(Percent, 2)), hjust = -.2, size = 3) +
  scale_x_continuous(limits = c(0, 6)) +
  theme_classic()

ggsave("plot5-2.png", width = 18, height = 12, units = "cm")
# Non-weekend records for the 'Royal Pde S Bound Lane' counter.
# `weekend` is parsed as a logical column, so it is negated directly instead of
# being compared with the string "FALSE"; columns are referenced by bare name
# so filter() evaluates them against the piped data.
Bicycle_filter <- Bicycle %>%
  filter(SortDes == "Royal Pde S Bound Lane", !weekend)
## Warning: package 'bindrcpp' was built under R version 3.3.3

Q3

# Box plot of CT_VOLUME_24HOUR for the filtered lane (one box, since
# Bicycle_filter holds a single SortDes value).
# The former `fill = CT_VOLUME_24HOUR` mapping is dropped: a box summarises
# many values, so a per-row continuous fill cannot be applied to it.
d <- ggplot(data = Bicycle_filter, aes(x = SortDes, y = CT_VOLUME_24HOUR))
d +
  geom_boxplot() +
  ggtitle(" Box plot of the distribution of CT VOLUME 24HOUR (S3613572)") +
  ylab("CT VOLUME 24HOUR") + # was misspelled "CT VOLUMN 24HOUR"
  xlab("Weekday traffic of Royal Pde S Bound Lane ")

ggsave("plot5-3.png", width = 18, height = 12, units = "cm")

Q4

# Histogram of CT_VOLUME_24HOUR for the filtered lane, using 20 bins.
# The argument is `bins`; the previous `bin = 20` was ignored with a warning
# and the plot silently fell back to the default of 30 bins.
ggplot(Bicycle_filter, aes(x = CT_VOLUME_24HOUR)) +
  geom_histogram(
    bins = 20,
    colour = "floralwhite",
    fill = "deeppink2",
    alpha = 1 / 2
  ) +
  ggtitle("Histogram of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ")

ggsave("plot5-4.png", width = 18, height = 12, units = "cm")

Q6

# Density plot of CT_VOLUME_24HOUR for the filtered lane.
# geom_density() is used with its default density stat. The previous call
# forced `stat = "bin"` (plus an ignored `bin = 20`), which drew binned counts
# under a "Density" title instead of a density estimate.
ggplot(Bicycle_filter, aes(x = CT_VOLUME_24HOUR)) +
  geom_density(colour = "floralwhite", fill = "deeppink2", alpha = 1 / 2) +
  ggtitle("Density of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ")

ggsave("plot5-6.png", width = 18, height = 12, units = "cm")
# Density plot of CT_VOLUME_24HOUR across every record in Bicycle.
# As with the weekday-lane plot, the default density stat replaces the former
# `stat = "bin"` / `bin = 30` combination, whose `bin` argument was ignored and
# whose y axis showed bin counts rather than a density.
ggplot(Bicycle, aes(x = CT_VOLUME_24HOUR)) +
  geom_density(colour = "floralwhite", fill = "deeppink2", alpha = 1 / 2) +
  ggtitle("Density of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ")

ggsave("plot5-6.1.png", width = 18, height = 12, units = "cm")

Q7

# Density-scaled histogram with a normal curve overlaid. The curve uses the
# sample mean and standard deviation of CT_VOLUME_24HOUR.
# aes() refers to the column by bare name; `Bicycle_filter$...` is only needed
# outside aes(), where the mean and sd are computed.
ggplot(Bicycle_filter, aes(x = CT_VOLUME_24HOUR)) +
  geom_histogram(aes(y = ..density..)) +
  stat_function(
    fun = dnorm,
    args = list(
      mean = mean(Bicycle_filter$CT_VOLUME_24HOUR),
      sd = sd(Bicycle_filter$CT_VOLUME_24HOUR)
    ),
    lwd = 2,
    col = 'red'
  ) +
  ggtitle("Histogram of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with a density estimate and a finer, semi-transparent
# 100-bin histogram layered on top. This is the plot that gets saved, because
# it is the last one displayed.
ggplot(Bicycle_filter, aes(x = CT_VOLUME_24HOUR)) +
  geom_histogram(aes(y = ..density..)) +
  geom_density(fill = "dodgerblue", alpha = 1 / 2) +
  geom_histogram(
    aes(y = ..density..),
    colour = "white",
    alpha = 1 / 2,
    bins = 100
  ) +
  ggtitle("Histogram of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ") +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("plot5-7.png", width = 18, height = 12, units = "cm")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Q8

# Density-scaled histogram of CT_VOLUME_24HOUR for the filtered lane.
# Mappings use the bare column name so every layer reads from the plot data.
p8 <- ggplot(Bicycle_filter, aes(x = CT_VOLUME_24HOUR)) +
  geom_histogram(aes(y = ..density..)) +
  ggtitle("Histogram of CT VOLUME 24HOUR (S3613572)") +
  xlab("CT VOLUME 24HOUR ") +
  theme_bw()

# Add a density estimate, a finer 100-bin histogram, and vertical markers for
# the median (solid black) and the mean (dashed red). The annotation
# coordinates are fixed values chosen by hand for this data.
p8 <- p8 +
  geom_density(fill = "dodgerblue", alpha = 1 / 2) +
  geom_histogram(
    aes(y = ..density..),
    colour = "white",
    alpha = 1 / 2,
    bins = 100
  ) +
  geom_vline(
    xintercept = median(Bicycle_filter$CT_VOLUME_24HOUR),
    size = 1.5
  ) +
  annotate("text", label = "Median", x = 1350, y = 0.0018) +
  geom_vline(
    xintercept = mean(Bicycle_filter$CT_VOLUME_24HOUR),
    linetype = 2,
    colour = "red",
    size = 1.5
  ) +
  annotate("text", label = "Mean", x = 1100, y = 0.002, colour = "red")
p8
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

ggsave("plot5-8.png", width = 18, height = 12, units = "cm")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Q9

# Weekend records for the 'Royal Pde S Bound Lane' counter.
# `weekend` is a logical column, so it is used directly as the condition
# rather than being compared with the string "TRUE".
Bicycle_filter2 <- Bicycle %>%
  filter(SortDes == "Royal Pde S Bound Lane", weekend)

# Stacked dot plot of the weekend CT_VOLUME_24HOUR values; the mapping uses
# the bare column name so it is evaluated inside the plot data.
p9 <- ggplot(data = Bicycle_filter2, aes(x = CT_VOLUME_24HOUR))
p9 +
  geom_dotplot(binwidth = 13) +
  ggtitle(" Stack dot plot of CT VOLUME 24HOUR on weekend  (S3613572)") +
  xlab("CT VOLUME 24HOUR ")

ggsave("plot5-9.png", width = 18, height = 12, units = "cm")

Q10

# Weekend records for the 'Royal Pde S Bound Lane' counter (logical `weekend`
# used directly instead of a comparison with the string "TRUE").
Bicycle_filter2 <- Bicycle %>%
  filter(SortDes == "Royal Pde S Bound Lane", weekend)

# Single box plot of the weekend volumes. geom_boxplot() has no `binwidth`
# parameter (it was ignored with a warning), so it is no longer passed.
p10 <- ggplot(Bicycle_filter2, aes(x = factor(1), y = CT_VOLUME_24HOUR)) +
  geom_boxplot() +
  scale_y_continuous(limits = c(0, 800)) +
  ylab("CT VOLUME 24HOUR ") +
  ggtitle(" Distribution CT VOLUME 24HOUR on weekend  (S3613572)") +
  xlab("count")

# Dot plot of the same values on the same 0-800 range so the two panels share
# a value axis once the box plot is flipped.
p9 <- ggplot(data = Bicycle_filter2, aes(x = CT_VOLUME_24HOUR)) +
  geom_dotplot(binwidth = 8) +
  scale_x_continuous(limits = c(0, 800)) +
  xlab("CT VOLUME 24HOUR ")

# Stack the flipped box plot above the dot plot in one column. The box plot's
# vertical axis title, text and ticks are blanked. Both plots are passed first,
# followed by the layout options.
plot_grid(
  p10 +
    coord_flip() +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank()
    ),
  p9,
  ncol = 1,
  align = "v",
  rel_heights = c(1, 2)
)
## Warning: Removed 6 rows containing non-finite values (stat_boxplot).
## Warning: Removed 6 rows containing non-finite values (stat_bindot).

ggsave("plot5-10.png", width = 18, height = 12, units = "cm")

Q11

# Load the water taste-test responses.
Water <- read_csv("C:/RMIT/Data-Visualisation/R/Water_taste.csv")
## Parsed with column specification:
## cols(
##   Gender = col_character(),
##   Age = col_integer(),
##   Class = col_character(),
##   UsuallyDrink = col_character(),
##   FavBotWatBrand = col_character(),
##   Preference = col_character(),
##   First = col_character(),
##   Second = col_character(),
##   Third = col_character(),
##   Fourth = col_character()
## )
# Recode the first-preference codes to names: A, B and C get the names below
# and every other non-missing value becomes "Tap water".
Water$First <- ifelse(
  Water$First == "A", "Sam’s Choice",
  ifelse(
    Water$First == "B", "Aquafina",
    ifelse(Water$First == "C", "Fiji", "Tap water")
  )
)

# Bars of first preference split by gender. The mapping uses the bare column
# name `First` (not `Water$First`) so it is evaluated inside the plot data.
p11 <- ggplot(data = Water, aes(x = First, fill = Gender))
# Within-preference gender shares (each bar stacked to 1).
p11 + geom_bar(position = "fill")

# Side-by-side counts per gender; this is the plot that gets saved.
p11 +
  geom_bar(position = "dodge") +
  scale_fill_manual(values = c('pink', '#999999')) +
  xlab("Preference") +
  ggtitle("The association between gender and first preference (S3613572)")

ggsave("plot5-11.png", width = 18, height = 12, units = "cm")

Q12

# Contingency table of gender (rows) against first preference (columns).
crosstab1 <- table(
  Water$Gender,
  Water$First,
  dnn = c("Gender", "Preference")
)

prop.table(crosstab1, margin = 1) # proportions within each gender (rows sum to 1)
##       Preference
## Gender  Aquafina      Fiji SamÂ’s Choice Tap water
##      F 0.2207792 0.4285714    0.2467532 0.1038961
##      M 0.3125000 0.3437500    0.2187500 0.1250000
prop.table(crosstab1, margin = 2) # proportions within each preference (columns sum to 1)
##       Preference
## Gender  Aquafina      Fiji SamÂ’s Choice Tap water
##      F 0.6296296 0.7500000    0.7307692 0.6666667
##      M 0.3703704 0.2500000    0.2692308 0.3333333
# Replace the table with its row proportions as a data frame: one row per
# Gender/Preference pair, with the proportion in `Freq`.
crosstab1 <- data.frame(prop.table(crosstab1, margin = 1))
str(crosstab1) # structure before renaming
## 'data.frame':    8 obs. of  3 variables:
##  $ Gender    : Factor w/ 2 levels "F","M": 1 2 1 2 1 2 1 2
##  $ Preference: Factor w/ 4 levels "Aquafina","Fiji",..: 1 1 2 2 3 3 4 4
##  $ Freq      : num  0.221 0.312 0.429 0.344 0.247 ...
# Rename `Freq`, since the column holds proportions rather than counts.
names(crosstab1) <- c("Gender", "Preference", "Proportion")
str(crosstab1)
## 'data.frame':    8 obs. of  3 variables:
##  $ Gender    : Factor w/ 2 levels "F","M": 1 2 1 2 1 2 1 2
##  $ Preference: Factor w/ 4 levels "Aquafina","Fiji",..: 1 1 2 2 3 3 4 4
##  $ Proportion: num  0.221 0.312 0.429 0.344 0.247 ...
# Side-by-side bars of the within-gender proportion for each preference.
p12 <- ggplot(
  data = crosstab1,
  mapping = aes(x = Preference, y = Proportion, fill = Gender)
)
p12 +
  geom_bar(stat = "identity", position = "dodge") +
  scale_fill_manual(values = c('pink', '#999999')) +
  ggtitle("Proportion association between gender and first preference (S3613572)")

ggsave("plot5-12.png", width = 18, height = 12, units = "cm")

Q13

Q14

# Load the sea-ice series (columns Year and Ice, per the parse report below).
Seaice <- read_csv(file = "C:/RMIT/Data-Visualisation/R/Sea_ice.csv")
## Parsed with column specification:
## cols(
##   Year = col_integer(),
##   Ice = col_double()
## )
# Scatter of Ice against Year with two trend lines: a linear fit in the
# default colour and, in red, the default smoother (loess per the message
# below).
p14 <- ggplot(Seaice, aes(Year, Ice))
p14 +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_smooth(colour = "red") +
  ggtitle("Time series plot showing the change in Arctic sea ice across time (S3613572)")
## `geom_smooth()` using method = 'loess'

ggsave("plot5-14.png", width = 18, height = 12, units = "cm")
## `geom_smooth()` using method = 'loess'

Q15

# Load the shoe-size data; the parse report below lists the columns.
Shoes <- read_csv(file = "C:/RMIT/Data-Visualisation/R/Shoesize.csv")
## Parsed with column specification:
## cols(
##   Index = col_integer(),
##   Gender = col_character(),
##   Size = col_double(),
##   Height = col_double()
## )

Q16

# Scatter of shoe size against height with a linear fit, the default smoother
# in red, and rug marks along both axes.
# Columns are mapped by bare name (not `Shoes$Height` / `Shoes$Size`) so the
# mapping is evaluated inside the plot data.
p16 <- ggplot(data = Shoes, aes(x = Height, y = Size))
p16 +
  geom_point() +
  geom_smooth(method = "lm") +
  geom_smooth(colour = "red") +
  geom_rug(alpha = 1 / 2) +
  ggtitle("The relationship between height and shoe size (S3613572)") +
  xlab("Height") +
  ylab("Size")
## `geom_smooth()` using method = 'loess'

ggsave("plot5-16.png", width = 18, height = 12, units = "cm")
## `geom_smooth()` using method = 'loess'

Q18

# Box plots of shoe size for each gender, filled by gender.
p18 <- ggplot(Shoes, aes(x = Gender, y = Size, fill = Gender))
p18 +
  geom_boxplot(alpha = .25) +
  ggtitle("The shoe size between males and females (S3613572)") +
  theme_bw()

ggsave("plot5-18.png", width = 18, height = 12, units = "cm")

Q19

# Median shoe size per gender, used to order the Gender factor from the
# largest median to the smallest.
Shoes_rank <- Shoes %>%
  group_by(Gender) %>%
  summarise(med = median(Size))
Shoes$Gender <- factor(
  Shoes$Gender,
  levels = Shoes_rank$Gender[order(-Shoes_rank$med)]
)

# Box plots in that order, with each group's mean marked as a red point.
p19 <- ggplot(Shoes, aes(x = Gender, y = Size, fill = Gender))
p19 +
  geom_boxplot(alpha = .25) +
  ggtitle("The shoe size between males and females (S3613572)") +
  theme_bw() +
  stat_summary(fun.y = "mean", geom = "point", colour = "red")

ggsave("plot5-19.png", width = 18, height = 12, units = "cm")

Q20

# Order the Gender factor by descending median shoe size.
Shoes_rank <- Shoes %>%
  group_by(Gender) %>%
  summarise(med = median(Size))
Shoes$Gender <- factor(
  Shoes$Gender,
  levels = Shoes_rank$Gender[order(-Shoes_rank$med)]
)

# Violin plots per gender, with the group mean as a red point and red error
# bars computed by the "mean_cl_boot" summary.
p20 <- ggplot(Shoes, aes(x = Gender, y = Size, fill = Gender))
p20 +
  geom_violin(alpha = .25) +
  ggtitle("The shoe size between males and females (S3613572)") +
  theme_bw() +
  stat_summary(fun.y = "mean", geom = "point", colour = "red") +
  stat_summary(
    fun.data = "mean_cl_boot",
    colour = "red",
    geom = "errorbar",
    width = .2
  )

ggsave("plot5-20.png", width = 18, height = 12, units = "cm")