Dataset source: https://github.com/RMHogervorst/unicorns_on_unicycles

This is the follow-up to an earlier exercise, the Data Cleaning & Data Wrangling exercise available at http://rpubs.com/drArvind/369269, which used the unicorns_on_unicycles dataset mentioned above. Here we’ll just plot some enhanced scatterplots for the main focus of the exercise, i.e. the relation between the unicorn population and the sale of unicycles in Austria, France, Germany, the Netherlands and Switzerland.

The Dataset:

library(readxl)
# Load the unicorn observations workbook and open it in the data viewer.
observations <- read_excel(
  path = "~/public datset/githubs/unicorns_on_unicycles-master/observations.xlsx"
)
View(observations)

# Load the unicycle sales workbook and open it in the data viewer.
sales <- read_excel(
  path = "~/public datset/githubs/unicorns_on_unicycles-master/sales.xlsx"
)
View(sales)

# observations and sales are combined by row position, so they must at least
# have the same number of rows.
# NOTE(review): this also assumes both sheets list country/year in the same
# order -- confirm, or join on the key columns instead.
stopifnot(nrow(observations) == nrow(sales))

# Name the columns with `=`. Using `<-` inside data.frame() assigns stray
# global variables and produces mangled column names such as
# "country....observations.countryname" that then have to be trimmed by hand.
# country is converted to a factor explicitly so it stays a factor under
# R >= 4.0 as well, where data.frame() no longer converts strings by default.
obs <- data.frame(
  country = factor(observations$countryname),
  year = observations$year,
  pop = observations$pop,
  bikes = sales$bikes,
  turnover = sales$total_turnover
)
names(obs)
## [1] "country"  "year"     "pop"      "bikes"    "turnover"
View(obs)

bikes & pop in Austria:

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Country names in alphabetical order. sort(unique()) is used instead of
# levels() so this also works when obs$country is a character vector
# (levels() returns NULL for non-factors).
opts <- sort(unique(as.character(obs$country)))
colr <- rainbow(length(opts)) # one colour per country
idx <- 1L # position of this section's country in opts (expected: Austria)
country_obs <- obs %>% filter(country == opts[idx])

# Main panel: scatterplot of bikes against pop.
# fig is set WITHOUT new = TRUE so a fresh page is started: nothing from an
# earlier figure is overplotted and par() no longer warns about
# "calling par(new=TRUE) with no plot". The previous settings are kept in op.
# pch / col: plotting symbol and color
op <- par(fig = c(0, 0.8, 0, 0.8), pch = 16, col = colr[idx])
country_obs %>% select(pop, bikes) %>% plot()

# Top strip: horizontal boxplot of pop, added to the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
country_obs %>% select(pop) %>% boxplot(horizontal = TRUE, axes = FALSE)

# Right strip: vertical boxplot of bikes, added to the same page.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
country_obs %>% select(bikes) %>% boxplot(axes = FALSE)

mtext(paste("country =", opts[idx]), side = 3, outer = TRUE, line = -3,
      col = "black")

# Restore fig, pch and col so later plots are not affected.
par(op)

bikes & pop in France:

library(dplyr)
# Country names in alphabetical order. sort(unique()) is used instead of
# levels() so this also works when obs$country is a character vector
# (levels() returns NULL for non-factors).
opts <- sort(unique(as.character(obs$country)))
colr <- rainbow(length(opts)) # one colour per country
idx <- 2L # position of this section's country in opts (expected: France)
country_obs <- obs %>% filter(country == opts[idx])

# Main panel: scatterplot of bikes against pop.
# fig is set WITHOUT new = TRUE so a fresh page is started: nothing from an
# earlier figure is overplotted and par() no longer warns about
# "calling par(new=TRUE) with no plot". The previous settings are kept in op.
# pch / col: plotting symbol and color
op <- par(fig = c(0, 0.8, 0, 0.8), pch = 16, col = colr[idx])
country_obs %>% select(pop, bikes) %>% plot()

# Top strip: horizontal boxplot of pop, added to the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
country_obs %>% select(pop) %>% boxplot(horizontal = TRUE, axes = FALSE)

# Right strip: vertical boxplot of bikes, added to the same page.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
country_obs %>% select(bikes) %>% boxplot(axes = FALSE)

mtext(paste("country =", opts[idx]), side = 3, outer = TRUE, line = -3,
      col = "black")

# Restore fig, pch and col so later plots are not affected.
par(op)

bikes & pop in Germany:

library(dplyr)
# Country names in alphabetical order. sort(unique()) is used instead of
# levels() so this also works when obs$country is a character vector
# (levels() returns NULL for non-factors).
opts <- sort(unique(as.character(obs$country)))
colr <- rainbow(length(opts)) # one colour per country
idx <- 3L # position of this section's country in opts (expected: Germany)
country_obs <- obs %>% filter(country == opts[idx])

# Main panel: scatterplot of bikes against pop.
# fig is set WITHOUT new = TRUE so a fresh page is started: nothing from an
# earlier figure is overplotted and par() no longer warns about
# "calling par(new=TRUE) with no plot". The previous settings are kept in op.
# pch / col: plotting symbol and color
op <- par(fig = c(0, 0.8, 0, 0.8), pch = 16, col = colr[idx])
country_obs %>% select(pop, bikes) %>% plot()

# Top strip: horizontal boxplot of pop, added to the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
country_obs %>% select(pop) %>% boxplot(horizontal = TRUE, axes = FALSE)

# Right strip: vertical boxplot of bikes, added to the same page.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
country_obs %>% select(bikes) %>% boxplot(axes = FALSE)

mtext(paste("country =", opts[idx]), side = 3, outer = TRUE, line = -3,
      col = "black")

# Restore fig, pch and col so later plots are not affected.
par(op)

bikes & pop in Netherlands:

library(dplyr)
# Country names in alphabetical order. sort(unique()) is used instead of
# levels() so this also works when obs$country is a character vector
# (levels() returns NULL for non-factors).
opts <- sort(unique(as.character(obs$country)))
colr <- rainbow(length(opts)) # one colour per country
idx <- 4L # position of this section's country in opts (expected: Netherlands)
country_obs <- obs %>% filter(country == opts[idx])

# Main panel: scatterplot of bikes against pop.
# fig is set WITHOUT new = TRUE so a fresh page is started: nothing from an
# earlier figure is overplotted and par() no longer warns about
# "calling par(new=TRUE) with no plot". The previous settings are kept in op.
# pch / col: plotting symbol and color
op <- par(fig = c(0, 0.8, 0, 0.8), pch = 16, col = colr[idx])
country_obs %>% select(pop, bikes) %>% plot()

# Top strip: horizontal boxplot of pop, added to the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
country_obs %>% select(pop) %>% boxplot(horizontal = TRUE, axes = FALSE)

# Right strip: vertical boxplot of bikes, added to the same page.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
country_obs %>% select(bikes) %>% boxplot(axes = FALSE)

mtext(paste("country =", opts[idx]), side = 3, outer = TRUE, line = -3,
      col = "black")

# Restore fig, pch and col so later plots are not affected.
par(op)

bikes & pop in Switzerland:

library(dplyr)
# Country names in alphabetical order. sort(unique()) is used instead of
# levels() so this also works when obs$country is a character vector
# (levels() returns NULL for non-factors).
opts <- sort(unique(as.character(obs$country)))
colr <- rainbow(length(opts)) # one colour per country
idx <- 5L # position of this section's country in opts (expected: Switzerland)
country_obs <- obs %>% filter(country == opts[idx])

# Main panel: scatterplot of bikes against pop.
# fig is set WITHOUT new = TRUE so a fresh page is started: nothing from an
# earlier figure is overplotted and par() no longer warns about
# "calling par(new=TRUE) with no plot". The previous settings are kept in op.
# pch / col: plotting symbol and color
op <- par(fig = c(0, 0.8, 0, 0.8), pch = 16, col = colr[idx])
country_obs %>% select(pop, bikes) %>% plot()

# Top strip: horizontal boxplot of pop, added to the same page.
par(fig = c(0, 0.8, 0.55, 1), new = TRUE)
country_obs %>% select(pop) %>% boxplot(horizontal = TRUE, axes = FALSE)

# Right strip: vertical boxplot of bikes, added to the same page.
par(fig = c(0.65, 1, 0, 0.8), new = TRUE)
country_obs %>% select(bikes) %>% boxplot(axes = FALSE)

mtext(paste("country =", opts[idx]), side = 3, outer = TRUE, line = -3,
      col = "black")

# Restore fig, pch and col so later plots are not affected.
par(op)

note:

I have not excluded the observation for the year 1670, which is available only for Austria, because the data are already very scanty and I don’t want to lose any information. Moreover, the emphasis is on finding the relationship between the two variables and not on comparisons between the countries.

table(obs$year)
## 
## 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 
##    1    3    3    4    4    5    5    5    5    3    4

Conclusion:

The data from all five countries suggest a relation between unicorn populations and unicycle sales in the 1670s.

                                 Thank you!