# Point the session at the course directory so that the relative CSV path
# used by read.csv() further down resolves.
# NOTE(review): this path is machine-specific; setwd() errors if the
# directory does not exist, so the script is not portable as written.
setwd("~/Courses/Exploratory Data Analysis(Udacity)")
# Echo the working directory to confirm the change took effect.
getwd()
## [1] "/Users/ahada/Courses/Exploratory Data Analysis(Udacity)"
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## Loading required package: grid
library(reshape2)
library(ggthemes)
# Make the ggthemes "few" theme, at base font size 16, the default for every
# plot drawn after this point.
theme_set(theme_few(base_size = 16))

Reading the CSV file into a data frame

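Notes: `check.names` is a logical argument. If `TRUE`, the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names; if necessary they are adjusted (by `make.names`) so that they are, and also to ensure that there are no duplicates. It is set to `FALSE` here so that the year columns keep their original names (e.g. `1980`) instead of being rewritten (e.g. to `X1980`).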

# Read the weekly-working-hours table. check.names = FALSE keeps the column
# headers exactly as they appear in the file instead of passing them through
# make.names(). TRUE is spelled out because the shorthand T can be reassigned.
ww_hrs <- read.csv(
  "weekly_working_hours.csv",
  header = TRUE,
  check.names = FALSE
)
# Give the first column an explicit, plot-friendly header.
colnames(ww_hrs)[1] <- "Country"

Subsetting the data to have only complete cases since 1980

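Notes: `na.omit()` vs. `complete.cases()` — `na.omit(df)` returns the data frame with every incomplete row removed, whereas `complete.cases(df)` returns a logical vector marking which rows are complete (so `df[complete.cases(df), ]` selects the same rows).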

# Keep only the rows (countries) that have a value in every column.
ww_hrs2 <- na.omit(ww_hrs)
# Rebuild the data frame so the row names are renumbered after the drop;
# check.names = FALSE again preserves the column headers as read.
ww_hrs2 <- data.frame(ww_hrs2, row.names = NULL, check.names = FALSE)
# Reshape from wide (one column per year) to long (one row per country-year).
# The full argument name id.vars is used rather than relying on partial
# matching of `id`, and the output columns are named directly by melt()
# instead of being renamed afterwards.
ww_hrs2.melted <- melt(
  ww_hrs2,
  id.vars = "Country",
  variable.name = "Year",
  value.name = "Weekly_working_hours"
)

Line plot: weekly working hours from 1980 to 2007

Notes: The values of hjust and vjust are only defined between 0 and 1: 0 means left-justified and 1 means right-justified. See http://stackoverflow.com/questions/7263849/what-do-hjust-and-vjust-do-when-making-a-plot-using-ggplot

# Twelve hex colours shared by the manual colour and fill scales in the plots
# below.
iWantHue <- c(
  "#93AAC7", "#79CD51", "#CF5934", "#CA4FC8",
  "#4E6639", "#C7537E", "#84D0AA", "#4F3D5B",
  "#CDB845", "#C7A68A", "#693929", "#8370C7"
)

# Weekly working hours over time, one line per country. Each country is
# distinguished by both colour and line type.
ggplot(ww_hrs2.melted, aes(x = Year, y = Weekly_working_hours)) +
  geom_line(
    aes(group = Country, colour = Country, linetype = Country),
    size = 1
  ) +
  scale_colour_manual(values = iWantHue) +
  scale_linetype_discrete() +
  labs(y = "Weekly working hours") +
  # Rotate the year labels to vertical and centre them on their ticks.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

plot of chunk unnamed-chunk-5

Weekly working hours in 2007

# Horizontal bar chart of weekly working hours per country in 2007, with the
# cross-country mean marked by a dashed red reference line.

# Year is a factor after melt(), so compare against the label "2007".
hours_2007 <- subset(ww_hrs2.melted, Year == "2007")

ggplot(hours_2007, aes(x = Country, y = Weekly_working_hours)) +
  geom_bar(stat = "identity", color = "black", fill = "#56B4E9") +
  # Print each bar's value (one decimal place) just inside its end.
  geom_text(aes(label = round(Weekly_working_hours, 1)), hjust = 1) +
  # stat = "hline" was removed in ggplot2 2.0.0; geom_hline() with a
  # precomputed mean draws the same reference line on old and new versions.
  geom_hline(
    yintercept = mean(hours_2007$Weekly_working_hours),
    linetype = "dashed",
    color = "red"
  ) +
  scale_y_continuous(breaks = seq(0, 36, 5)) +
  # Flip so countries run down the side; the mean line then appears vertical.
  coord_flip() +
  ylab("Weekly working hours(2007)")

plot of chunk unnamed-chunk-6

Weekly working hours in 1980 vs. 2007

# Side-by-side bars comparing each country's weekly working hours in 1980 and
# in 2007; the fill colour identifies the year.
ggplot(
  subset(ww_hrs2.melted, Year %in% c(1980, 2007)),
  aes(x = Country, y = Weekly_working_hours)
) +
  geom_bar(
    aes(fill = Year),
    stat = "identity",
    position = "dodge",
    color = "black"
  ) +
  labs(y = "Weekly working hours") +
  # Rotate the country labels to vertical and align them against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

plot of chunk unnamed-chunk-7

Variation in weekly working hours

# Spread of weekly working hours across all years, one box per country,
# filled with the shared palette.
ggplot(
  ww_hrs2.melted,
  aes(x = Country, y = Weekly_working_hours, fill = Country)
) +
  geom_boxplot() +
  scale_fill_manual(values = iWantHue) +
  labs(y = "Weekly working hours") +
  # Rotate the country labels to vertical and align them against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

plot of chunk unnamed-chunk-8