# Switch the session to the course folder so that the relative CSV path read
# later in this script resolves.
# NOTE(review): this path is hard-coded to one user's home directory; the
# script will fail at this line on a machine without that folder.
setwd("~/Courses/Exploratory Data Analysis(Udacity)")
# Echo the current working directory to confirm the change.
getwd()
## [1] "/Users/ahada/Courses/Exploratory Data Analysis(Udacity)"
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gridExtra)
## Loading required package: grid
library(reshape2)
library(ggthemes)
# Make ggthemes' "few" theme, at a 16-point base font size, the default for
# every ggplot drawn after this line.
theme_set(theme_few(base_size = 16))
Notes:- check.names: logical. If ‘TRUE’, the names of the variables in the data frame are checked to ensure that they are syntactically valid variable names. If necessary, they are adjusted (by ‘make.names’) so that they are valid and so that there are no duplicates. It is set to ‘FALSE’ below so that the year column headers (e.g. 1980) are kept exactly as written instead of being altered (e.g. to X1980).
# Read the weekly-working-hours table from the current working directory.
# check.names = FALSE keeps the CSV column headers exactly as written instead
# of passing them through make.names().
ww_hrs <- read.csv(
  "weekly_working_hours.csv",
  header = TRUE,
  check.names = FALSE
)
# Name the first column "Country"; the reshaping and plotting code below uses
# that name as the identifier column.
colnames(ww_hrs)[1] <- "Country"
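Notes:- na.omit() vs. complete.cases(): na.omit(df) returns the data frame with every row that contains an NA removed, whereas complete.cases(df) returns a logical vector that is TRUE for the rows with no missing values, so df[complete.cases(df), ] selects the same rows.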
# Keep only the rows (countries) that have no missing value in any column.
ww_hrs2 <- na.omit(ww_hrs)
# Rebuild the data frame with row.names = NULL so the rows are renumbered
# after the removal; check.names = FALSE leaves the column names untouched.
ww_hrs2 <- data.frame(ww_hrs2, row.names = NULL, check.names = FALSE)
# Reshape from wide to long: one row per Country / original-column pair.
# The argument names are spelled out in full (the generic `id` only worked
# through partial matching) and the output columns are named directly rather
# than renamed afterwards from melt's defaults "variable" and "value".
ww_hrs2.melted <- melt(
  ww_hrs2,
  id.vars = "Country",
  variable.name = "Year",
  value.name = "Weekly_working_hours"
)
Notes:- The values of hjust and vjust are only defined between 0 and 1: 0 means left-justified and 1 means right-justified (0.5 is centred). See http://stackoverflow.com/questions/7263849/what-do-hjust-and-vjust-do-when-making-a-plot-using-ggplot
# Twelve hex colours used as the manual colour / fill palette for the
# per-country plots below.
iWantHue <- c(
  "#93AAC7", "#79CD51", "#CF5934", "#CA4FC8",
  "#4E6639", "#C7537E", "#84D0AA", "#4F3D5B",
  "#CDB845", "#C7A68A", "#693929", "#8370C7"
)
# Line chart of weekly working hours over the years: one line per country,
# distinguished by both colour and line type.
# NOTE(review): recent ggplot2 releases deprecate `size` for lines in favour
# of `linewidth`; left as-is here - confirm the installed version first.
ggplot(ww_hrs2.melted, aes(x = Year, y = Weekly_working_hours)) +
  geom_line(
    mapping = aes(group = Country, colour = Country, linetype = Country),
    size = 1
  ) +
  labs(y = "Weekly working hours") +
  # Rotate the year labels so they do not overlap along the x axis.
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
  scale_colour_manual(values = iWantHue) +
  scale_linetype_discrete()
# Bar chart of weekly working hours per country for 2007, with each bar
# labelled by its value (one decimal) and a dashed red reference line at the
# mean over all plotted countries.
#
# The reference line used to be drawn with geom_line(stat = 'hline',
# yintercept = 'mean'); that stat no longer exists in ggplot2 (removed in
# 2.0.0), so the mean is computed up front and handed to geom_hline().
hrs_2007 <- subset(ww_hrs2.melted, Year == 2007)
mean_hrs_2007 <- mean(hrs_2007$Weekly_working_hours)

ggplot(data = hrs_2007, aes(x = Country, y = Weekly_working_hours)) +
  geom_bar(stat = "identity", color = "black", fill = "#56B4E9") +
  scale_y_continuous(breaks = seq(0, 36, 5)) +
  # Flip the axes so the country names read horizontally; the "y" label set
  # below therefore appears along the horizontal axis.
  coord_flip() +
  ylab("Weekly working hours(2007)") +
  geom_text(aes(label = round(Weekly_working_hours, 1)), hjust = 1) +
  # After coord_flip() this y-intercept line is drawn vertically.
  geom_hline(yintercept = mean_hrs_2007, linetype = "dashed", color = "red")
# Side-by-side bars comparing each country's weekly working hours in 1980
# and 2007 (bars are filled by year and dodged within a country).
ggplot(
  data = subset(ww_hrs2.melted, Year %in% c(1980, 2007)),
  mapping = aes(x = Country, y = Weekly_working_hours)
) +
  geom_bar(
    aes(fill = Year),
    stat = "identity",
    position = "dodge",
    color = "black"
  ) +
  labs(y = "Weekly working hours") +
  # Rotate the country names and right-align them against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))
# Box plot of the distribution of weekly working hours across all years,
# one box per country, filled from the manual palette.
ggplot(ww_hrs2.melted, aes(x = Country, y = Weekly_working_hours)) +
  geom_boxplot(mapping = aes(fill = Country)) +
  scale_fill_manual(values = iWantHue) +
  labs(y = "Weekly working hours") +
  # Rotate the country names and right-align them against the axis.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))