Data Load

#getwd()

raw_df <- read.csv("./Data/Unemployment_rate.csv", header = T)

head(raw_df, 2)
##   By.gender    By.age.group X1999..06 X1999..07 X1999..08 X1999..09
## 1     Total           Total       6.7       6.7       6.2       5.2
## 2     Total 15-19 Years old      22.8      23.9      18.1      13.7
##   X1999..10 X1999..11 X1999..12 X2000..01 X2000..02 X2000..03 X2000..04
## 1       5.0       4.8       5.2       5.7       5.7       5.1       4.5
## 2      13.1      13.8      19.0      19.7      15.0      15.7      14.8
##   X2000..05 X2000..06 X2000..07 X2000..08 X2000..09 X2000..10 X2000..11
## 1       4.1       3.9       4.0       4.1       4.0       3.7       3.9
## 2      12.5      13.1      16.1      14.0      12.9      11.0      10.6
##   X2000..12 X2001..01 X2001..02 X2001..03 X2001..04 X2001..05 X2001..06
## 1       4.4       5.1       5.5       5.1       4.1       3.7       3.6
## 2      16.7      16.3      18.9      19.5      15.3      12.4      14.4
##   X2001..07 X2001..08 X2001..09 X2001..10 X2001..11 X2001..12 X2002..01
## 1       3.6       3.6       3.3       3.3       3.4       3.7       4.1
## 2      13.9      10.9      12.4      10.6      10.1      15.6      14.7
##   X2002..02 X2002..03 X2002..04 X2002..05 X2002..06 X2002..07 X2002..08
## 1       4.1       3.8       3.4       3.1       2.9       3.0       3.2
## 2      13.4      13.9      12.9       9.4      10.8      12.5      10.1
##   X2002..09 X2002..10 X2002..11 X2002..12 X2003..01 X2003..02 X2003..03
## 1       2.7       2.8       2.9       3.1       3.7       3.8       3.7
## 2       9.0       9.4      11.7      13.9      13.4      12.2      12.1
##   X2003..04 X2003..05 X2003..06 X2003..07 X2003..08 X2003..09 X2003..10
## 1       3.5       3.4       3.4       3.6       3.5       3.3       3.4
## 2       9.9       9.5      15.2      16.0       9.7      10.5      12.8
##   X2003..11 X2003..12 X2004..01 X2004..02 X2004..03 X2004..04 X2004..05
## 1       3.5       3.8       4.0       4.1         4       3.7       3.5
## 2      15.7      16.7      16.9      14.7        17      12.8      10.8
##   X2004..06 X2004..07 X2004..08 X2004..09 X2004..10 X2004..11 X2004..12
## 1       3.4       3.6       3.6       3.3       3.4       3.5       3.8
## 2      18.0      15.5      10.1       8.4      10.3      11.7      19.0
##   X2005..01 X2005..02 X2005..03 X2005..04 X2005..05 X2005..06 X2005..07
## 1       4.3       4.3       4.1       3.8       3.6       3.6       3.7
## 2      19.6      13.4      14.5      10.4      10.1      14.2      14.2
##   X2005..08 X2005..09 X2005..10 X2005..11 X2005..12 X2006..01 X2006..02
## 1       3.6       3.6       3.6       3.3       3.5       3.8       4.1
## 2       7.6       7.4       7.6       7.7      15.1      12.4      11.2
##   X2006..03 X2006..04 X2006..05 X2006..06 X2006..07 X2006..08 X2006..09
## 1       3.9       3.5       3.2       3.4       3.4       3.4       3.2
## 2       8.5       7.0       6.5      13.4      10.6       5.9       8.5
##   X2006..10 X2006..11 X2006..12 X2007..01 X2007..02 X2007..03 X2007..04
## 1       3.3       3.2       3.3       3.6       3.7       3.5       3.4
## 2      13.7      11.3      15.7       9.4      10.4       9.1       8.0
##   X2007..05 X2007..06 X2007..07 X2007..08 X2007..09 X2007..10 X2007..11
## 1       3.2       3.2       3.3       3.1       3.0       3.0       3.0
## 2       6.2       8.3      13.5       9.4       7.5       7.2       8.4
##   X2007..12 X2008..01 X2008..02 X2008..03 X2008..04 X2008..05 X2008..06
## 1       3.1       3.3       3.5       3.4       3.2       3.1       3.1
## 2      11.6      11.2       7.3       5.9       7.8       5.7      15.8
##   X2008..07 X2008..08 X2008..09 X2008..10 X2008..11 X2008..12 X2009..01
## 1       3.1       3.1       3.0       3.0       3.0       3.3       3.6
## 2      12.4       8.6       8.7       6.2      11.6      19.0      16.7
##   X2009..02 X2009..03 X2009..04 X2009..05 X2009..06 X2009..07 X2009..08
## 1       3.9       4.0       3.8       3.8       3.8       3.7       3.7
## 2      11.6      12.1       9.6       7.7      19.1      15.9       9.6
##   X2009..09 X2009..10 X2009..11 X2009..12 X2010..01 X2010..02 X2010..03
## 1       3.3       3.2       3.3       3.5       5.0       4.8       4.1
## 2       5.0       8.0      13.5      13.6      18.7      12.6      12.2
##   X2010..04 X2010..05 X2010..06 X2010..07 X2010..08 X2010..09 X2010..10
## 1       3.7       3.2       3.5       3.7       3.3       3.4       3.3
## 2      12.3       7.9      20.3      11.6       5.7       7.1       5.9
##   X2010..11 X2010..12 X2011..01 X2011..02 X2011..03 X2011..04 X2011..05
## 1       3.0       3.5       3.8       4.5       4.3       3.7       3.2
## 2       6.7      17.1      14.6      11.5      11.4       8.5       5.8
##   X2011..06 X2011..07 X2011..08 X2011..09 X2011..10 X2011..11 X2011..12
## 1       3.3       3.3       3.0       3.0       2.9       2.9       3.0
## 2      12.5      16.1       7.7       8.1       7.8       7.8      12.6
##   X2012..01 X2012..02 X2012..03 X2012..04 X2012..05 X2012..06 X2012..07
## 1       3.5       4.2       3.7       3.5       3.1       3.2       3.1
## 2      12.7       9.1       9.4       6.5       6.0       9.6      11.0
##   X2012..08 X2012..09 X2012..10 X2012..11 X2012..12 X2013..01 X2013..02
## 1       3.0       2.9       2.8       2.8       2.9       3.4       3.9
## 2       5.8       8.8       6.7       7.8      11.3      10.2      10.0
##   X2013..03 X2013..04 X2013..05 X2013..06 X2013..07 X2013..08 X2013..09
## 1       3.5       3.2       3.0       3.1       3.1       3.0       2.7
## 2       8.6      13.2      10.2      10.2      14.8       8.1       6.6
##   X2013..10 X2013..11 X2013..12 X2014..01 X2014..02 X2014..03 X2014..04
## 1       2.7       2.6       3.0       3.4       4.5       3.9       3.8
## 2       6.5       7.3      15.3      16.6      10.5       7.3       8.1
##   X2014..05 X2014..06 X2014..07 X2014..08 X2014..09 X2014..10 X2014..11
## 1       3.5       3.5       3.4       3.3       3.1       3.2       3.0
## 2       5.2       8.7      10.7       7.5       6.1       6.2       6.8
##   X2014..12 X2015..01 X2015..02 X2015..03 X2015..04 X2015..05 X2015..06
## 1       3.3       3.7       4.5       4.0       3.9       3.7       3.8
## 2      13.5      10.7      11.6      11.1       9.0       6.7      15.3
##   X2015..07 X2015..08 X2015..09 X2015..10 X2015..11 X2015..12 X2016..01
## 1       3.6       3.4       3.2       3.1       3.0       3.2       3.7
## 2      14.9      10.1       9.1       8.1       7.3      11.1      10.2
##   X2016..02 X2016..03 X2016..04 X2016..05 X2016..06 X2016..07 X2016..08
## 1       4.9       4.2       3.9       3.6       3.6       3.5       3.6
## 2      13.0      14.6      11.7      10.7       9.7       8.3       8.2
##   X2016..09 X2016..10 X2016..11 X2016..12 X2017..01 X2017..02 X2017..03
## 1       3.5       3.3       3.1       3.2       3.7       4.9       4.1
## 2       7.6       9.2       7.6       8.7       9.9      10.0      11.3
##   X2017..04 X2017..05 X2017..06 X2017..07 X2017..08 X2017..09 X2017..10
## 1       4.2       3.6       3.8       3.4       3.6       3.3       3.2
## 2      10.0       5.8       9.2      10.3       9.8       6.3       6.3
##   X2017..11 X2017..12 X2018..01 X2018..02
## 1       3.1       3.3       3.7       4.6
## 2       5.7       7.1      11.1      11.7
#names(raw_df)

Data Cleaning

library(tidyr)
library(reshape2)

#reshape: wide to long using by melt

df_m <- melt(raw_df, id.vars = c("By.gender", "By.age.group"))
head(df_m)
##   By.gender    By.age.group  variable value
## 1     Total           Total X1999..06   6.7
## 2     Total 15-19 Years old X1999..06  22.8
## 3     Total 20-29 Years old X1999..06  10.3
## 4     Total 30-39 Years old X1999..06   5.8
## 5     Total 40-49 Years old X1999..06   5.6
## 6     Total 50-59 Yeras old X1999..06   5.7
df_m$year <- substr(df_m$variable, 2, 5)
df_m$month <- substr(df_m$variable, 8, 9)
df_m$yr_mos <- paste(df_m$year, df_m$month, sep = "-")
df_m$yr_mos <- as.Date(paste(df_m$yr_mos, "-01", sep = ""))
head(df_m)                       
##   By.gender    By.age.group  variable value year month     yr_mos
## 1     Total           Total X1999..06   6.7 1999    06 1999-06-01
## 2     Total 15-19 Years old X1999..06  22.8 1999    06 1999-06-01
## 3     Total 20-29 Years old X1999..06  10.3 1999    06 1999-06-01
## 4     Total 30-39 Years old X1999..06   5.8 1999    06 1999-06-01
## 5     Total 40-49 Years old X1999..06   5.6 1999    06 1999-06-01
## 6     Total 50-59 Yeras old X1999..06   5.7 1999    06 1999-06-01
df_select <- df_m[, c("By.gender", "By.age.group","yr_mos", "value")]
summary(df_select)
##   By.gender             By.age.group      yr_mos          
##  Female:2250   15-19 Years old: 675   Min.   :1999-06-01  
##  Male  :2250   15-24 Years old: 675   1st Qu.:2004-02-01  
##  Total :2250   15-29 Years old: 675   Median :2008-10-01  
##                15-64 Years old: 675   Mean   :2008-09-30  
##                20-29 Years old: 675   3rd Qu.:2013-06-01  
##                30-39 Years old: 675   Max.   :2018-02-01  
##                (Other)        :2700                       
##      value       
##  Min.   : 0.200  
##  1st Qu.: 2.600  
##  Median : 3.800  
##  Mean   : 5.466  
##  3rd Qu.: 8.200  
##  Max.   :27.700  
## 
tail(df_select)
##      By.gender          By.age.group     yr_mos value
## 6745    Female       40-49 Years old 2018-02-01   2.7
## 6746    Female       50-59 Yeras old 2018-02-01   2.0
## 6747    Female 60 Years old and over 2018-02-01   9.9
## 6748    Female       15-64 Years old 2018-02-01   4.1
## 6749    Female       15-24 Years old 2018-02-01  11.8
## 6750    Female       15-29 Years old 2018-02-01   9.6
#remove NA
df_cleaned <- df_select[complete.cases(df_select),]
colnames(df_cleaned) <- c("Gender", "Age", "Yr_Month", "Value")

#levels(df_cleaned$Gender)[1] : when change the item
#df_cleaned$new_age <- gsub("Years old", "Years", df_cleaned$Age)

df_10s <- df_cleaned[which(df_cleaned$Age == "15-19 Years old"),]
df_20s <- df_cleaned[which(df_cleaned$Age == "20-29 Years old"),]
df_30s <- df_cleaned[which(df_cleaned$Age == "30-39 Years old"),]
df_40s <- df_cleaned[which(df_cleaned$Age == "40-49 Years old"),]
df_50s <- df_cleaned[which(df_cleaned$Age == "50-59 Yeras old"),]

Data Visualization

library(ggplot2)
plotF <- function(df, title) {
    ggplot(df, aes(x = Yr_Month, y = Value, group = Gender, color = Gender)) +
        geom_point() +
        geom_line() +
        ggtitle(title)
}
plotF(df_10s," 15-19")

plotF(df_20s," 20-29")

plotF(df_30s," 30-39")

plotF(df_40s," 40-49")

plotF(df_50s," 50-59")