library(readxl)
library(ggplot2)
library(lubridate)

## Read the monthly unemployment series from the local Excel export.
UnemploymentRate <- read_excel(path = "C:/Users/shahi/Desktop/DAT 400/UNRATE.xls")

## Inspect the column types before transforming anything.
str(UnemploymentRate)
## Classes 'tbl_df', 'tbl' and 'data.frame':    860 obs. of  2 variables:
##  $ observation_date: POSIXct, format: "1948-01-01" "1948-02-01" ...
##  $ UNRATE          : num  3.4 3.8 4 3.9 3.5 3.6 3.6 3.9 3.8 3.7 ...

## Give the date column a shorter name for the code that follows.
names(UnemploymentRate)[names(UnemploymentRate) == "observation_date"] <- "Date"

## Convert the POSIXct timestamps to plain Date values.
UnemploymentRate[["Date"]] <- as.Date(UnemploymentRate[["Date"]])




## Recession periods shaded behind the series, one row per recession.
## They live in their own data frame so each band is drawn exactly once:
## constant dates inside aes() on a layer that inherits the plot data are
## recycled for every data row, and the stacked translucent copies turn opaque.
## The last recession starts 2007-12-01; the previous 2008-12-01 shaded only
## six months. NOTE(review): confirm all dates against the FRED/NBER recession
## table (the 1980 start is kept as 1980-02-01 from the original).
recessions <- data.frame(
  start = as.Date(c(
    "1948-11-01", "1953-07-01", "1957-08-01", "1960-04-01", "1969-12-01",
    "1973-11-01", "1980-02-01", "1981-07-01", "1990-07-01", "2001-03-01",
    "2007-12-01"
  )),
  end = as.Date(c(
    "1949-10-01", "1954-05-01", "1958-04-01", "1961-02-01", "1970-11-01",
    "1975-03-01", "1980-07-01", "1982-11-01", "1991-03-01", "2001-11-01",
    "2009-06-01"
  ))
)

ggplot(data = UnemploymentRate, aes(x = Date, y = UNRATE)) +
  ## Recession shading goes first so it sits underneath the plotted line.
  ## inherit.aes = FALSE keeps the layer from picking up x/y of the main data.
  ## alpha is 0.5 because each band is now a single rectangle, not a stack.
  geom_rect(
    data = recessions,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "grey",
    alpha = 0.5
  ) +
  geom_line(color = "blue") +
  ## Axis ticks: every five years from 1950 to 2015, and every whole percent.
  scale_x_date(
    date_labels = "%Y",
    breaks = seq(as.Date("1950-01-01"), as.Date("2015-01-01"), by = "5 years")
  ) +
  scale_y_continuous(breaks = 1:11) +
  ## Titles and axis labels.
  labs(
    x = "Date",
    y = "Percent",
    title = "Civilian Unemployment Rate",
    subtitle = "Source: https://fred.stlouisfed.org"
  )

library(readxl)
library(ggplot2)
library(lubridate)
library(dplyr)
library(plyr)
library(sqldf)

## Read the monthly unemployment series from the local Excel export.
UnemploymentRate <- read_excel(path = "C:/Users/shahi/Desktop/DAT 400/UNRATE.xls")

## Inspect the column types before transforming anything.
str(UnemploymentRate)
## Classes 'tbl_df', 'tbl' and 'data.frame':    860 obs. of  2 variables:
##  $ observation_date: POSIXct, format: "1948-01-01" "1948-02-01" ...
##  $ UNRATE          : num  3.4 3.8 4 3.9 3.5 3.6 3.6 3.9 3.8 3.7 ...

## Give the date column a shorter name for the code that follows.
names(UnemploymentRate)[names(UnemploymentRate) == "observation_date"] <- "Date"

## Convert the POSIXct timestamps to plain Date values.
UnemploymentRate[["Date"]] <- as.Date(UnemploymentRate[["Date"]])



## Bucket every observation into a decade label stored in `Year`.
## The calendar year is extracted as an integer so the range checks are numeric
## comparisons (the previous code compared character years against numbers and
## strings). Observations before 1950 match no branch and become NA.
## Label strings are kept exactly as before, including their spacing.
UnemploymentRate$Year <- local({
  yr <- as.integer(format(UnemploymentRate$Date, "%Y"))
  case_when(
    yr >= 1950 & yr < 1960 ~ "1950-1960",
    yr >= 1960 & yr < 1970 ~ "1960- 1970",
    ## Lower bound corrected from 1960 to 1970; the old overlap was masked only
    ## because case_when() uses the first matching branch.
    yr >= 1970 & yr < 1980 ~ "1970 -1980",
    yr >= 1980 & yr < 1990 ~ "1980 - 1990",
    yr >= 1990 & yr < 2000 ~ "1990-2000",
    yr >= 2000 & yr < 2010 ~ "2000- 2010",
    yr >= 2010 ~ "2010 -2019"
  )
})

## Average unemployment rate per decade label, plus the first date in each
## group. Date is aggregated with min() because a bare, non-grouped column in
## an aggregate query takes its value from an arbitrary row of the group.
## The alias keeps the column name `Date` — presumably what lets sqldf restore
## the Date class on the result; TODO confirm against the sqldf documentation.
NewGroupByTable <- sqldf("SELECT Year, avg(UNRATE) AS AVG_RATE, min(Date) AS Date

      FROM UnemploymentRate
      GROUP BY Year
      "
)

## Show the decade labels that are present (the NA group is pre-1950 data).
na.omit(NewGroupByTable$Year)
## [1] "1950-1960"   "1960- 1970"  "1970 -1980"  "1980 - 1990" "1990-2000"  
## [6] "2000- 2010"  "2010 -2019" 
## attr(,"na.action")
## [1] 1
## attr(,"class")
## [1] "omit"

## The call above only prints; it does not change the table. Drop the NA
## (pre-1950) group from the table itself so it is not plotted as a decade.
NewGroupByTable <- na.omit(NewGroupByTable)

## Axis labels for the decade breaks, in chronological order.
plotlab <- c("1950-1960", "1960-1970", "1970-1980", "1980-1990", "1990-2000","2000-2010", "2010-2019")

## Recession periods shaded behind the series, one row per recession.
## They live in their own data frame so each band is drawn exactly once:
## constant dates inside aes() on a layer that inherits the plot data are
## recycled for every data row, which stacks translucent copies of each band.
## The last recession starts 2007-12-01; the previous 2008-12-01 shaded only
## six months. NOTE(review): confirm all dates against the FRED/NBER recession
## table (the 1980 start is kept as 1980-02-01 from the original).
recessions <- data.frame(
  start = as.Date(c(
    "1948-11-01", "1953-07-01", "1957-08-01", "1960-04-01", "1969-12-01",
    "1973-11-01", "1980-02-01", "1981-07-01", "1990-07-01", "2001-03-01",
    "2007-12-01"
  )),
  end = as.Date(c(
    "1949-10-01", "1954-05-01", "1958-04-01", "1961-02-01", "1970-11-01",
    "1975-03-01", "1980-07-01", "1982-11-01", "1991-03-01", "2001-11-01",
    "2009-06-01"
  ))
)

ggplot(data = NewGroupByTable, aes(x = Date, y = AVG_RATE)) +
  ## Recession shading goes first so it sits underneath the plotted line.
  ## inherit.aes = FALSE keeps the layer from picking up x/y of the main data.
  ## alpha is 0.5 because each band is now a single rectangle, not a stack.
  geom_rect(
    data = recessions,
    aes(xmin = start, xmax = end, ymin = -Inf, ymax = Inf),
    inherit.aes = FALSE,
    fill = "grey",
    alpha = 0.5
  ) +
  ## One line layer is enough; the duplicate layer drew the same line twice.
  geom_line(color = "blue") +
  ## One tick per decade, labelled with the decade ranges in `plotlab`.
  ## `date_labels` is intentionally not set: when it is supplied together with
  ## `labels`, the date format wins and the custom labels are never shown.
  scale_x_date(
    breaks = seq(as.Date("1950-01-01"), as.Date("2010-01-01"), by = "10 years"),
    labels = plotlab
  ) +
  scale_y_continuous(breaks = 1:11) +
  ## Titles and axis labels.
  labs(
    x = "Date",
    y = "Percent",
    title = "Civilian Unemployment Rate",
    subtitle = "Source: https://fred.stlouisfed.org"
  )

library(readxl)
library(ggplot2)
library(lubridate)
library(dplyr)
library(plyr)
library(sqldf)

## Read the monthly unemployment series from the local Excel export.
UnemploymentRate <- read_excel(path = "C:/Users/shahi/Desktop/DAT 400/UNRATE.xls")

## Inspect the column types before transforming anything.
str(UnemploymentRate)
## Classes 'tbl_df', 'tbl' and 'data.frame':    860 obs. of  2 variables:
##  $ observation_date: POSIXct, format: "1948-01-01" "1948-02-01" ...
##  $ UNRATE          : num  3.4 3.8 4 3.9 3.5 3.6 3.6 3.9 3.8 3.7 ...

## Give the date column a shorter name for the code that follows.
names(UnemploymentRate)[names(UnemploymentRate) == "observation_date"] <- "Date"

## Convert the POSIXct timestamps to plain Date values.
UnemploymentRate[["Date"]] <- as.Date(UnemploymentRate[["Date"]])



## Bucket every observation into a decade label stored in `Year`.
## The calendar year is extracted as an integer so the range checks are numeric
## comparisons (the previous code compared character years against numbers and
## strings). Observations before 1950 match no branch and become NA.
UnemploymentRate$Year <- local({
  yr <- as.integer(format(UnemploymentRate$Date, "%Y"))
  case_when(
    yr >= 1950 & yr < 1960 ~ "1950s",
    yr >= 1960 & yr < 1970 ~ "1960s",
    ## Lower bound corrected from 1960 to 1970; the old overlap was masked only
    ## because case_when() uses the first matching branch.
    yr >= 1970 & yr < 1980 ~ "1970s",
    yr >= 1980 & yr < 1990 ~ "1980s",
    yr >= 1990 & yr < 2000 ~ "1990s",
    yr >= 2000 & yr < 2010 ~ "2000s",
    yr >= 2010 ~ "2010s"
  )
})

## Average unemployment rate per decade label, plus the first date in each
## group. Date is aggregated with min() because a bare, non-grouped column in
## an aggregate query takes its value from an arbitrary row of the group.
## The alias keeps the column name `Date` — presumably what lets sqldf restore
## the Date class on the result; TODO confirm against the sqldf documentation.
NewGroupByTable <- sqldf("SELECT Year, avg(UNRATE) AS AVG_RATE, min(Date) AS Date

      FROM UnemploymentRate
      GROUP BY Year
      "
)

## Flag each decade: "Yes" when its average rate is above 5, otherwise "No".
NewGroupByTable[["Greater"]] <- with(
  NewGroupByTable,
  case_when(
    AVG_RATE > 5 ~ "Yes",
    TRUE ~ "No"
  )
)

## Print the table without the row whose decade label is NA.
na.omit(NewGroupByTable)
##    Year AVG_RATE       Date Greater
## 2 1950s 4.511667 1950-01-01      No
## 3 1960s 4.779167 1960-01-01      No
## 4 1970s 6.217500 1970-01-01     Yes
## 5 1980s 7.272500 1980-01-01     Yes
## 6 1990s 5.762500 1990-01-01     Yes
## 7 2000s 5.541667 2000-01-01     Yes
## 8 2010s 6.312069 2010-01-01     Yes
## Bar chart of the per-decade averages, coloured by the above-5% flag.
## Rows with missing values are dropped before plotting.
ggplot(
  data = na.omit(NewGroupByTable),
  mapping = aes(x = Year, y = AVG_RATE, fill = Greater)
) +
  geom_bar(stat = "identity", width = 0.7) +
  theme_minimal() +
  ## Legend title, axis labels and headings set in a single labs() call.
  labs(
    fill = "Greater Than 5%",
    x = "Decades",
    y = "Unemployment Averages",
    title = "America's Average Unemployment Per Decade",
    subtitle = "Source: https://fred.stlouisfed.org"
  )