#Question3

setwd("/Users/rosar/Documents/R")
library(readr)
library(readxl)
CarSales1 <- read_xlsx("/Users/rosar/Documents/R/PerfCarSales.xlsx")
library(readr)
library(readxl)
CMHC <- read_xlsx("/Users/rosar/Documents/R/CMHC_Housing_Starts.xlsx")

View(CMHC)

#4.1.2

class(CMHC$REF_DATE)
## [1] "POSIXct" "POSIXt"
CMHC$REF_DATE <- paste(CMHC$REF_DATE, "01", sep="-")

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
CMHC1 <- rename(CMHC, "Date" = "REF_DATE", "Geographic" = "GEO", "Starts" = "VALUE")

#4.1.4

CMHC <- filter(CMHC1, COORDINATE!=16 & COORDINATE!=1 & COORDINATE!=2 & COORDINATE!=10)

View(CMHC)
str(CMHC)
## Classes 'tbl_df', 'tbl' and 'data.frame':    5208 obs. of  5 variables:
##  $ Date      : chr  "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" ...
##  $ Geographic: chr  "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
##  $ VECTOR    : chr  "v15819049" "v15819050" "v15819051" "v15819052" ...
##  $ COORDINATE: num  3 4 5 6 7 8 9 11 12 13 ...
##  $ Starts    : num  599 239 1741 844 24402 ...

#4.1.5

library(dplyr)

CMHC1 <- mutate(CMHC, 
                Region = factor(case_when(
CMHC$COORDINATE == 3:6 ~ "Atlantic",
CMHC$COORDINATE == 13:15 ~ "Western",
CMHC$COORDINATE == 17:18 ~ "Western",
CMHC$COORDINATE == 7:9 ~ "Quebec",
CMHC$COORDINATE == 11:12 ~ "Ontario")))

View(CMHC1)

#4.1.6

str(CMHC1)
## Classes 'tbl_df', 'tbl' and 'data.frame':    5208 obs. of  6 variables:
##  $ Date      : chr  "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" ...
##  $ Geographic: chr  "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
##  $ VECTOR    : chr  "v15819049" "v15819050" "v15819051" "v15819052" ...
##  $ COORDINATE: num  3 4 5 6 7 8 9 11 12 13 ...
##  $ Starts    : num  599 239 1741 844 24402 ...
##  $ Region    : Factor w/ 3 levels "Atlantic","Quebec",..: 1 1 1 1 NA NA NA NA NA 3 ...

#4.2.1

View(CarSales1)
  
CarSales <- CarSales1 %>% rename(Sales = FastCarSales)

#4.2.2

CarSales2 <- mutate(CarSales, Population*1.0, Sales*1.0)

CarSales <- CarSales %>% mutate(Rate = ((Sales/Population)*10000))

head(CarSales)
## # A tibble: 6 x 6
##   State      Abb   Region Population Sales  Rate
##   <chr>      <chr> <chr>       <dbl> <dbl> <dbl>
## 1 Alabama    AL    South     4780558   652 1.36 
## 2 Alaska     AK    West       710350    80 1.13 
## 3 Arizona    AZ    West      6392123  1254 1.96 
## 4 Arkansas   AR    South     2916808   404 1.39 
## 5 California CA    West     37254238  9010 2.42 
## 6 Colorado   CO    West      5030018   214 0.425
View(CarSales)

#4.2.3

CarSales <- CarSales %>% select(-Abb)

CarSales <- arrange(CarSales, Rate)

View(CarSales)

#4.2.4

hist(CarSales$Rate,xlab = "Rate",col = "yellow",border = "blue")

#4.2.5

CarSales %>%
  filter(!is.na(Rate)) %>% 
  
  group_by(Region) %>% 
  
  summarise(mean_rate = mean(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
##   Region        mean_rate
##   <chr>             <dbl>
## 1 North Central     0.911
## 2 Northeast         0.979
## 3 South             1.97 
## 4 West              0.778
CarSales %>%
  filter(!is.na(Rate)) %>% 
  
  group_by(Region) %>% 
  
  summarise(sd_rate = sd(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
##   Region        sd_rate
##   <chr>           <dbl>
## 1 North Central   0.678
## 2 Northeast       0.659
## 3 South           1.56 
## 4 West            0.697
CarSales %>%
  filter(!is.na(Rate)) %>% 
  
  group_by(Region) %>% 
  
  summarise(max_rate = max(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
##   Region        max_rate
##   <chr>            <dbl>
## 1 North Central     2.38
## 2 Northeast         2.19
## 3 South             7.50
## 4 West              2.42
CarSales %>%
  filter(!is.na(Population)) %>% 
  
  group_by(Region) %>% 
  
  summarise(mean_population = mean(Population/10^6, na.rm = TRUE))
## # A tibble: 4 x 2
##   Region        mean_population
##   <chr>                   <dbl>
## 1 North Central            5.58
## 2 Northeast                6.15
## 3 South                    6.80
## 4 West                     5.53