#Question3
setwd("/Users/rosar/Documents/R")
library(readr)
library(readxl)
CarSales1 <- read_xlsx("/Users/rosar/Documents/R/PerfCarSales.xlsx")
library(readr)
library(readxl)
CMHC <- read_xlsx("/Users/rosar/Documents/R/CMHC_Housing_Starts.xlsx")
View(CMHC)
#4.1.2
class(CMHC$REF_DATE)
## [1] "POSIXct" "POSIXt"
CMHC$REF_DATE <- paste(CMHC$REF_DATE, "01", sep="-")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
CMHC1 <- rename(CMHC, "Date" = "REF_DATE", "Geographic" = "GEO", "Starts" = "VALUE")
#4.1.4
CMHC <- filter(CMHC1, COORDINATE!=16 & COORDINATE!=1 & COORDINATE!=2 & COORDINATE!=10)
View(CMHC)
str(CMHC)
## Classes 'tbl_df', 'tbl' and 'data.frame': 5208 obs. of 5 variables:
## $ Date : chr "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" ...
## $ Geographic: chr "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
## $ VECTOR : chr "v15819049" "v15819050" "v15819051" "v15819052" ...
## $ COORDINATE: num 3 4 5 6 7 8 9 11 12 13 ...
## $ Starts : num 599 239 1741 844 24402 ...
#4.1.5
library(dplyr)
CMHC1 <- mutate(CMHC,
Region = factor(case_when(
CMHC$COORDINATE == 3:6 ~ "Atlantic",
CMHC$COORDINATE == 13:15 ~ "Western",
CMHC$COORDINATE == 17:18 ~ "Western",
CMHC$COORDINATE == 7:9 ~ "Quebec",
CMHC$COORDINATE == 11:12 ~ "Ontario")))
View(CMHC1)
#4.1.6
str(CMHC1)
## Classes 'tbl_df', 'tbl' and 'data.frame': 5208 obs. of 6 variables:
## $ Date : chr "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" "1988-06-01-01" ...
## $ Geographic: chr "Newfoundland and Labrador" "Prince Edward Island" "Nova Scotia" "New Brunswick" ...
## $ VECTOR : chr "v15819049" "v15819050" "v15819051" "v15819052" ...
## $ COORDINATE: num 3 4 5 6 7 8 9 11 12 13 ...
## $ Starts : num 599 239 1741 844 24402 ...
## $ Region : Factor w/ 3 levels "Atlantic","Quebec",..: 1 1 1 1 NA NA NA NA NA 3 ...
#4.2.1
View(CarSales1)
CarSales <- CarSales1 %>% rename(Sales = FastCarSales)
#4.2.2
CarSales2 <- mutate(CarSales, Population*1.0, Sales*1.0)
CarSales <- CarSales %>% mutate(Rate = ((Sales/Population)*10000))
head(CarSales)
## # A tibble: 6 x 6
## State Abb Region Population Sales Rate
## <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 Alabama AL South 4780558 652 1.36
## 2 Alaska AK West 710350 80 1.13
## 3 Arizona AZ West 6392123 1254 1.96
## 4 Arkansas AR South 2916808 404 1.39
## 5 California CA West 37254238 9010 2.42
## 6 Colorado CO West 5030018 214 0.425
View(CarSales)
#4.2.3
CarSales <- CarSales %>% select(-Abb)
CarSales <- arrange(CarSales, Rate)
View(CarSales)
#4.2.4
hist(CarSales$Rate,xlab = "Rate",col = "yellow",border = "blue")
#4.2.5
CarSales %>%
filter(!is.na(Rate)) %>%
group_by(Region) %>%
summarise(mean_rate = mean(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
## Region mean_rate
## <chr> <dbl>
## 1 North Central 0.911
## 2 Northeast 0.979
## 3 South 1.97
## 4 West 0.778
CarSales %>%
filter(!is.na(Rate)) %>%
group_by(Region) %>%
summarise(sd_rate = sd(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
## Region sd_rate
## <chr> <dbl>
## 1 North Central 0.678
## 2 Northeast 0.659
## 3 South 1.56
## 4 West 0.697
CarSales %>%
filter(!is.na(Rate)) %>%
group_by(Region) %>%
summarise(max_rate = max(Rate, na.rm = TRUE))
## # A tibble: 4 x 2
## Region max_rate
## <chr> <dbl>
## 1 North Central 2.38
## 2 Northeast 2.19
## 3 South 7.50
## 4 West 2.42
CarSales %>%
filter(!is.na(Population)) %>%
group_by(Region) %>%
summarise(mean_population = mean(Population/10^6, na.rm = TRUE))
## # A tibble: 4 x 2
## Region mean_population
## <chr> <dbl>
## 1 North Central 5.58
## 2 Northeast 6.15
## 3 South 6.80
## 4 West 5.53