library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ tibble 3.1.2 ✓ purrr 0.3.4
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
setwd("~/Desktop/Garage")
AC <- read_csv("AC.csv")
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## Location = col_character(),
## `Garage Number` = col_double(),
## `Inside Garage Location` = col_character(),
## Brand = col_character(),
## Unit = col_character(),
## Model = col_character(),
## `Service REF` = col_character(),
## `Serial #` = col_character(),
## `Year of Manufacture` = col_character(),
## `URL link` = col_character(),
## Comment = col_character()
## )
str(AC) # Gets the top few rows.
## spec_tbl_df [44 × 11] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Location : chr [1:44] "Wheaton" "Wheaton" "Wheaton" "Wheaton" ...
## $ Garage Number : num [1:44] 13 13 13 13 45 2 7 3 3 3 ...
## $ Inside Garage Location: chr [1:44] "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" ...
## $ Brand : chr [1:44] "MITSUBISHI" "MITSUBISHI" "MITSUBISHI" "MITSUBISHI" ...
## $ Unit : chr [1:44] "A/C" "A/C" "Vent" "Vent" ...
## $ Model : chr [1:44] "PKA-A24KA7" "PKA-A24KA7" "PUZ-A24NHA7" "PUZ-A24NHA7" ...
## $ Service REF : chr [1:44] "PKA-A24KA7.TH" "PKA-A24KA7.TH" "PUZ-A24NHA7" "PUZ-A24NHA7" ...
## $ Serial # : chr [1:44] "92M01538" "14M02520" "98U14025D" "8ZU09687A" ...
## $ Year of Manufacture : chr [1:44] "2019.02" "2021.04" "2019.08" "2018.12" ...
## $ URL link : chr [1:44] "https://share.icloud.com/photos/054dgV1AC8x2TkvDbNzxapuIA" "https://share.icloud.com/photos/010QBOAk3LIwtu0NN9Ffm8ZtQ" "https://share.icloud.com/photos/07d5inac1WyMcg4lc9L_iq31A" "https://share.icloud.com/photos/050jJQ4jTCOzHpOlyKjhvdScg" ...
## $ Comment : chr [1:44] NA NA NA NA ...
## - attr(*, "spec")=
## .. cols(
## .. Location = col_character(),
## .. `Garage Number` = col_double(),
## .. `Inside Garage Location` = col_character(),
## .. Brand = col_character(),
## .. Unit = col_character(),
## .. Model = col_character(),
## .. `Service REF` = col_character(),
## .. `Serial #` = col_character(),
## .. `Year of Manufacture` = col_character(),
## .. `URL link` = col_character(),
## .. Comment = col_character()
## .. )
Data Cleaning
ACC <- AC %>%
select(Location,
`Garage Number`,
`Inside Garage Location`,
Brand,
Unit,
Model,
`Year of Manufacture`)
names(ACC) <- gsub(" ", "", names(ACC)) # remove spaces
str(ACC)
## tibble [44 × 7] (S3: tbl_df/tbl/data.frame)
## $ Location : chr [1:44] "Wheaton" "Wheaton" "Wheaton" "Wheaton" ...
## $ GarageNumber : num [1:44] 13 13 13 13 45 2 7 3 3 3 ...
## $ InsideGarageLocation: chr [1:44] "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" "G4 (Elevator Control Room)" ...
## $ Brand : chr [1:44] "MITSUBISHI" "MITSUBISHI" "MITSUBISHI" "MITSUBISHI" ...
## $ Unit : chr [1:44] "A/C" "A/C" "Vent" "Vent" ...
## $ Model : chr [1:44] "PKA-A24KA7" "PKA-A24KA7" "PUZ-A24NHA7" "PUZ-A24NHA7" ...
## $ YearofManufacture : chr [1:44] "2019.02" "2021.04" "2019.08" "2018.12" ...
ACC
## # A tibble: 44 x 7
## Location GarageNumber InsideGarageLocat… Brand Unit Model YearofManufactu…
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 Wheaton 13 G4 (Elevator Cont… MITS… A/C PKA-… 2019.02
## 2 Wheaton 13 G4 (Elevator Cont… MITS… A/C PKA-… 2021.04
## 3 Wheaton 13 G4 (Elevator Cont… MITS… Vent PUZ-… 2019.08
## 4 Wheaton 13 G4 (Elevator Cont… MITS… Vent PUZ-… 2018.12
## 5 Wheaton 45 No A/C, Outdoor G… <NA> <NA> <NA> <NA>
## 6 Silver S… 2 G Frie… 2-in-… <NA> <NA>
## 7 Silver S… 7 No A/C, Outdoor G… <NA> <NA> <NA> <NA>
## 8 Silver S… 3 P3 (Next to brown… LG A/C <NA> 2017.04
## 9 Silver S… 3 P3 (Next to brown… LG A/C <NA> N/A
## 10 Silver S… 3 P3 (Next to brown… LG A/C <NA> N/A
## # … with 34 more rows
Show all missing data on top
arrange(ACC, desc(is.na(Unit)))
## # A tibble: 44 x 7
## Location GarageNumber InsideGarageLocat… Brand Unit Model YearofManufactu…
## <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 Wheaton 45 No A/C, Outdoor G… <NA> <NA> <NA> <NA>
## 2 Silver S… 7 No A/C, Outdoor G… <NA> <NA> <NA> <NA>
## 3 Bethesda 31 G (Office) <NA> <NA> <NA> <NA>
## 4 Bethesda 35 No A/C <NA> <NA> <NA> <NA>
## 5 Bethesda 36 No A/C <NA> <NA> <NA> <NA>
## 6 Wheaton 13 G4 (Elevator Cont… MITS… A/C PKA-… 2019.02
## 7 Wheaton 13 G4 (Elevator Cont… MITS… A/C PKA-… 2021.04
## 8 Wheaton 13 G4 (Elevator Cont… MITS… Vent PUZ-… 2019.08
## 9 Wheaton 13 G4 (Elevator Cont… MITS… Vent PUZ-… 2018.12
## 10 Silver S… 2 G Frie… 2-in-… <NA> <NA>
## # … with 34 more rows
Q1. How many unit in each location?
LUNIT <- ACC %>%
group_by(Location, Unit) %>%
select(Location, Unit) %>%
summarise(count = n())
## `summarise()` has grouped output by 'Location'. You can override using the `.groups` argument.
LUNIT
## # A tibble: 12 x 3
## # Groups: Location [3]
## Location Unit count
## <chr> <chr> <int>
## 1 Bethesda 2-in-1 AC/Vent 2
## 2 Bethesda A/C 3
## 3 Bethesda Furnace 1
## 4 Bethesda Vent 8
## 5 Bethesda <NA> 3
## 6 Silver Spring 2-in-1 AC/Vent 2
## 7 Silver Spring A/C 9
## 8 Silver Spring Vent 10
## 9 Silver Spring <NA> 1
## 10 Wheaton A/C 2
## 11 Wheaton Vent 2
## 12 Wheaton <NA> 1
RLUNIT <- LUNIT %>%
drop_na()
RLUNIT
## # A tibble: 9 x 3
## # Groups: Location [3]
## Location Unit count
## <chr> <chr> <int>
## 1 Bethesda 2-in-1 AC/Vent 2
## 2 Bethesda A/C 3
## 3 Bethesda Furnace 1
## 4 Bethesda Vent 8
## 5 Silver Spring 2-in-1 AC/Vent 2
## 6 Silver Spring A/C 9
## 7 Silver Spring Vent 10
## 8 Wheaton A/C 2
## 9 Wheaton Vent 2
RLUNIT %>%
ggplot() +
geom_col(aes(x = reorder(Location, -count), y = count, fill = Unit),
position = "dodge", stat = "identity") +
xlab("Location") +
ylab("Count") +
ggtitle("Number of A/C Unit")
## Warning: Ignoring unknown parameters: stat

WHE <- ACC %>%
filter(Location == "Wheaton") %>%
group_by(GarageNumber, Unit) %>%
summarise(count = n()) %>%
drop_na() # remove missing data
## `summarise()` has grouped output by 'GarageNumber'. You can override using the `.groups` argument.
WHE
## # A tibble: 2 x 3
## # Groups: GarageNumber [1]
## GarageNumber Unit count
## <dbl> <chr> <int>
## 1 13 A/C 2
## 2 13 Vent 2
SIL <- ACC %>%
filter(Location == "Silver Spring") %>%
group_by(GarageNumber, Unit) %>%
summarise(count = n()) %>%
drop_na() # remove missing data
## `summarise()` has grouped output by 'GarageNumber'. You can override using the `.groups` argument.
SIL
## # A tibble: 14 x 3
## # Groups: GarageNumber [9]
## GarageNumber Unit count
## <dbl> <chr> <int>
## 1 2 2-in-1 AC/Vent 1
## 2 3 A/C 3
## 3 3 Vent 1
## 4 4 A/C 1
## 5 4 Vent 1
## 6 5 Vent 1
## 7 9 2-in-1 AC/Vent 1
## 8 9 A/C 3
## 9 9 Vent 2
## 10 16 Vent 1
## 11 58 Vent 1
## 12 60 Vent 1
## 13 61 A/C 2
## 14 61 Vent 2
BET <- ACC %>%
filter(Location == "Bethesda") %>%
group_by(GarageNumber, Unit) %>%
summarise(count = n()) %>%
drop_na() # remove missing data
## `summarise()` has grouped output by 'GarageNumber'. You can override using the `.groups` argument.
BET
## # A tibble: 12 x 3
## # Groups: GarageNumber [7]
## GarageNumber Unit count
## <dbl> <chr> <int>
## 1 11 2-in-1 AC/Vent 1
## 2 11 Furnace 1
## 3 31 Vent 1
## 4 40 2-in-1 AC/Vent 1
## 5 40 A/C 1
## 6 40 Vent 2
## 7 42 Vent 2
## 8 47 A/C 1
## 9 47 Vent 1
## 10 49 Vent 1
## 11 57 A/C 1
## 12 57 Vent 1
WHE %>%
ggplot() +
geom_col(aes(x = reorder(GarageNumber, -count), y = count, fill = Unit),
position = "dodge", stat = "identity") +
xlab("Garage Number") +
ylab("Count") +
ggtitle("Number of A/C Unit in Wheaton") +
coord_flip()
## Warning: Ignoring unknown parameters: stat

SIL %>%
ggplot() +
geom_col(aes(x = reorder(GarageNumber, -count), y = count, fill = Unit),
position = "dodge", stat = "identity") +
xlab("Garage Number") +
ylab("Count") +
ggtitle("Number of A/C Unit in Silver Spring") +
coord_flip()
## Warning: Ignoring unknown parameters: stat

BET %>%
ggplot() +
geom_col(aes(x = reorder(GarageNumber, -count), y = count, fill = Unit),
position = "dodge", stat = "identity") +
xlab("Garage Number") +
ylab("Count") +
ggtitle("Number of A/C Unit in Bethesda") +
coord_flip()
## Warning: Ignoring unknown parameters: stat
