Load Data

# Read the building-permit CSV into a data frame, keeping text columns as
# character vectors, then preview the first six rows.
data <- read.csv(
  file = "BuildingPermits2019_2024.csv",
  stringsAsFactors = FALSE
)
head(data, n = 6L)
##      RECORD.ID                           ADDR.FULL.LINE.
## 1 BB-201900001     1138 HARWELL ST NW, ATLANTA, GA 30314
## 2 BB-201900002           791 WYLIE ST, ATLANTA, GA 30316
## 3 BB-201900003     170 HAYGOOD AVE SE, ATLANTA, GA 30315
## 4 BB-201900004   701 EAST SIDE AVE SE, ATLANTA, GA 30316
## 5 BB-201900005 3571 TUXEDO PARK DR NW, ATLANTA, GA 30305
## 6 BB-201900006    200 PEACHTREE ST NW, ATLANTA, GA 30303
##                        RECORD.TYPE DATE.OPENED RECORD.STATUS RECORD.STATUS.DATE
## 1           Residential Alteration    1/2/2019   ACA Pending           9/3/2019
## 2          Multi Family Alteration    1/2/2019   ACA Pending          3/11/2019
## 3           Residential Alteration    1/2/2019   ACA Pending           1/2/2019
## 4 Residential Misc. Non-Structural    1/2/2019   ACA Pending           1/2/2019
## 5             Residential Addition    1/2/2019   ACA Pending          3/15/2019
## 6            Commercial Alteration    1/2/2019   ACA Pending          9/26/2019
##       Zoning CDP.Land.use
## 1 SPI-11 SA6          SFR
## 2               undefined
## 3      R-5-C          SFR
## 4        R-4          SFR
## 5        R-2          SFR
## 6  SPI-1 SA1          HDC
##                                                                                                                                                                                                                                 DESCRIPTION
## 1                                               INTERIOR RENOVATIONS TO INCLUDE NEW KITCHEN, NEW 1/2 BATH, PLUMBING, ELECTRICAL, AND HVAC. EXTERIOR TO INCLUDE REPLACE WINDOW IN KITCHEN, REPLACE RAILING ON EXTERIOR STAIRS, AND PAINTING.
## 2                                                                                            REPLACE EXISTING CABINETRY, TILE AND PLUMBING FIXTURES IN (2) EXISTING BATHROOMS. NO STRUCTURAL RE-WORK AND NO PLUMBING RELOCATION IS INVOLVED
## 3                                                                                                                            DRYWALL, PAINT, NEW LIGHTS, NEW CABINETS, NEW LIGHT FIXTURES, NEW VANITIES, CROWN MOLDINGS, NEW INTERIOR DOORS
## 4                                                                                                                                                       CHANGING EXISTING SCREEN PORCH TO SUNROOM BY ADDING PLEXI-GLASS TO REPLACE SCREENS.
## 5                                                                                                                                        V - 18 - 261 ADD ATTACHED GARAGE TO SINGLE FAMILY HOUSE, RETAINING WALL AND PERGOLA / NEW DRIVEWAY
## 6 155 ROOM-TERRACE LEVEL: INTERIOR DEMO AND RENOVATION FOR AN EVENT SPACE, LOBBY AND CLUB ROOM. 07/11/2019: General contractor, Ryan Lawrence of Southern Commercial Services, LL has requested to be removed from project. Antonio Proctor
##   latitude longitude T  score     addr_type
## 1 33.75559 -84.42514 T 100.00  PointAddress
## 2 33.75222 -84.36024 T  99.55  PointAddress
## 3 33.72653 -84.38243 T 100.00  PointAddress
## 4 33.73522 -84.34005 T 100.00 StreetAddress
## 5 33.85100 -84.40153 T 100.00 StreetAddress
## 6 33.75912 -84.38835 T 100.00  PointAddress

Clean Column Names

# Pass the column names through make.names() so each one is a syntactically
# valid R name, then print the resulting names.
names(data) <- make.names(names(data))
names(data)
##  [1] "RECORD.ID"          "ADDR.FULL.LINE."    "RECORD.TYPE"       
##  [4] "DATE.OPENED"        "RECORD.STATUS"      "RECORD.STATUS.DATE"
##  [7] "Zoning"             "CDP.Land.use"       "DESCRIPTION"       
## [10] "latitude"           "longitude"          "T"                 
## [13] "score"              "addr_type"

Convert Date

# Parse the month/day/year strings in DATE.OPENED into Date values. Entries
# that cannot be parsed are reported in the warning below; the structure of
# the data frame is inspected afterwards.
data[["DATE.OPENED"]] <- mdy(data[["DATE.OPENED"]])
## Warning: 10 failed to parse.
str(data)
## 'data.frame':    38107 obs. of  14 variables:
##  $ RECORD.ID         : chr  "BB-201900001" "BB-201900002" "BB-201900003" "BB-201900004" ...
##  $ ADDR.FULL.LINE.   : chr  "1138 HARWELL ST NW, ATLANTA, GA 30314" "791 WYLIE ST, ATLANTA, GA 30316" "170 HAYGOOD AVE SE, ATLANTA, GA 30315" "701 EAST SIDE AVE SE, ATLANTA, GA 30316" ...
##  $ RECORD.TYPE       : chr  "Residential Alteration" "Multi Family Alteration" "Residential Alteration" "Residential Misc. Non-Structural" ...
##  $ DATE.OPENED       : Date, format: "2019-01-02" "2019-01-02" ...
##  $ RECORD.STATUS     : chr  "ACA Pending" "ACA Pending" "ACA Pending" "ACA Pending" ...
##  $ RECORD.STATUS.DATE: chr  "9/3/2019" "3/11/2019" "1/2/2019" "1/2/2019" ...
##  $ Zoning            : chr  "SPI-11 SA6" "" "R-5-C" "R-4" ...
##  $ CDP.Land.use      : chr  "SFR" "undefined" "SFR" "SFR" ...
##  $ DESCRIPTION       : chr  "INTERIOR RENOVATIONS TO INCLUDE NEW KITCHEN, NEW 1/2 BATH, PLUMBING, ELECTRICAL, AND HVAC. EXTERIOR TO INCLUDE "| __truncated__ "REPLACE EXISTING CABINETRY, TILE AND PLUMBING FIXTURES IN (2) EXISTING BATHROOMS. NO STRUCTURAL RE-WORK AND NO "| __truncated__ "DRYWALL, PAINT, NEW LIGHTS, NEW CABINETS, NEW LIGHT FIXTURES, NEW VANITIES, CROWN MOLDINGS, NEW INTERIOR DOORS" "CHANGING EXISTING SCREEN PORCH TO SUNROOM BY ADDING PLEXI-GLASS TO REPLACE SCREENS." ...
##  $ latitude          : num  33.8 33.8 33.7 33.7 33.9 ...
##  $ longitude         : num  -84.4 -84.4 -84.4 -84.3 -84.4 ...
##  $ T                 : chr  "T" "T" "T" "T" ...
##  $ score             : num  100 99.5 100 100 100 ...
##  $ addr_type         : chr  "PointAddress" "PointAddress" "PointAddress" "StreetAddress" ...

Clean Data

# Keep only rows that have an open date and a non-missing, non-empty record
# type.
data_clean <- filter(
  data,
  !is.na(DATE.OPENED),
  !is.na(RECORD.TYPE),
  RECORD.TYPE != ""
)

Add Year and Month

# Derive the calendar year and an abbreviated, labelled month from the open
# date of each record.
data_clean <- mutate(
  data_clean,
  Year = year(DATE.OPENED),
  Month = month(DATE.OPENED, label = TRUE, abbr = TRUE)
)

# Per-column summary of the cleaned data, including the new Year and Month.
summary(data_clean)
##   RECORD.ID         ADDR.FULL.LINE.    RECORD.TYPE         DATE.OPENED        
##  Length:38097       Length:38097       Length:38097       Min.   :2019-01-02  
##  Class :character   Class :character   Class :character   1st Qu.:2020-02-14  
##  Mode  :character   Mode  :character   Mode  :character   Median :2021-07-09  
##                                                           Mean   :2021-07-07  
##                                                           3rd Qu.:2022-09-30  
##                                                           Max.   :2024-04-26  
##                                                                               
##  RECORD.STATUS      RECORD.STATUS.DATE    Zoning          CDP.Land.use      
##  Length:38097       Length:38097       Length:38097       Length:38097      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  DESCRIPTION           latitude       longitude           T            
##  Length:38097       Min.   :33.02   Min.   :-87.61   Length:38097      
##  Class :character   1st Qu.:33.74   1st Qu.:-84.42   Class :character  
##  Mode  :character   Median :33.77   Median :-84.39   Mode  :character  
##                     Mean   :33.77   Mean   :-84.40                     
##                     3rd Qu.:33.80   3rd Qu.:-84.37                     
##                     Max.   :41.54   Max.   :-75.59                     
##                                                                        
##      score         addr_type              Year          Month      
##  Min.   : 78.99   Length:38097       Min.   :2019   Mar    : 3838  
##  1st Qu.:100.00   Class :character   1st Qu.:2020   Feb    : 3453  
##  Median :100.00   Mode  :character   Median :2021   Jan    : 3441  
##  Mean   : 99.50                      Mean   :2021   Jun    : 3294  
##  3rd Qu.:100.00                      3rd Qu.:2022   Apr    : 3288  
##  Max.   :100.00                      Max.   :2024   Aug    : 3200  
##                                                     (Other):17583