Load Data
# Read the permit export from the working directory, keeping text columns as
# plain character vectors rather than factors.
data <- read.csv(
  file = "BuildingPermits2019_2024.csv",
  stringsAsFactors = FALSE
)
# Preview the first six rows to sanity-check the import.
head(data, n = 6L)
## RECORD.ID ADDR.FULL.LINE.
## 1 BB-201900001 1138 HARWELL ST NW, ATLANTA, GA 30314
## 2 BB-201900002 791 WYLIE ST, ATLANTA, GA 30316
## 3 BB-201900003 170 HAYGOOD AVE SE, ATLANTA, GA 30315
## 4 BB-201900004 701 EAST SIDE AVE SE, ATLANTA, GA 30316
## 5 BB-201900005 3571 TUXEDO PARK DR NW, ATLANTA, GA 30305
## 6 BB-201900006 200 PEACHTREE ST NW, ATLANTA, GA 30303
## RECORD.TYPE DATE.OPENED RECORD.STATUS RECORD.STATUS.DATE
## 1 Residential Alteration 1/2/2019 ACA Pending 9/3/2019
## 2 Multi Family Alteration 1/2/2019 ACA Pending 3/11/2019
## 3 Residential Alteration 1/2/2019 ACA Pending 1/2/2019
## 4 Residential Misc. Non-Structural 1/2/2019 ACA Pending 1/2/2019
## 5 Residential Addition 1/2/2019 ACA Pending 3/15/2019
## 6 Commercial Alteration 1/2/2019 ACA Pending 9/26/2019
## Zoning CDP.Land.use
## 1 SPI-11 SA6 SFR
## 2 undefined
## 3 R-5-C SFR
## 4 R-4 SFR
## 5 R-2 SFR
## 6 SPI-1 SA1 HDC
## DESCRIPTION
## 1 INTERIOR RENOVATIONS TO INCLUDE NEW KITCHEN, NEW 1/2 BATH, PLUMBING, ELECTRICAL, AND HVAC. EXTERIOR TO INCLUDE REPLACE WINDOW IN KITCHEN, REPLACE RAILING ON EXTERIOR STAIRS, AND PAINTING.
## 2 REPLACE EXISTING CABINETRY, TILE AND PLUMBING FIXTURES IN (2) EXISTING BATHROOMS. NO STRUCTURAL RE-WORK AND NO PLUMBING RELOCATION IS INVOLVED
## 3 DRYWALL, PAINT, NEW LIGHTS, NEW CABINETS, NEW LIGHT FIXTURES, NEW VANITIES, CROWN MOLDINGS, NEW INTERIOR DOORS
## 4 CHANGING EXISTING SCREEN PORCH TO SUNROOM BY ADDING PLEXI-GLASS TO REPLACE SCREENS.
## 5 V - 18 - 261 ADD ATTACHED GARAGE TO SINGLE FAMILY HOUSE, RETAINING WALL AND PERGOLA / NEW DRIVEWAY
## 6 155 ROOM-TERRACE LEVEL: INTERIOR DEMO AND RENOVATION FOR AN EVENT SPACE, LOBBY AND CLUB ROOM. 07/11/2019: General contractor, Ryan Lawrence of Southern Commercial Services, LL has requested to be removed from project. Antonio Proctor
## latitude longitude T score addr_type
## 1 33.75559 -84.42514 T 100.00 PointAddress
## 2 33.75222 -84.36024 T 99.55 PointAddress
## 3 33.72653 -84.38243 T 100.00 PointAddress
## 4 33.73522 -84.34005 T 100.00 StreetAddress
## 5 33.85100 -84.40153 T 100.00 StreetAddress
## 6 33.75912 -84.38835 T 100.00 PointAddress
Clean Column Names
# Force every column name to be a syntactically valid R identifier.
# NOTE(review): the head() output above already shows dotted names such as
# "RECORD.ID", so this call appears to leave the names unchanged here.
names(data) <- make.names(names(data))
# Print the resulting column names.
names(data)
## [1] "RECORD.ID" "ADDR.FULL.LINE." "RECORD.TYPE"
## [4] "DATE.OPENED" "RECORD.STATUS" "RECORD.STATUS.DATE"
## [7] "Zoning" "CDP.Land.use" "DESCRIPTION"
## [10] "latitude" "longitude" "T"
## [13] "score" "addr_type"
Convert Date
# Parse the month/day/year text in DATE.OPENED into Date values. Entries that
# cannot be parsed trigger the warning below and are dealt with by the
# is.na(DATE.OPENED) filter in the cleaning step.
data[["DATE.OPENED"]] <- mdy(data[["DATE.OPENED"]])
## Warning: 10 failed to parse.
# Inspect column types after the conversion. RECORD.STATUS.DATE is not
# converted and is still character.
str(data)
## 'data.frame': 38107 obs. of 14 variables:
## $ RECORD.ID : chr "BB-201900001" "BB-201900002" "BB-201900003" "BB-201900004" ...
## $ ADDR.FULL.LINE. : chr "1138 HARWELL ST NW, ATLANTA, GA 30314" "791 WYLIE ST, ATLANTA, GA 30316" "170 HAYGOOD AVE SE, ATLANTA, GA 30315" "701 EAST SIDE AVE SE, ATLANTA, GA 30316" ...
## $ RECORD.TYPE : chr "Residential Alteration" "Multi Family Alteration" "Residential Alteration" "Residential Misc. Non-Structural" ...
## $ DATE.OPENED : Date, format: "2019-01-02" "2019-01-02" ...
## $ RECORD.STATUS : chr "ACA Pending" "ACA Pending" "ACA Pending" "ACA Pending" ...
## $ RECORD.STATUS.DATE: chr "9/3/2019" "3/11/2019" "1/2/2019" "1/2/2019" ...
## $ Zoning : chr "SPI-11 SA6" "" "R-5-C" "R-4" ...
## $ CDP.Land.use : chr "SFR" "undefined" "SFR" "SFR" ...
## $ DESCRIPTION : chr "INTERIOR RENOVATIONS TO INCLUDE NEW KITCHEN, NEW 1/2 BATH, PLUMBING, ELECTRICAL, AND HVAC. EXTERIOR TO INCLUDE "| __truncated__ "REPLACE EXISTING CABINETRY, TILE AND PLUMBING FIXTURES IN (2) EXISTING BATHROOMS. NO STRUCTURAL RE-WORK AND NO "| __truncated__ "DRYWALL, PAINT, NEW LIGHTS, NEW CABINETS, NEW LIGHT FIXTURES, NEW VANITIES, CROWN MOLDINGS, NEW INTERIOR DOORS" "CHANGING EXISTING SCREEN PORCH TO SUNROOM BY ADDING PLEXI-GLASS TO REPLACE SCREENS." ...
## $ latitude : num 33.8 33.8 33.7 33.7 33.9 ...
## $ longitude : num -84.4 -84.4 -84.4 -84.3 -84.4 ...
## $ T : chr "T" "T" "T" "T" ...
## $ score : num 100 99.5 100 100 100 ...
## $ addr_type : chr "PointAddress" "PointAddress" "PointAddress" "StreetAddress" ...
Clean Data
# Keep only rows that have a parsed opening date and a non-missing, non-empty
# record type. In the recorded output this takes the data from 38107 to 38097
# rows, the same count as the 10 dates that failed to parse.
data_clean <- filter(
  data,
  !is.na(DATE.OPENED) & !is.na(RECORD.TYPE) & RECORD.TYPE != ""
)
Add Year and Month
# Derive calendar fields from the opening date: Year as a number and Month as
# an abbreviated month label (e.g. "Jan").
data_clean <- mutate(
  data_clean,
  Year = year(DATE.OPENED),
  Month = month(DATE.OPENED, label = TRUE, abbr = TRUE)
)
# Column-by-column summary of the cleaned data, including the new fields.
summary(data_clean)
## RECORD.ID ADDR.FULL.LINE. RECORD.TYPE DATE.OPENED
## Length:38097 Length:38097 Length:38097 Min. :2019-01-02
## Class :character Class :character Class :character 1st Qu.:2020-02-14
## Mode :character Mode :character Mode :character Median :2021-07-09
## Mean :2021-07-07
## 3rd Qu.:2022-09-30
## Max. :2024-04-26
##
## RECORD.STATUS RECORD.STATUS.DATE Zoning CDP.Land.use
## Length:38097 Length:38097 Length:38097 Length:38097
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## DESCRIPTION latitude longitude T
## Length:38097 Min. :33.02 Min. :-87.61 Length:38097
## Class :character 1st Qu.:33.74 1st Qu.:-84.42 Class :character
## Mode :character Median :33.77 Median :-84.39 Mode :character
## Mean :33.77 Mean :-84.40
## 3rd Qu.:33.80 3rd Qu.:-84.37
## Max. :41.54 Max. :-75.59
##
## score addr_type Year Month
## Min. : 78.99 Length:38097 Min. :2019 Mar : 3838
## 1st Qu.:100.00 Class :character 1st Qu.:2020 Feb : 3453
## Median :100.00 Mode :character Median :2021 Jan : 3441
## Mean : 99.50 Mean :2021 Jun : 3294
## 3rd Qu.:100.00 3rd Qu.:2022 Apr : 3288
## Max. :100.00 Max. :2024 Aug : 3200
## (Other):17583