Cleaning Complaints

This is the data cleaning, filtering and transformation document for my final project in data 110.

In this document I pay special attention to cleaning the housing complaints dataset.

This document is linked to

Notes for Ask:

Fix zip, join by zip, then join by troubled.

Create graph of average wait time based on cause.

Appendix

sources:

https://www.peoples-law.org/rent-court-eviction

https://apps.montgomerycountymd.gov/DHCA-OLTA/

https://www.montgomerycountymd.gov/DHCA/housing/landlordtenant/publications_forms.html#Landlord-Tenant%20Handbook

https://data.montgomerycountymd.gov/Consumer-Housing/Housing-Landlord-Tenant-Disputes/a7k3-gmfn/about_data

# for loading our data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# for plotting 
library(leaflet)
library(leaflet.extras)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# for more stuff
library(dbscan)
## 
## Attaching package: 'dbscan'
## 
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(OpenStreetMap)
library(geosphere)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
##   Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
##   OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
## 
## Attaching package: 'ggmap'
## 
## 
## The following object is masked from 'package:plotly':
## 
##     wind
library(RColorBrewer)
library(scales)
## 
## Attaching package: 'scales'
## 
## The following object is masked from 'package:purrr':
## 
##     discard
## 
## The following object is masked from 'package:readr':
## 
##     col_factor
library(flexdashboard)
library(ggthemes)
library(wesanderson)

Cleaning troubled housing dataset:

troubled_clean <- read_csv("/Users/gimle/Desktop/Data 110/Data 110: Project 2/montgomery_troubled_housing.csv")
## Rows: 673 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr   (4): Community.Name, Street.Address, City, Analysis.Rating
## dbl  (17): License.Number, Zip, Case.Number, Longitude, Latitude, Inspection...
## dttm  (1): First.Inspection.Date
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
troubled_clean <- troubled_clean |> 
  mutate(
    Inspection.Frequency = as.numeric(gsub("[^0-9.]", "", Inspection.Frequency)),
    Unit.Count = as.numeric(gsub("[^0-9.]", "", Unit.Count)),
    Average.Violations.Per.Unit = as.numeric(gsub("[^0-9.]", "", Average.Violations.Per.Unit)),
    Units.Inspected = as.numeric(gsub("[^0-9.]", "", Units.Inspected)),
    Severity.Index = as.numeric(gsub("[^0-9.]", "", Severity.Index)),
    No.Violations.Observed = as.numeric(gsub("[^0-9.]", "", No.Violations.Observed)),
    Infested.Units.Percentage = as.numeric(gsub("[^0-9.]", "", Infested.Units.Percentage)),
    Units.with.Mold = as.numeric(gsub("[^0-9.]", "", Units.with.Mold)),
    Longitude = as.numeric(gsub("[^0-9.]", "", Longitude)),
    Latitude = as.numeric(gsub("[^0-9.]", "", Latitude))
  )
# Needed to fix the timestamp again: 
troubled_clean$First.Inspection.Date <- as.POSIXct(troubled_clean$First.Inspection.Date, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC")
# Fixing timestamp error
troubled_clean <- troubled_clean |> 
  mutate(First.Inspection.Date = ymd(First.Inspection.Date)) |> 
  filter(First.Inspection.Date >= ymd("2016-01-01") & First.Inspection.Date <= ymd("2024-12-31"))
# Got rid of the negative sign in front of the longitude by mistake
troubled_clean$Longitude <- -abs(troubled_clean$Longitude)
# Removing outlier placed in west virginia
troubled_clean <- troubled_clean |> 
  filter(Longitude >= -77.36482 & Longitude <= -76.93023)

Cleaning housing complaints dataset:

complaints <- read_csv("/Users/gimle/Desktop/Data 110/Datasets 110/Housing_Landlord-Tenant_Disputes_20240411.csv")
## Rows: 11155 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): City, Zip, Date Filed, Date Closed, Case Type, Type of Complaint, C...
## dbl (1): Case Number
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Cleaning Complaints:

  1. Regular time format for Date filed & Date closed
complaints$`Date Filed` <- as.POSIXct(complaints$`Date Filed`, format = "%m/%d/%Y", tz = "UTC")
complaints$`Date Closed` <- as.POSIXct(complaints$`Date Closed`, format = "%m/%d/%Y", tz = "UTC")
  1. Create column for number of days between opening and closing
complaints <- complaints |> 
  mutate(
    `Process Length` = as.numeric(difftime(`Date Closed`, `Date Filed`, units = "days"))
  )
  1. Replace names in case type
complaints$City <- str_to_title(tolower(complaints$City))
  1. Seperate location.
complaints <- complaints |> 
  mutate(
    coordinates = str_extract(Location, "\\([^)]+\\)"), 
    coordinates = str_remove_all(coordinates, "[\\(\\)]")  
  )
complaints <- complaints |> 
  select(!Location)
  1. Create columns for longitude and latitiude
complaints <- complaints |> 
  separate(col = coordinates, into = c("Latitude", "Longitude"), sep = ", ", convert = TRUE)
  1. Make sure City is correctly spelled
unique(complaints$City)
##  [1] "Gaithersburg"       "Clarksburg"         "Silver Spring"     
##  [4] "Rockville"          "Bethesda"           "Olney"             
##  [7] "Germantown"         "Kensington"         "Montgomery Village"
## [10] "Takoma Park"        "Chevy Chase"        "Burtonsville"      
## [13] "Brinklow"           "Wheaton"            "North Bethesda"    
## [16] "Germantown M"       "Potomac"            "Boyds"             
## [19] "Sandy Spring"       "Damascus"           "Derwood"           
## [22] "Glenmont"           "Beltsville"         "Behesda"           
## [25] "Poolesville"        "Aspen Hill"         "Hyattsville"       
## [28] "Gaitersburg"        "Brookeville"        "Gaitherburgh"      
## [31] "Silver Spring Md"   "Lanham"             "Hyattsvile"        
## [34] "N/A"                "North Potomac"      "Mont. Village"     
## [37] "Boys"               "Sillver Spring"     "Dickerson"         
## [40] "Ashton"             "Comus"              "N.bethesda"        
## [43] "Oxen Hill"          "Fort Washington"    "Montgomery V"      
## [46] "Beallsville"        "Darnestown"         "Cabin John"        
## [49] "Spring"             "Washington Grove"   "Montpelier"        
## [52] "Garrett Park"       "Laytonsville"       "Barnesville"       
## [55] "Nbethesda"          "Silver  Spring"     "Baltimore"         
## [58] "Frederick"          "Eilderstadtgermany" "Annapolis"         
## [61] "Spencerville"       "College Park"       "N. Bethesda"       
## [64] "Temple Hills"       "Midlothian"         "Odenton"           
## [67] "Washington"         "Dilkerson"          "Cumberland"        
## [70] "Npotomac"           "Glen Echo"          "Abingdon"          
## [73] "Randallstown"       "Cabin Jonh"         "Cambridge"         
## [76] "Adelphi"            "District Heights"   "Mount Airy"        
## [79] "Highland"           "Mchenry"
# Who is complaining from Filderstadt Germany??? 
# There is a lot here. 
  1. Removing all rows with OLTA-ADMIN, OLTA-INSPECTION , OLTA-LEASE REVIEW, OLTA-MCMF, OLTA-ACCESST,
complaints <- complaints |>  
  filter(!(`Case Type` %in% c("OLTA-ADMIN", "OLTA-INSPECTION", "OLTA-LEASE REVIEW", "OLTA-MCMF", "OLTA-ACCESST")))
  1. Remove “OLTA-”
complaints <- complaints |> 
  mutate(`Case Type` = str_remove_all(`Case Type`, "OLTA-"))
  1. Seperate landlord/tennant prefix from housing prefix
complaints <- complaints |> 
  mutate(
    Pursuant = case_when(
      substr(`Case Type`, 1, 1) == "T" ~ "Tenant",
      substr(`Case Type`, 1, 1) == "L" ~ "Landlord",
          TRUE ~ NA_character_ 
    )
    )
complaints <- complaints |> 
  mutate(
    `Home Type` = case_when(
      substr(`Case Type`, nchar(`Case Type`)-1, nchar(`Case Type`)) == "SF" ~ "Single Family",
      substr(`Case Type`, nchar(`Case Type`)-1, nchar(`Case Type`)) == "MF" ~ "Multi Family",
      TRUE ~ NA_character_ 
  )
  )
complaints <- complaints |> 
  select(!`Case Type`)

Removing cases that are not in montgomery county

comp_coord <- complaints |> 
  filter(Longitude >= -77.36482 & Longitude <= -76.93023)

11.Remove: duplicate cases, non-jurisdiction cases,
No jurisdiction cases, complaint withdrawn, Case mistakenly opened, remove cases

comp_coord <- comp_coord |>  
  filter(!(`Case Disposition` %in% c("Duplicate Case","No Jurisdiction-Geographical")))
  1. Filter out cases with a negative processing time:
comp_coord <- comp_coord |> 
  filter(`Process Length`>= 0)
  1. Filter by landlord, tennant and family housing.
unique_tenant_cases <- comp_coord |> 
  filter(Pursuant == "Tenant") |> 
  distinct()
unique(unique_tenant_cases$`Type of Complaint`)
##  [1] "Quit and Vacate"                              
##  [2] "Rent Increase"                                
##  [3] "Security Deposit"                             
##  [4] "Lease Agreement Tenant Rights"                
##  [5] "Maintenance - Infestation"                    
##  [6] "Ratio Utility Billing"                        
##  [7] "Noise"                                        
##  [8] "Fee"                                          
##  [9] "Rent Payment History"                         
## [10] "Lease Agreement"                              
## [11] "Harassment"                                   
## [12] "Eviction"                                     
## [13] "Failure to Pay Rent"                          
## [14] "Lease Agreement-Early Termination"            
## [15] "Maintenance- General"                         
## [16] "Other-Miscellaneous"                          
## [17] "Money Damage - General"                       
## [18] "No Jurisdiction - Room Rental"                
## [19] "Maintenance - Painting and Carpeting"         
## [20] "Maintenance - Mold/Mildew"                    
## [21] "Maintenance - Plumbing"                       
## [22] "Retaliatory Eviction"                         
## [23] "No Jurisdiction-Geographical"                 
## [24] "Parking"                                      
## [25] "Maintenance - Appliance"                      
## [26] "Maintenance - Roof Leak"                      
## [27] "Reduction in Services"                        
## [28] "Maintenance - Air Conditioning"               
## [29] "Maintenance - Sewer Backup"                   
## [30] "Management"                                   
## [31] "Application Deposit"                          
## [32] "Maintenance - Heat"                           
## [33] "Lease Review"                                 
## [34] "Money Damages - Fire"                         
## [35] "Maintenance - Electrical"                     
## [36] "Maintenance - Rubbish & Garbage Removal"      
## [37] "Homeless Prevention and Relocation Assistance"
## [38] "No Jurisdiction - Tenant Vs. Tenant"          
## [39] "Maintenance - Water Damages"                  
## [40] "Maintenance - Hot/Cold Water"                 
## [41] "Miscellaneous/Foreclosure, T want to buy"     
## [42] "N/A"                                          
## [43] NA                                             
## [44] "Rent"                                         
## [45] "Rental Credit Report"                         
## [46] "Rule Violations-Entering without notice"      
## [47] "Common Area Use"                              
## [48] "Walk through Inspections"                     
## [49] "Lease Agreement-Early termination"            
## [50] "Renovation"                                   
## [51] "Submetering"                                  
## [52] "Money Damages - Spoiled Food"                 
## [53] "No Jurisdiction - purchase of property"       
## [54] "Late Fee"                                     
## [55] "Notice to Vacate"                             
## [56] "Pets"                                         
## [57] "ACC"                                          
## [58] "Referred to OCP"                              
## [59] "No Jurisdiction - Embassy"                    
## [60] "Conversion"                                   
## [61] "Maintenance - Snow/Ice Removal"
unique_landlord_cases <- comp_coord |> 
  filter(Pursuant == "Landlord") |> 
  distinct()
unique(unique_landlord_cases$`Type of Complaint`)
##  [1] "Quit and Vacate"                         
##  [2] "Failure to Pay Rent"                     
##  [3] "Lease Agreement"                         
##  [4] "Other-Miscellaneous"                     
##  [5] "Fee"                                     
##  [6] "Security Deposit"                        
##  [7] "Lease Agreement-Early Termination"       
##  [8] "Rent Payment History"                    
##  [9] "Lease Review"                            
## [10] "Maintenance- General"                    
## [11] "N/A"                                     
## [12] "Money Damages - Fire"                    
## [13] "Eviction"                                
## [14] "Late Fee"                                
## [15] "No Jurisdiction-Geographical"            
## [16] "Rent"                                    
## [17] "Pets"                                    
## [18] "No Jurisdiction - Room Rental"           
## [19] "Money Damage - General"                  
## [20] "Harassment"                              
## [21] "Maintenance - Infestation"               
## [22] "Notice to Vacate"                        
## [23] "Ratio Utility Billing"                   
## [24] "Maintenance - Mold/Mildew"               
## [25] "Common Area Use"                         
## [26] "Rent Increase"                           
## [27] "Maintenance - Electrical"                
## [28] "Lease Agreement Tenant Rights"           
## [29] "Walk through Inspections"                
## [30] "Rule Violations-Entering without notice" 
## [31] "Maintenance - Air Conditioning"          
## [32] "Maintenance - Plumbing"                  
## [33] NA                                        
## [34] "Maintenance - Water Damages"             
## [35] "Money Damages - Spoiled Food"            
## [36] "Noise"                                   
## [37] "Parking"                                 
## [38] "Management"                              
## [39] "Miscellaneous/Foreclosure, T want to buy"
unique_mf_cases <- comp_coord |> 
  filter(`Home Type` == "Multi Family") |> 
  distinct()
unique(unique_mf_cases$`Type of Complaint`)
##  [1] "Quit and Vacate"                              
##  [2] "Security Deposit"                             
##  [3] "Lease Agreement Tenant Rights"                
##  [4] "Maintenance - Infestation"                    
##  [5] "Ratio Utility Billing"                        
##  [6] "Noise"                                        
##  [7] "Fee"                                          
##  [8] "Rent Payment History"                         
##  [9] "Eviction"                                     
## [10] "Rent Increase"                                
## [11] "Failure to Pay Rent"                          
## [12] "Lease Agreement-Early Termination"            
## [13] "Lease Agreement"                              
## [14] "Maintenance- General"                         
## [15] "Other-Miscellaneous"                          
## [16] "Money Damage - General"                       
## [17] "Maintenance - Painting and Carpeting"         
## [18] "Maintenance - Plumbing"                       
## [19] "Retaliatory Eviction"                         
## [20] "No Jurisdiction-Geographical"                 
## [21] "Maintenance - Mold/Mildew"                    
## [22] "Maintenance - Appliance"                      
## [23] "Maintenance - Roof Leak"                      
## [24] "Harassment"                                   
## [25] "Reduction in Services"                        
## [26] "Maintenance - Air Conditioning"               
## [27] "Maintenance - Sewer Backup"                   
## [28] "Management"                                   
## [29] "Application Deposit"                          
## [30] "Maintenance - Heat"                           
## [31] "Lease Review"                                 
## [32] "Parking"                                      
## [33] "Maintenance - Electrical"                     
## [34] "Maintenance - Rubbish & Garbage Removal"      
## [35] "No Jurisdiction - Tenant Vs. Tenant"          
## [36] "Maintenance - Water Damages"                  
## [37] "Maintenance - Hot/Cold Water"                 
## [38] "Homeless Prevention and Relocation Assistance"
## [39] "Money Damages - Fire"                         
## [40] "Miscellaneous/Foreclosure, T want to buy"     
## [41] "N/A"                                          
## [42] NA                                             
## [43] "Rent"                                         
## [44] "Rental Credit Report"                         
## [45] "Rule Violations-Entering without notice"      
## [46] "Common Area Use"                              
## [47] "No Jurisdiction - Room Rental"                
## [48] "Lease Agreement-Early termination"            
## [49] "Renovation"                                   
## [50] "Submetering"                                  
## [51] "Money Damages - Spoiled Food"                 
## [52] "Late Fee"                                     
## [53] "Notice to Vacate"                             
## [54] "Pets"                                         
## [55] "Walk through Inspections"                     
## [56] "Conversion"                                   
## [57] "Maintenance - Snow/Ice Removal"
unique_sf_cases <- comp_coord |> 
  filter(`Home Type` == "Single Family") |> 
  distinct()
unique(unique_sf_cases$`Type of Complaint`)
##  [1] "Quit and Vacate"                              
##  [2] "Rent Increase"                                
##  [3] "Lease Agreement Tenant Rights"                
##  [4] "Security Deposit"                             
##  [5] "Maintenance - Infestation"                    
##  [6] "Lease Agreement"                              
##  [7] "Harassment"                                   
##  [8] "Failure to Pay Rent"                          
##  [9] "Other-Miscellaneous"                          
## [10] "No Jurisdiction - Room Rental"                
## [11] "Maintenance - Mold/Mildew"                    
## [12] "Fee"                                          
## [13] "Lease Agreement-Early Termination"            
## [14] "Maintenance- General"                         
## [15] "Parking"                                      
## [16] "Maintenance - Appliance"                      
## [17] "Rent Payment History"                         
## [18] "Maintenance - Air Conditioning"               
## [19] "Reduction in Services"                        
## [20] "Money Damages - Fire"                         
## [21] "Money Damage - General"                       
## [22] "Homeless Prevention and Relocation Assistance"
## [23] "No Jurisdiction-Geographical"                 
## [24] "Eviction"                                     
## [25] "Lease Review"                                 
## [26] "N/A"                                          
## [27] "Maintenance - Water Damages"                  
## [28] "Miscellaneous/Foreclosure, T want to buy"     
## [29] "Maintenance - Roof Leak"                      
## [30] "Maintenance - Electrical"                     
## [31] "Maintenance - Plumbing"                       
## [32] "Walk through Inspections"                     
## [33] "Maintenance - Heat"                           
## [34] "Retaliatory Eviction"                         
## [35] NA                                             
## [36] "Late Fee"                                     
## [37] "Lease Agreement-Early termination"            
## [38] "Rent"                                         
## [39] "Pets"                                         
## [40] "No Jurisdiction - purchase of property"       
## [41] "Maintenance - Hot/Cold Water"                 
## [42] "Ratio Utility Billing"                        
## [43] "Notice to Vacate"                             
## [44] "Money Damages - Spoiled Food"                 
## [45] "No Jurisdiction - Tenant Vs. Tenant"          
## [46] "Application Deposit"                          
## [47] "Common Area Use"                              
## [48] "Rental Credit Report"                         
## [49] "Rule Violations-Entering without notice"      
## [50] "ACC"                                          
## [51] "Maintenance - Sewer Backup"                   
## [52] "Noise"                                        
## [53] "Referred to OCP"                              
## [54] "No Jurisdiction - Embassy"                    
## [55] "Management"                                   
## [56] "Maintenance - Painting and Carpeting"
  1. Combining categories:

Moisture damage “Maintenance - Water Damages” “Maintenance - Mold/Mildew”

General Maintenace “Maintenance - Rubbish & Garbage Removal” “Maintenance- General” “Maintenance - Snow/Ice Removal” “Maintenance - Roof Leak” “Maintenance - Painting and Carpeting”

Plumbing and Pipes “Maintenance - Plumbing”
“Maintenance - Sewer Backup” “Maintenance - Hot/Cold Water”

Essential Applicances “Maintenance - Appliance” “Maintenance - Air Conditioning” “Maintenance - Heat” “Maintenance - Electrical”

comp_mt<- comp_coord |> 
  mutate(`Type of Complaint` = case_when(
    `Type of Complaint` %in% c("Maintenance - Water Damages", "Maintenance - Mold/Mildew") ~ "Moisture damage",
    `Type of Complaint` %in% c("Maintenance - Rubbish & Garbage Removal", "Maintenance- General", 
                               "Maintenance - Snow/Ice Removal", "Maintenance - Roof Leak", 
                               "Maintenance - Painting and Carpeting") ~ "General Maintenance",
    `Type of Complaint` %in% c("Maintenance - Plumbing", "Maintenance - Sewer Backup", 
                               "Maintenance - Hot/Cold Water") ~ "Plumbing and Pipes",
    `Type of Complaint` %in% c("Maintenance - Appliance", "Maintenance - Air Conditioning", 
                               "Maintenance - Heat", "Maintenance - Electrical") ~ "Essential Appliances",
    `Type of Complaint` %in% c("Maintenance - Infestation") ~ "Infestation",
    TRUE ~ `Type of Complaint` 
  ))

Money Dispute “Application Deposit” “Rent” “Fee” “Rental Credit Report” “Submetering” “Money Damages - Spoiled Food” “Ratio Utility Billing” “Rent Increase” “Money Damages - Fire” “Failure to Pay Rent” “Rent Payment History” “Late Fee”

No Jurisdiction “No Jurisdiction - Embassy” “No Jurisdiction - Tenant Vs. Tenant” “No Jurisdiction - Room Rental” “No Jurisdiction-Geographical” “No Jurisdiction - purchase of property”

Lease Dispute “Lease Review” “Lease Agreement Tenant Rights” “Lease Agreement-Early termination” “Lease Agreement”

comp_mt <- comp_mt |> 
  mutate(
    `Type of Complaint` = case_when(
      `Type of Complaint` %in% c("Application Deposit", "Rent", "Fee", "Rental Credit Report", "Submetering",
                                 "Money Damages - Spoiled Food", "Ratio Utility Billing", "Rent Increase", 
                                 "Money Damages - Fire", "Failure to Pay Rent", "Rent Payment History", 
                                 "Late Fee", "Money Damage - General") ~ "Payment Dispute",
      `Type of Complaint` %in% c("No Jurisdiction - Embassy", "No Jurisdiction - Tenant Vs. Tenant", 
                                 "No Jurisdiction - Room Rental", "No Jurisdiction-Geographical", 
                                 "No Jurisdiction - purchase of property") ~ "No Jurisdiction",
      `Type of Complaint` %in% c("Lease Review", "Lease Agreement Tenant Rights", "Lease Agreement-Early termination", "Lease Agreement-Early Termination", 
                                 "Lease Agreement","Notice to Vacate","Reduction in Services") ~ "Lease Agreement",
      TRUE ~ `Type of Complaint`
    )
    )
comp_mt <- comp_mt |> 
  mutate(
    `Type of Complaint` = case_when(
      `Type of Complaint` %in% c("Noise","Parking","Pets","Common Area Use","N/A", "Miscellaneous/Foreclosure, T want to buy", "Conversion", "Renovation", "ACC", "Referred to OCP" ) ~ "Other",
      TRUE ~ `Type of Complaint`
    ))

comp_mt <- comp_mt |> 
  mutate(
    `Type of Complaint` = case_when(
      `Type of Complaint` %in% c("Rule Violations-Entering without notice","Harassment") ~ "Harassment",
      TRUE ~ `Type of Complaint`
    ))

comp_mt <- comp_mt |> 
  mutate(
    `Type of Complaint` = case_when(
      `Type of Complaint` %in% c("Notice to Vacate","Quit and Vacate") ~ "Quit and Vacate",
      TRUE ~ `Type of Complaint`
    ))
unique(comp_mt$`Type of Complaint`)
##  [1] "Quit and Vacate"                              
##  [2] "Payment Dispute"                              
##  [3] "Security Deposit"                             
##  [4] "Lease Agreement"                              
##  [5] "Infestation"                                  
##  [6] "Other"                                        
##  [7] "Harassment"                                   
##  [8] "Eviction"                                     
##  [9] "General Maintenance"                          
## [10] "Other-Miscellaneous"                          
## [11] "No Jurisdiction"                              
## [12] "Moisture damage"                              
## [13] "Plumbing and Pipes"                           
## [14] "Retaliatory Eviction"                         
## [15] "Essential Appliances"                         
## [16] "Management"                                   
## [17] "Homeless Prevention and Relocation Assistance"
## [18] NA                                             
## [19] "Walk through Inspections"

Referred “Referred to a MD State Government agency” “Referred to a Montgomery County Agency” “Referred to States Attorney’s Office” “Other referral (private attorney/legal aid/etc.)” “Referred to HOC” “Case Referred to another agency, not on disclosure record”

No Jurisdiction “Non-jurisdiction-Room Rental” “Non-jurisdiction-Cooperative housing unit” “Non-jurisdiction-Geographic Location” “Non-jurisdiction-Rental Application-Referred to OCA”

Complaint noted “Complaint noted for the record”

Favourable Settlement “Settlement Agreement” “Goods delivered or replaced” “Service performed or corrected” “Refund (payments/deposits)” “Relocation payments” “Extended Tenancy” “Referred to Landlord-Tenant Commission” “Refund/Rent Credit”

Unfavourable Settlement “Acceptable explanation” “Dismissed by Landlord-Tenant Commission”

Case Error “Case can not be found” “Case mistakenly closed”

Withdrawn or incomplete “Complaint withdrawn” “Other- no communication from tenant” “Case Reopened” “Case delete from record per supverisor”

Legal action “Legal Action Filed”

comp_mt <- comp_mt |> 
  mutate(
    `Case Disposition` = case_when(
      `Case Disposition` %in% c("Referred to a MD State Government agency", 
                                 "Referred to a Montgomery County Agency",
                                 "Referred to States Attorney's Office",
                                 "Other referral (private attorney/legal aid/etc.)",
                                 "Referred to HOC",
                                 "Case Referred to another agency, not on disclosure record") ~ "Referred",
      `Case Disposition` %in% c("Non-jurisdiction-Room Rental",
                                 "Non-jurisdiction-Cooperative housing unit",
                                 "Non-jurisdiction-Geographic Location",
                                 "Non-jurisdiction-Rental Application-Referred to OCA") ~ "No Jurisdiction",
      `Case Disposition` == "Complaint noted for the record" ~ "Complaint noted",
      `Case Disposition` %in% c("Settlement Agreement",
                                 "Goods delivered or replaced",
                                 "Service performed or corrected",
                                 "Refund (payments/deposits)",
                                 "Relocation payments",
                                 "Extended Tenancy",
                                 "Referred to Landlord-Tenant Commission",
                                 "Refund/Rent Credit") ~ "Favourable Settlement",
      `Case Disposition` %in% c("Acceptable explanation",
                                 "Dismissed by Landlord-Tenant Commission") ~ "Unfavourable Settlement",
      `Case Disposition` %in% c("Case can not be found",
                                 "Case mistakenly closed") ~ "Case Error",
      `Case Disposition` %in% c("Complaint withdrawn",
                                 "Other- no communication from tenant",
                                 "Case Reopened",
                                 "Case delete from record per supverisor") ~ "Withdrawn or incomplete",
      `Case Disposition` == "Legal Action Filed" ~ "Legal action",
      TRUE ~ `Case Disposition`
    )
  )
#Pre pivot prep
comp_prepivot <- comp_mt |> 
  mutate(`Type of Complaint` = str_c("Cause: ", `Type of Complaint`))
comp_prepivot <- comp_prepivot |> 
  mutate(`Case Disposition` = str_c("Status: ", `Case Disposition`))
  1. Pivoting longer
comp_prepivot <- comp_prepivot |> 
  select(!City )
head(comp_mt)
## # A tibble: 6 × 12
##   `Case Number` City          Zip   `Date Filed`        `Date Closed`      
##           <dbl> <chr>         <chr> <dttm>              <dttm>             
## 1         48204 Gaithersburg  20877 2023-07-05 00:00:00 2023-07-07 00:00:00
## 2         48121 Clarksburg    20871 2023-05-18 00:00:00 2023-06-16 00:00:00
## 3         48153 Silver Spring 20906 2023-06-07 00:00:00 2023-07-17 00:00:00
## 4         48079 Rockville     20850 2023-04-21 00:00:00 2023-07-13 00:00:00
## 5         48133 Rockville     20852 2023-05-25 00:00:00 2023-07-25 00:00:00
## 6         47888 Bethesda      20817 2023-03-02 00:00:00 2023-07-13 00:00:00
## # ℹ 7 more variables: `Type of Complaint` <chr>, `Case Disposition` <chr>,
## #   `Process Length` <dbl>, Latitude <dbl>, Longitude <dbl>, Pursuant <chr>,
## #   `Home Type` <chr>
unique(comp_mt$City)
##  [1] "Gaithersburg"       "Clarksburg"         "Silver Spring"     
##  [4] "Rockville"          "Bethesda"           "Olney"             
##  [7] "Germantown"         "Kensington"         "Montgomery Village"
## [10] "Takoma Park"        "Chevy Chase"        "Burtonsville"      
## [13] "Brinklow"           "Wheaton"            "North Bethesda"    
## [16] "Germantown M"       "Potomac"            "Boyds"             
## [19] "Sandy Spring"       "Damascus"           "Derwood"           
## [22] "Behesda"            "Aspen Hill"         "Gaitersburg"       
## [25] "Brookeville"        "Gaitherburgh"       "Silver Spring Md"  
## [28] "Hyattsvile"         "N/A"                "Mont. Village"     
## [31] "Boys"               "Sillver Spring"     "North Potomac"     
## [34] "Ashton"             "N.bethesda"         "Oxen Hill"         
## [37] "Fort Washington"    "Montgomery V"       "Darnestown"        
## [40] "Cabin John"         "Washington Grove"   "Hyattsville"       
## [43] "Garrett Park"       "Laytonsville"       "Nbethesda"         
## [46] "Silver  Spring"     "Spencerville"       "N. Bethesda"       
## [49] "Temple Hills"       "Washington"         "Npotomac"          
## [52] "Glen Echo"          "Adelphi"            "Mount Airy"        
## [55] "Poolesville"        "Highland"
  1. Fixing city names
comp_mt <- comp_mt |> 
  mutate(
    City = str_trim(City),  # Remove any leading or trailing whitespace
    City = str_replace_all(City, "Md$", ""), 
     City = str_replace_all(City, "M$", ""),  
     City = str_replace_all(City, "\\.", ""), 
     City = str_replace_all(City, "Behesda", "Bethesda"), 
     City = str_replace_all(City, "Gaithersburg|Gaitersburg|Gaitherburgh", "Gaithersburg"),
     City = str_replace_all(City, "N\\.?bethesda", "North Bethesda"),
     City = str_replace_all(City, "Hyattsvile", "Hyattsville"),
     City = str_replace_all(City, "Sillver Spring", "Silver Spring"),
     City = str_replace_all(City, "Npotomac", "North Potomac"),
     City = str_replace_all(City, "Boys", "Boyds"),
     City = str_replace_all(City, "Mont\\..*", "Montgomery Village"), 
     City = str_replace_all(City, "Dilkerson", "Dickerson"),
     City = str_replace_all(City, "^N/A$", NA_character_) 
  )
unique(comp_mt$City)
##  [1] "Gaithersburg"       "Clarksburg"         "Silver Spring"     
##  [4] "Rockville"          "Bethesda"           "Olney"             
##  [7] "Germantown"         "Kensington"         "Montgomery Village"
## [10] "Takoma Park"        "Chevy Chase"        "Burtonsville"      
## [13] "Brinklow"           "Wheaton"            "North Bethesda"    
## [16] "Germantown "        "Potomac"            "Boyds"             
## [19] "Sandy Spring"       "Damascus"           "Derwood"           
## [22] "Aspen Hill"         "Brookeville"        "Silver Spring "    
## [25] "Hyattsville"        NA                   "Mont Village"      
## [28] "North Potomac"      "Ashton"             "Oxen Hill"         
## [31] "Fort Washington"    "Montgomery V"       "Darnestown"        
## [34] "Cabin John"         "Washington Grove"   "Garrett Park"      
## [37] "Laytonsville"       "Silver  Spring"     "Spencerville"      
## [40] "N Bethesda"         "Temple Hills"       "Washington"        
## [43] "Glen Echo"          "Adelphi"            "Mount Airy"        
## [46] "Poolesville"        "Highland"
comp_mt <- comp_mt |> 
  mutate(Zip = str_extract(Zip, "\\d{5}"))
plot2 <- comp_mt |> 
ggplot(aes(x=`Date Filed`, y= `Process Length`))+
geom_point(aes(color = `Home Type`))+
  facet_wrap(~`Type of Complaint`)

plot2

plot2 <- comp_mt |> 
  ggplot(aes(x = `Date Closed`, y = `Process Length`)) +
  geom_point(aes(color = Pursuant)) +
  facet_wrap(~Pursuant) +
  scale_color_manual(values = wes_palette("Darjeeling1", n = length(unique(comp_mt$Pursuant)))) +
  theme_minimal()  
plot2

monthly_totals <- comp_mt |> 
  mutate(Month = floor_date(`Date Filed`, "month")) |>   
  group_by(Month, Pursuant) |> 
  summarise(Total_Cases = n(), .groups = 'drop')
Id_col <- comp_mt |> select(`Case Number`,`Process Length`, Latitude, Longitude)
comp_disp <- comp_prepivot |> 
  pivot_wider(
    id_cols = `Case Number`,               
    names_from = `Case Disposition`,  
    values_from = `Case Disposition`, 
    values_fill = list(`Case Disposition` = 0), 
    values_fn = list(`Case Disposition` = function(x) 1)
  )

comp_type <- comp_prepivot |> 
  pivot_wider(
    id_cols = `Case Number`,               
    names_from = `Type of Complaint`,  
    values_from = `Type of Complaint`, 
    values_fill = list(`Type of Complaint` = 0), 
    values_fn = list(`Type of Complaint` = function(x) 1)
  )

comp_pursuant <- comp_prepivot|> 
  pivot_wider(
    id_cols = `Case Number`,  
    names_from = Pursuant,    
    values_from = Pursuant, 
    values_fill = list(Pursuant = 0), 
    values_fn = list(Pursuant = function(x) 1)
  )


comp_wide <- Id_col |> 
  select(`Case Number`, `Process Length`, Latitude, Longitude) |>   
  distinct() |>  
  left_join(comp_disp, by = "Case Number") |> 
  left_join(comp_type, by = "Case Number") |>  
  left_join(comp_pursuant, by = "Case Number") 
aggregated_latlong <- comp_wide %>%
  group_by(Latitude, Longitude) %>%
  summarise(across(where(is.numeric), sum, na.rm = TRUE), .groups = "drop")
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(where(is.numeric), sum, na.rm = TRUE)`.
## ℹ In group 1: `Latitude = 38.75397` and `Longitude = -76.98503`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
## 
##   # Previously
##   across(a:b, mean, na.rm = TRUE)
## 
##   # Now
##   across(a:b, \(x) mean(x, na.rm = TRUE))
# aggregated_comp <- aggregated_comp |> select(!`Case Number`)

ZIP APPROACH - Pivot the longest

comp_zip_id <- comp_mt |> select(`Zip`,`Process Length`,`Case Number`)
zip_disp <- comp_prepivot |> 
  pivot_wider(
    id_cols = `Case Number`,               
    names_from = `Case Disposition`,  
    values_from = `Case Disposition`, 
    values_fill = list(`Case Disposition` = 0), 
    values_fn = list(`Case Disposition` = function(x) 1)
  )

zip_type <- comp_prepivot |> 
  pivot_wider(
    id_cols = `Case Number`,               
    names_from = `Type of Complaint`,  
    values_from = `Type of Complaint`, 
    values_fill = list(`Type of Complaint` = 0), 
    values_fn = list(`Type of Complaint` = function(x) 1)
  )

zip_pursuant <- comp_prepivot|> 
  pivot_wider(
    id_cols = `Case Number`,  
    names_from = Pursuant,    
    values_from = Pursuant, 
    values_fill = list(Pursuant = 0), 
    values_fn = list(Pursuant = function(x) 1)
  )


comp_zip_wide <- comp_zip_id |> 
  select(`Zip`, `Process Length`,`Case Number`) |>   
  distinct() |>  
  left_join(zip_disp, by = "Case Number") |> 
  left_join(zip_type, by = "Case Number") |>  
  left_join(zip_pursuant, by = "Case Number") 
glimpse(comp_zip_wide)
## Rows: 10,789
## Columns: 41
## $ Zip                                                    <chr> "20877", "20871…
## $ `Process Length`                                       <dbl> 2, 29, 40, 83, …
## $ `Case Number`                                          <dbl> 48204, 48121, 4…
## $ `Status: Referred`                                     <dbl> 1, 0, 0, 0, 0, …
## $ `Status: Favourable Settlement`                        <dbl> 0, 1, 0, 1, 1, …
## $ `Status: Withdrawn or incomplete`                      <dbl> 0, 0, 1, 0, 0, …
## $ `Status: Unfavourable Settlement`                      <dbl> 0, 0, 0, 0, 0, …
## $ `Status: No jurisdiction`                              <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Legal action`                                 <dbl> 0, 0, 0, 0, 0, …
## $ `Status: No Jurisdiction`                              <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Complaint noted`                              <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Refer to Code Enforcement`                    <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Final walk through`                           <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Lease provision honored`                      <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Lease completion`                             <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Case mistakenly opened`                       <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Referred to a Federal agency`                 <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Case Error`                                   <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Appealed to Circuit Court`                    <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Citation Issued`                              <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Quit and Vacate`                               <dbl> 1, 1, 1, 0, 0, …
## $ `Cause: Payment Dispute`                               <dbl> 0, 0, 0, 1, 0, …
## $ `Cause: Security Deposit`                              <dbl> 0, 0, 0, 0, 1, …
## $ `Cause: Lease Agreement`                               <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Infestation`                                   <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Other`                                         <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Harassment`                                    <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Eviction`                                      <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: General Maintenance`                           <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Other-Miscellaneous`                           <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: No Jurisdiction`                               <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Moisture damage`                               <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Plumbing and Pipes`                            <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Retaliatory Eviction`                          <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Essential Appliances`                          <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Management`                                    <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Homeless Prevention and Relocation Assistance` <dbl> 0, 0, 0, 0, 0, …
## $ `NA`                                                   <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Walk through Inspections`                      <dbl> 0, 0, 0, 0, 0, …
## $ Landlord                                               <dbl> 1, 0, 0, 0, 0, …
## $ Tenant                                                 <dbl> 0, 1, 1, 1, 1, …
# Aggregating data by ZIP code
zip_summary <- comp_zip_wide |> 
  group_by(Zip) |> 
  summarize(
    Total_Processes = n(),
    Total_Process_Length = sum(`Process Length`, na.rm = TRUE),
    Median_Process_Length = median(`Process Length`, na.rm = TRUE),
    Total_Tenant_Cases = sum(Tenant, na.rm = TRUE),
    Total_Landlord_Cases = sum(Landlord, na.rm = TRUE),
    .groups = 'drop'  
  )

zip_summary <- zip_summary |>
  mutate(
    total_cases = Total_Tenant_Cases + Total_Landlord_Cases,  
    tenant_vs_landlord = (Total_Tenant_Cases / total_cases) 
  )
zip_comp_summary <- comp_mt %>%
  group_by(Zip, `Type of Complaint`) %>%
  summarize(
    `Process Total` = sum(`Process Length`, na.rm = TRUE), 
    `Process Median` = median(`Process Length`, na.rm = TRUE), 
    Cases = n(),
    `Tenant Cases` = sum(Pursuant == "Tenant", na.rm = TRUE), 
    `Landlord Cases` = sum(Pursuant == "Landlord", na.rm = TRUE) 
  ) %>%
  arrange(Zip, `Type of Complaint`)
## `summarise()` has grouped output by 'Zip'. You can override using the `.groups`
## argument.
# Assuming 'data' is your dataset
comp_zip_widest <- comp_zip_wide |> select(!c(`Process Length`, `Case Number`))

zip_totals <- comp_zip_widest %>%
  group_by(Zip) |> 
  summarize(across(where(is.numeric), sum, na.rm = TRUE), .groups = "drop")

comp_zip_fin <- left_join(zip_totals, zip_summary, by = "Zip")

Create percentages:

head(comp_zip_fin)
## # A tibble: 6 × 46
##   Zip   `Status: Referred` Status: Favourable Settlemen…¹ Status: Withdrawn or…²
##   <chr>              <dbl>                          <dbl>                  <dbl>
## 1 20008                  0                              0                      0
## 2 20017                  0                              0                      0
## 3 20032                  0                              0                      0
## 4 20314                  0                              0                      0
## 5 20744                  0                              0                      0
## 6 20745                  0                              0                      0
## # ℹ abbreviated names: ¹​`Status: Favourable Settlement`,
## #   ²​`Status: Withdrawn or incomplete`
## # ℹ 42 more variables: `Status: Unfavourable Settlement` <dbl>,
## #   `Status: No jurisdiction` <dbl>, `Status: Legal action` <dbl>,
## #   `Status: No Jurisdiction` <dbl>, `Status: Complaint noted` <dbl>,
## #   `Status: Refer to Code Enforcement` <dbl>,
## #   `Status: Final walk through` <dbl>, …
comp_zip_fin <- comp_zip_fin |> 
  mutate(across(
    2:37,  
    ~ . / sum(., na.rm = TRUE) * 100, 
    .names = "perc_{col}" 
  ))

Adding Zip codes from precleaned set

zip_codes <- st_read("/Users/gimle/Desktop/Data 110/Datasets 110/ZIPCODES_20240411")
## Reading layer `geo_export_36683266-22fc-450a-8b12-0aae59abf871' from data source `/Users/gimle/Desktop/Data 110/Datasets 110/ZIPCODES_20240411' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 97 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: -77.52768 ymin: 38.93424 xmax: -76.88764 ymax: 39.35426
## Geodetic CRS:  WGS84(DD)

Group by zipcode (NOT READY FOR PRIME TIME)

AGGREGATE DATA WITH ZIP & MAKE TROUBLED BY ZIP

zip_codes$mail_city <- str_to_title(tolower(zip_codes$mail_city))
zip_codes <- zip_codes |> rename(City = mail_city)
zip <- zip_codes |> rename(Zip = zipcode)
# Group by zip! 
summary_troubled <- troubled_clean |> 
  group_by(Zip) |> 
  summarise(
    Total_Units = sum(Unit.Count, na.rm = TRUE),
    Units_Inspected = sum(Units.Inspected, na.rm = TRUE),
    Average_Severity_Rate = weighted.mean(Severity.Index, Unit.Count, na.rm = TRUE)
  )
# Calculate the number of units per Analysis Rating category
rating_summary <- troubled_clean |> 
  group_by(Zip, Analysis.Rating) |> 
  summarise(
    Units = sum(Unit.Count, na.rm = TRUE),
    .groups = 'drop'
  ) |> 
  pivot_wider(
    names_from = Analysis.Rating,
    values_from = Units,
    values_fill = list(Units = 0)
  ) |> 
  rename_with(~ paste0(., "_Units"), everything())
rating_summary <- rating_summary |> rename(Zip = Zip_Units)
mean_value_l <- mean(unique_landlord_cases$`Process Length`, na.rm = TRUE)  # na.rm removes NA values before calculation
median_value_l <- median(unique_landlord_cases$`Process Length`, na.rm = TRUE)

mean_value <- mean(unique_tenant_cases$`Process Length`, na.rm = TRUE)  # na.rm removes NA values before calculation
median_value <- median(unique_tenant_cases$`Process Length`, na.rm = TRUE)

# Print the results
cat("Mean:", mean_value, "\n")
## Mean: 78.24682
cat("Median:", median_value, "\n")
## Median: 38
cat("Mean:", mean_value_l, "\n")
## Mean: 35.58834
cat("Median:", median_value_l, "\n")
## Median: 10

Wait times by category

plot2 <- comp_mt |> 
ggplot(aes(x=`Date Filed`, y= `Process Length`))+
geom_point(aes(color = `Home Type`))+
  facet_wrap(~`Type of Complaint`)

plot2