This is the data cleaning, filtering and transformation document for my final project in data 110.
In this document I pay special attention to cleaning the housing complaints dataset.
This document is linked to
Fix zip, join by zip, then join by troubled.
Create graph of average wait time based on cause.
sources:
https://www.peoples-law.org/rent-court-eviction
https://apps.montgomerycountymd.gov/DHCA-OLTA/
# for loading our data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sf)
## Linking to GEOS 3.11.0, GDAL 3.5.3, PROJ 9.1.0; sf_use_s2() is TRUE
# for plotting
library(leaflet)
library(leaflet.extras)
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
# for more stuff
library(dbscan)
##
## Attaching package: 'dbscan'
##
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(OpenStreetMap)
library(geosphere)
library(ggmap)
## ℹ Google's Terms of Service: <https://mapsplatform.google.com>
## Stadia Maps' Terms of Service: <https://stadiamaps.com/terms-of-service/>
## OpenStreetMap's Tile Usage Policy: <https://operations.osmfoundation.org/policies/tiles/>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
##
## Attaching package: 'ggmap'
##
##
## The following object is masked from 'package:plotly':
##
## wind
library(RColorBrewer)
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(flexdashboard)
library(ggthemes)
library(wesanderson)
troubled_clean <- read_csv("/Users/gimle/Desktop/Data 110/Data 110: Project 2/montgomery_troubled_housing.csv")
## Rows: 673 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Community.Name, Street.Address, City, Analysis.Rating
## dbl (17): License.Number, Zip, Case.Number, Longitude, Latitude, Inspection...
## dttm (1): First.Inspection.Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
troubled_clean <- troubled_clean |>
mutate(
Inspection.Frequency = as.numeric(gsub("[^0-9.]", "", Inspection.Frequency)),
Unit.Count = as.numeric(gsub("[^0-9.]", "", Unit.Count)),
Average.Violations.Per.Unit = as.numeric(gsub("[^0-9.]", "", Average.Violations.Per.Unit)),
Units.Inspected = as.numeric(gsub("[^0-9.]", "", Units.Inspected)),
Severity.Index = as.numeric(gsub("[^0-9.]", "", Severity.Index)),
No.Violations.Observed = as.numeric(gsub("[^0-9.]", "", No.Violations.Observed)),
Infested.Units.Percentage = as.numeric(gsub("[^0-9.]", "", Infested.Units.Percentage)),
Units.with.Mold = as.numeric(gsub("[^0-9.]", "", Units.with.Mold)),
Longitude = as.numeric(gsub("[^0-9.]", "", Longitude)),
Latitude = as.numeric(gsub("[^0-9.]", "", Latitude))
)
# Needed to fix the timestamp again:
troubled_clean$First.Inspection.Date <- as.POSIXct(troubled_clean$First.Inspection.Date, format="%Y-%m-%dT%H:%M:%SZ", tz="UTC")
# Fixing timestamp error
troubled_clean <- troubled_clean |>
mutate(First.Inspection.Date = ymd(First.Inspection.Date)) |>
filter(First.Inspection.Date >= ymd("2016-01-01") & First.Inspection.Date <= ymd("2024-12-31"))
# Got rid of the negative sign in front of the longitude by mistake
troubled_clean$Longitude <- -abs(troubled_clean$Longitude)
# Removing outlier placed in west virginia
troubled_clean <- troubled_clean |>
filter(Longitude >= -77.36482 & Longitude <= -76.93023)
complaints <- read_csv("/Users/gimle/Desktop/Data 110/Datasets 110/Housing_Landlord-Tenant_Disputes_20240411.csv")
## Rows: 11155 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (8): City, Zip, Date Filed, Date Closed, Case Type, Type of Complaint, C...
## dbl (1): Case Number
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
complaints$`Date Filed` <- as.POSIXct(complaints$`Date Filed`, format = "%m/%d/%Y", tz = "UTC")
complaints$`Date Closed` <- as.POSIXct(complaints$`Date Closed`, format = "%m/%d/%Y", tz = "UTC")
complaints <- complaints |>
mutate(
`Process Length` = as.numeric(difftime(`Date Closed`, `Date Filed`, units = "days"))
)
complaints$City <- str_to_title(tolower(complaints$City))
complaints <- complaints |>
mutate(
coordinates = str_extract(Location, "\\([^)]+\\)"),
coordinates = str_remove_all(coordinates, "[\\(\\)]")
)
complaints <- complaints |>
select(!Location)
complaints <- complaints |>
separate(col = coordinates, into = c("Latitude", "Longitude"), sep = ", ", convert = TRUE)
unique(complaints$City)
## [1] "Gaithersburg" "Clarksburg" "Silver Spring"
## [4] "Rockville" "Bethesda" "Olney"
## [7] "Germantown" "Kensington" "Montgomery Village"
## [10] "Takoma Park" "Chevy Chase" "Burtonsville"
## [13] "Brinklow" "Wheaton" "North Bethesda"
## [16] "Germantown M" "Potomac" "Boyds"
## [19] "Sandy Spring" "Damascus" "Derwood"
## [22] "Glenmont" "Beltsville" "Behesda"
## [25] "Poolesville" "Aspen Hill" "Hyattsville"
## [28] "Gaitersburg" "Brookeville" "Gaitherburgh"
## [31] "Silver Spring Md" "Lanham" "Hyattsvile"
## [34] "N/A" "North Potomac" "Mont. Village"
## [37] "Boys" "Sillver Spring" "Dickerson"
## [40] "Ashton" "Comus" "N.bethesda"
## [43] "Oxen Hill" "Fort Washington" "Montgomery V"
## [46] "Beallsville" "Darnestown" "Cabin John"
## [49] "Spring" "Washington Grove" "Montpelier"
## [52] "Garrett Park" "Laytonsville" "Barnesville"
## [55] "Nbethesda" "Silver Spring" "Baltimore"
## [58] "Frederick" "Eilderstadtgermany" "Annapolis"
## [61] "Spencerville" "College Park" "N. Bethesda"
## [64] "Temple Hills" "Midlothian" "Odenton"
## [67] "Washington" "Dilkerson" "Cumberland"
## [70] "Npotomac" "Glen Echo" "Abingdon"
## [73] "Randallstown" "Cabin Jonh" "Cambridge"
## [76] "Adelphi" "District Heights" "Mount Airy"
## [79] "Highland" "Mchenry"
# Who is complaining from Filderstadt Germany???
# There is a lot here.
complaints <- complaints |>
filter(!(`Case Type` %in% c("OLTA-ADMIN", "OLTA-INSPECTION", "OLTA-LEASE REVIEW", "OLTA-MCMF", "OLTA-ACCESST")))
complaints <- complaints |>
mutate(`Case Type` = str_remove_all(`Case Type`, "OLTA-"))
complaints <- complaints |>
mutate(
Pursuant = case_when(
substr(`Case Type`, 1, 1) == "T" ~ "Tenant",
substr(`Case Type`, 1, 1) == "L" ~ "Landlord",
TRUE ~ NA_character_
)
)
complaints <- complaints |>
mutate(
`Home Type` = case_when(
substr(`Case Type`, nchar(`Case Type`)-1, nchar(`Case Type`)) == "SF" ~ "Single Family",
substr(`Case Type`, nchar(`Case Type`)-1, nchar(`Case Type`)) == "MF" ~ "Multi Family",
TRUE ~ NA_character_
)
)
complaints <- complaints |>
select(!`Case Type`)
Removing cases that are not in montgomery county
comp_coord <- complaints |>
filter(Longitude >= -77.36482 & Longitude <= -76.93023)
11.Remove: duplicate cases, non-jurisdiction cases,
No jurisdiction cases, complaint withdrawn, Case mistakenly opened,
remove cases
comp_coord <- comp_coord |>
filter(!(`Case Disposition` %in% c("Duplicate Case","No Jurisdiction-Geographical")))
comp_coord <- comp_coord |>
filter(`Process Length`>= 0)
unique_tenant_cases <- comp_coord |>
filter(Pursuant == "Tenant") |>
distinct()
unique(unique_tenant_cases$`Type of Complaint`)
## [1] "Quit and Vacate"
## [2] "Rent Increase"
## [3] "Security Deposit"
## [4] "Lease Agreement Tenant Rights"
## [5] "Maintenance - Infestation"
## [6] "Ratio Utility Billing"
## [7] "Noise"
## [8] "Fee"
## [9] "Rent Payment History"
## [10] "Lease Agreement"
## [11] "Harassment"
## [12] "Eviction"
## [13] "Failure to Pay Rent"
## [14] "Lease Agreement-Early Termination"
## [15] "Maintenance- General"
## [16] "Other-Miscellaneous"
## [17] "Money Damage - General"
## [18] "No Jurisdiction - Room Rental"
## [19] "Maintenance - Painting and Carpeting"
## [20] "Maintenance - Mold/Mildew"
## [21] "Maintenance - Plumbing"
## [22] "Retaliatory Eviction"
## [23] "No Jurisdiction-Geographical"
## [24] "Parking"
## [25] "Maintenance - Appliance"
## [26] "Maintenance - Roof Leak"
## [27] "Reduction in Services"
## [28] "Maintenance - Air Conditioning"
## [29] "Maintenance - Sewer Backup"
## [30] "Management"
## [31] "Application Deposit"
## [32] "Maintenance - Heat"
## [33] "Lease Review"
## [34] "Money Damages - Fire"
## [35] "Maintenance - Electrical"
## [36] "Maintenance - Rubbish & Garbage Removal"
## [37] "Homeless Prevention and Relocation Assistance"
## [38] "No Jurisdiction - Tenant Vs. Tenant"
## [39] "Maintenance - Water Damages"
## [40] "Maintenance - Hot/Cold Water"
## [41] "Miscellaneous/Foreclosure, T want to buy"
## [42] "N/A"
## [43] NA
## [44] "Rent"
## [45] "Rental Credit Report"
## [46] "Rule Violations-Entering without notice"
## [47] "Common Area Use"
## [48] "Walk through Inspections"
## [49] "Lease Agreement-Early termination"
## [50] "Renovation"
## [51] "Submetering"
## [52] "Money Damages - Spoiled Food"
## [53] "No Jurisdiction - purchase of property"
## [54] "Late Fee"
## [55] "Notice to Vacate"
## [56] "Pets"
## [57] "ACC"
## [58] "Referred to OCP"
## [59] "No Jurisdiction - Embassy"
## [60] "Conversion"
## [61] "Maintenance - Snow/Ice Removal"
unique_landlord_cases <- comp_coord |>
filter(Pursuant == "Landlord") |>
distinct()
unique(unique_landlord_cases$`Type of Complaint`)
## [1] "Quit and Vacate"
## [2] "Failure to Pay Rent"
## [3] "Lease Agreement"
## [4] "Other-Miscellaneous"
## [5] "Fee"
## [6] "Security Deposit"
## [7] "Lease Agreement-Early Termination"
## [8] "Rent Payment History"
## [9] "Lease Review"
## [10] "Maintenance- General"
## [11] "N/A"
## [12] "Money Damages - Fire"
## [13] "Eviction"
## [14] "Late Fee"
## [15] "No Jurisdiction-Geographical"
## [16] "Rent"
## [17] "Pets"
## [18] "No Jurisdiction - Room Rental"
## [19] "Money Damage - General"
## [20] "Harassment"
## [21] "Maintenance - Infestation"
## [22] "Notice to Vacate"
## [23] "Ratio Utility Billing"
## [24] "Maintenance - Mold/Mildew"
## [25] "Common Area Use"
## [26] "Rent Increase"
## [27] "Maintenance - Electrical"
## [28] "Lease Agreement Tenant Rights"
## [29] "Walk through Inspections"
## [30] "Rule Violations-Entering without notice"
## [31] "Maintenance - Air Conditioning"
## [32] "Maintenance - Plumbing"
## [33] NA
## [34] "Maintenance - Water Damages"
## [35] "Money Damages - Spoiled Food"
## [36] "Noise"
## [37] "Parking"
## [38] "Management"
## [39] "Miscellaneous/Foreclosure, T want to buy"
unique_mf_cases <- comp_coord |>
filter(`Home Type` == "Multi Family") |>
distinct()
unique(unique_mf_cases$`Type of Complaint`)
## [1] "Quit and Vacate"
## [2] "Security Deposit"
## [3] "Lease Agreement Tenant Rights"
## [4] "Maintenance - Infestation"
## [5] "Ratio Utility Billing"
## [6] "Noise"
## [7] "Fee"
## [8] "Rent Payment History"
## [9] "Eviction"
## [10] "Rent Increase"
## [11] "Failure to Pay Rent"
## [12] "Lease Agreement-Early Termination"
## [13] "Lease Agreement"
## [14] "Maintenance- General"
## [15] "Other-Miscellaneous"
## [16] "Money Damage - General"
## [17] "Maintenance - Painting and Carpeting"
## [18] "Maintenance - Plumbing"
## [19] "Retaliatory Eviction"
## [20] "No Jurisdiction-Geographical"
## [21] "Maintenance - Mold/Mildew"
## [22] "Maintenance - Appliance"
## [23] "Maintenance - Roof Leak"
## [24] "Harassment"
## [25] "Reduction in Services"
## [26] "Maintenance - Air Conditioning"
## [27] "Maintenance - Sewer Backup"
## [28] "Management"
## [29] "Application Deposit"
## [30] "Maintenance - Heat"
## [31] "Lease Review"
## [32] "Parking"
## [33] "Maintenance - Electrical"
## [34] "Maintenance - Rubbish & Garbage Removal"
## [35] "No Jurisdiction - Tenant Vs. Tenant"
## [36] "Maintenance - Water Damages"
## [37] "Maintenance - Hot/Cold Water"
## [38] "Homeless Prevention and Relocation Assistance"
## [39] "Money Damages - Fire"
## [40] "Miscellaneous/Foreclosure, T want to buy"
## [41] "N/A"
## [42] NA
## [43] "Rent"
## [44] "Rental Credit Report"
## [45] "Rule Violations-Entering without notice"
## [46] "Common Area Use"
## [47] "No Jurisdiction - Room Rental"
## [48] "Lease Agreement-Early termination"
## [49] "Renovation"
## [50] "Submetering"
## [51] "Money Damages - Spoiled Food"
## [52] "Late Fee"
## [53] "Notice to Vacate"
## [54] "Pets"
## [55] "Walk through Inspections"
## [56] "Conversion"
## [57] "Maintenance - Snow/Ice Removal"
unique_sf_cases <- comp_coord |>
filter(`Home Type` == "Single Family") |>
distinct()
unique(unique_sf_cases$`Type of Complaint`)
## [1] "Quit and Vacate"
## [2] "Rent Increase"
## [3] "Lease Agreement Tenant Rights"
## [4] "Security Deposit"
## [5] "Maintenance - Infestation"
## [6] "Lease Agreement"
## [7] "Harassment"
## [8] "Failure to Pay Rent"
## [9] "Other-Miscellaneous"
## [10] "No Jurisdiction - Room Rental"
## [11] "Maintenance - Mold/Mildew"
## [12] "Fee"
## [13] "Lease Agreement-Early Termination"
## [14] "Maintenance- General"
## [15] "Parking"
## [16] "Maintenance - Appliance"
## [17] "Rent Payment History"
## [18] "Maintenance - Air Conditioning"
## [19] "Reduction in Services"
## [20] "Money Damages - Fire"
## [21] "Money Damage - General"
## [22] "Homeless Prevention and Relocation Assistance"
## [23] "No Jurisdiction-Geographical"
## [24] "Eviction"
## [25] "Lease Review"
## [26] "N/A"
## [27] "Maintenance - Water Damages"
## [28] "Miscellaneous/Foreclosure, T want to buy"
## [29] "Maintenance - Roof Leak"
## [30] "Maintenance - Electrical"
## [31] "Maintenance - Plumbing"
## [32] "Walk through Inspections"
## [33] "Maintenance - Heat"
## [34] "Retaliatory Eviction"
## [35] NA
## [36] "Late Fee"
## [37] "Lease Agreement-Early termination"
## [38] "Rent"
## [39] "Pets"
## [40] "No Jurisdiction - purchase of property"
## [41] "Maintenance - Hot/Cold Water"
## [42] "Ratio Utility Billing"
## [43] "Notice to Vacate"
## [44] "Money Damages - Spoiled Food"
## [45] "No Jurisdiction - Tenant Vs. Tenant"
## [46] "Application Deposit"
## [47] "Common Area Use"
## [48] "Rental Credit Report"
## [49] "Rule Violations-Entering without notice"
## [50] "ACC"
## [51] "Maintenance - Sewer Backup"
## [52] "Noise"
## [53] "Referred to OCP"
## [54] "No Jurisdiction - Embassy"
## [55] "Management"
## [56] "Maintenance - Painting and Carpeting"
Moisture damage “Maintenance - Water Damages” “Maintenance - Mold/Mildew”
General Maintenace “Maintenance - Rubbish & Garbage Removal” “Maintenance- General” “Maintenance - Snow/Ice Removal” “Maintenance - Roof Leak” “Maintenance - Painting and Carpeting”
Plumbing and Pipes “Maintenance - Plumbing”
“Maintenance - Sewer Backup” “Maintenance - Hot/Cold Water”
Essential Applicances “Maintenance - Appliance” “Maintenance - Air Conditioning” “Maintenance - Heat” “Maintenance - Electrical”
comp_mt<- comp_coord |>
mutate(`Type of Complaint` = case_when(
`Type of Complaint` %in% c("Maintenance - Water Damages", "Maintenance - Mold/Mildew") ~ "Moisture damage",
`Type of Complaint` %in% c("Maintenance - Rubbish & Garbage Removal", "Maintenance- General",
"Maintenance - Snow/Ice Removal", "Maintenance - Roof Leak",
"Maintenance - Painting and Carpeting") ~ "General Maintenance",
`Type of Complaint` %in% c("Maintenance - Plumbing", "Maintenance - Sewer Backup",
"Maintenance - Hot/Cold Water") ~ "Plumbing and Pipes",
`Type of Complaint` %in% c("Maintenance - Appliance", "Maintenance - Air Conditioning",
"Maintenance - Heat", "Maintenance - Electrical") ~ "Essential Appliances",
`Type of Complaint` %in% c("Maintenance - Infestation") ~ "Infestation",
TRUE ~ `Type of Complaint`
))
Money Dispute “Application Deposit” “Rent” “Fee” “Rental Credit Report” “Submetering” “Money Damages - Spoiled Food” “Ratio Utility Billing” “Rent Increase” “Money Damages - Fire” “Failure to Pay Rent” “Rent Payment History” “Late Fee”
No Jurisdiction “No Jurisdiction - Embassy” “No Jurisdiction - Tenant Vs. Tenant” “No Jurisdiction - Room Rental” “No Jurisdiction-Geographical” “No Jurisdiction - purchase of property”
Lease Dispute “Lease Review” “Lease Agreement Tenant Rights” “Lease Agreement-Early termination” “Lease Agreement”
comp_mt <- comp_mt |>
mutate(
`Type of Complaint` = case_when(
`Type of Complaint` %in% c("Application Deposit", "Rent", "Fee", "Rental Credit Report", "Submetering",
"Money Damages - Spoiled Food", "Ratio Utility Billing", "Rent Increase",
"Money Damages - Fire", "Failure to Pay Rent", "Rent Payment History",
"Late Fee", "Money Damage - General") ~ "Payment Dispute",
`Type of Complaint` %in% c("No Jurisdiction - Embassy", "No Jurisdiction - Tenant Vs. Tenant",
"No Jurisdiction - Room Rental", "No Jurisdiction-Geographical",
"No Jurisdiction - purchase of property") ~ "No Jurisdiction",
`Type of Complaint` %in% c("Lease Review", "Lease Agreement Tenant Rights", "Lease Agreement-Early termination", "Lease Agreement-Early Termination",
"Lease Agreement","Notice to Vacate","Reduction in Services") ~ "Lease Agreement",
TRUE ~ `Type of Complaint`
)
)
comp_mt <- comp_mt |>
mutate(
`Type of Complaint` = case_when(
`Type of Complaint` %in% c("Noise","Parking","Pets","Common Area Use","N/A", "Miscellaneous/Foreclosure, T want to buy", "Conversion", "Renovation", "ACC", "Referred to OCP" ) ~ "Other",
TRUE ~ `Type of Complaint`
))
comp_mt <- comp_mt |>
mutate(
`Type of Complaint` = case_when(
`Type of Complaint` %in% c("Rule Violations-Entering without notice","Harassment") ~ "Harassment",
TRUE ~ `Type of Complaint`
))
comp_mt <- comp_mt |>
mutate(
`Type of Complaint` = case_when(
`Type of Complaint` %in% c("Notice to Vacate","Quit and Vacate") ~ "Quit and Vacate",
TRUE ~ `Type of Complaint`
))
unique(comp_mt$`Type of Complaint`)
## [1] "Quit and Vacate"
## [2] "Payment Dispute"
## [3] "Security Deposit"
## [4] "Lease Agreement"
## [5] "Infestation"
## [6] "Other"
## [7] "Harassment"
## [8] "Eviction"
## [9] "General Maintenance"
## [10] "Other-Miscellaneous"
## [11] "No Jurisdiction"
## [12] "Moisture damage"
## [13] "Plumbing and Pipes"
## [14] "Retaliatory Eviction"
## [15] "Essential Appliances"
## [16] "Management"
## [17] "Homeless Prevention and Relocation Assistance"
## [18] NA
## [19] "Walk through Inspections"
Referred “Referred to a MD State Government agency” “Referred to a Montgomery County Agency” “Referred to States Attorney’s Office” “Other referral (private attorney/legal aid/etc.)” “Referred to HOC” “Case Referred to another agency, not on disclosure record”
No Jurisdiction “Non-jurisdiction-Room Rental” “Non-jurisdiction-Cooperative housing unit” “Non-jurisdiction-Geographic Location” “Non-jurisdiction-Rental Application-Referred to OCA”
Complaint noted “Complaint noted for the record”
Favourable Settlement “Settlement Agreement” “Goods delivered or replaced” “Service performed or corrected” “Refund (payments/deposits)” “Relocation payments” “Extended Tenancy” “Referred to Landlord-Tenant Commission” “Refund/Rent Credit”
Unfavourable Settlement “Acceptable explanation” “Dismissed by Landlord-Tenant Commission”
Case Error “Case can not be found” “Case mistakenly closed”
Withdrawn or incomplete “Complaint withdrawn” “Other- no communication from tenant” “Case Reopened” “Case delete from record per supverisor”
Legal action “Legal Action Filed”
comp_mt <- comp_mt |>
mutate(
`Case Disposition` = case_when(
`Case Disposition` %in% c("Referred to a MD State Government agency",
"Referred to a Montgomery County Agency",
"Referred to States Attorney's Office",
"Other referral (private attorney/legal aid/etc.)",
"Referred to HOC",
"Case Referred to another agency, not on disclosure record") ~ "Referred",
`Case Disposition` %in% c("Non-jurisdiction-Room Rental",
"Non-jurisdiction-Cooperative housing unit",
"Non-jurisdiction-Geographic Location",
"Non-jurisdiction-Rental Application-Referred to OCA") ~ "No Jurisdiction",
`Case Disposition` == "Complaint noted for the record" ~ "Complaint noted",
`Case Disposition` %in% c("Settlement Agreement",
"Goods delivered or replaced",
"Service performed or corrected",
"Refund (payments/deposits)",
"Relocation payments",
"Extended Tenancy",
"Referred to Landlord-Tenant Commission",
"Refund/Rent Credit") ~ "Favourable Settlement",
`Case Disposition` %in% c("Acceptable explanation",
"Dismissed by Landlord-Tenant Commission") ~ "Unfavourable Settlement",
`Case Disposition` %in% c("Case can not be found",
"Case mistakenly closed") ~ "Case Error",
`Case Disposition` %in% c("Complaint withdrawn",
"Other- no communication from tenant",
"Case Reopened",
"Case delete from record per supverisor") ~ "Withdrawn or incomplete",
`Case Disposition` == "Legal Action Filed" ~ "Legal action",
TRUE ~ `Case Disposition`
)
)
#Pre pivot prep
comp_prepivot <- comp_mt |>
mutate(`Type of Complaint` = str_c("Cause: ", `Type of Complaint`))
comp_prepivot <- comp_prepivot |>
mutate(`Case Disposition` = str_c("Status: ", `Case Disposition`))
comp_prepivot <- comp_prepivot |>
select(!City )
head(comp_mt)
## # A tibble: 6 × 12
## `Case Number` City Zip `Date Filed` `Date Closed`
## <dbl> <chr> <chr> <dttm> <dttm>
## 1 48204 Gaithersburg 20877 2023-07-05 00:00:00 2023-07-07 00:00:00
## 2 48121 Clarksburg 20871 2023-05-18 00:00:00 2023-06-16 00:00:00
## 3 48153 Silver Spring 20906 2023-06-07 00:00:00 2023-07-17 00:00:00
## 4 48079 Rockville 20850 2023-04-21 00:00:00 2023-07-13 00:00:00
## 5 48133 Rockville 20852 2023-05-25 00:00:00 2023-07-25 00:00:00
## 6 47888 Bethesda 20817 2023-03-02 00:00:00 2023-07-13 00:00:00
## # ℹ 7 more variables: `Type of Complaint` <chr>, `Case Disposition` <chr>,
## # `Process Length` <dbl>, Latitude <dbl>, Longitude <dbl>, Pursuant <chr>,
## # `Home Type` <chr>
unique(comp_mt$City)
## [1] "Gaithersburg" "Clarksburg" "Silver Spring"
## [4] "Rockville" "Bethesda" "Olney"
## [7] "Germantown" "Kensington" "Montgomery Village"
## [10] "Takoma Park" "Chevy Chase" "Burtonsville"
## [13] "Brinklow" "Wheaton" "North Bethesda"
## [16] "Germantown M" "Potomac" "Boyds"
## [19] "Sandy Spring" "Damascus" "Derwood"
## [22] "Behesda" "Aspen Hill" "Gaitersburg"
## [25] "Brookeville" "Gaitherburgh" "Silver Spring Md"
## [28] "Hyattsvile" "N/A" "Mont. Village"
## [31] "Boys" "Sillver Spring" "North Potomac"
## [34] "Ashton" "N.bethesda" "Oxen Hill"
## [37] "Fort Washington" "Montgomery V" "Darnestown"
## [40] "Cabin John" "Washington Grove" "Hyattsville"
## [43] "Garrett Park" "Laytonsville" "Nbethesda"
## [46] "Silver Spring" "Spencerville" "N. Bethesda"
## [49] "Temple Hills" "Washington" "Npotomac"
## [52] "Glen Echo" "Adelphi" "Mount Airy"
## [55] "Poolesville" "Highland"
comp_mt <- comp_mt |>
mutate(
City = str_trim(City), # Remove any leading or trailing whitespace
City = str_replace_all(City, "Md$", ""),
City = str_replace_all(City, "M$", ""),
City = str_replace_all(City, "\\.", ""),
City = str_replace_all(City, "Behesda", "Bethesda"),
City = str_replace_all(City, "Gaithersburg|Gaitersburg|Gaitherburgh", "Gaithersburg"),
City = str_replace_all(City, "N\\.?bethesda", "North Bethesda"),
City = str_replace_all(City, "Hyattsvile", "Hyattsville"),
City = str_replace_all(City, "Sillver Spring", "Silver Spring"),
City = str_replace_all(City, "Npotomac", "North Potomac"),
City = str_replace_all(City, "Boys", "Boyds"),
City = str_replace_all(City, "Mont\\..*", "Montgomery Village"),
City = str_replace_all(City, "Dilkerson", "Dickerson"),
City = str_replace_all(City, "^N/A$", NA_character_)
)
unique(comp_mt$City)
## [1] "Gaithersburg" "Clarksburg" "Silver Spring"
## [4] "Rockville" "Bethesda" "Olney"
## [7] "Germantown" "Kensington" "Montgomery Village"
## [10] "Takoma Park" "Chevy Chase" "Burtonsville"
## [13] "Brinklow" "Wheaton" "North Bethesda"
## [16] "Germantown " "Potomac" "Boyds"
## [19] "Sandy Spring" "Damascus" "Derwood"
## [22] "Aspen Hill" "Brookeville" "Silver Spring "
## [25] "Hyattsville" NA "Mont Village"
## [28] "North Potomac" "Ashton" "Oxen Hill"
## [31] "Fort Washington" "Montgomery V" "Darnestown"
## [34] "Cabin John" "Washington Grove" "Garrett Park"
## [37] "Laytonsville" "Silver Spring" "Spencerville"
## [40] "N Bethesda" "Temple Hills" "Washington"
## [43] "Glen Echo" "Adelphi" "Mount Airy"
## [46] "Poolesville" "Highland"
comp_mt <- comp_mt |>
mutate(Zip = str_extract(Zip, "\\d{5}"))
plot2 <- comp_mt |>
ggplot(aes(x=`Date Filed`, y= `Process Length`))+
geom_point(aes(color = `Home Type`))+
facet_wrap(~`Type of Complaint`)
plot2
plot2 <- comp_mt |>
ggplot(aes(x = `Date Closed`, y = `Process Length`)) +
geom_point(aes(color = Pursuant)) +
facet_wrap(~Pursuant) +
scale_color_manual(values = wes_palette("Darjeeling1", n = length(unique(comp_mt$Pursuant)))) +
theme_minimal()
plot2
monthly_totals <- comp_mt |>
mutate(Month = floor_date(`Date Filed`, "month")) |>
group_by(Month, Pursuant) |>
summarise(Total_Cases = n(), .groups = 'drop')
Id_col <- comp_mt |> select(`Case Number`,`Process Length`, Latitude, Longitude)
comp_disp <- comp_prepivot |>
pivot_wider(
id_cols = `Case Number`,
names_from = `Case Disposition`,
values_from = `Case Disposition`,
values_fill = list(`Case Disposition` = 0),
values_fn = list(`Case Disposition` = function(x) 1)
)
comp_type <- comp_prepivot |>
pivot_wider(
id_cols = `Case Number`,
names_from = `Type of Complaint`,
values_from = `Type of Complaint`,
values_fill = list(`Type of Complaint` = 0),
values_fn = list(`Type of Complaint` = function(x) 1)
)
comp_pursuant <- comp_prepivot|>
pivot_wider(
id_cols = `Case Number`,
names_from = Pursuant,
values_from = Pursuant,
values_fill = list(Pursuant = 0),
values_fn = list(Pursuant = function(x) 1)
)
comp_wide <- Id_col |>
select(`Case Number`, `Process Length`, Latitude, Longitude) |>
distinct() |>
left_join(comp_disp, by = "Case Number") |>
left_join(comp_type, by = "Case Number") |>
left_join(comp_pursuant, by = "Case Number")
aggregated_latlong <- comp_wide %>%
group_by(Latitude, Longitude) %>%
summarise(across(where(is.numeric), sum, na.rm = TRUE), .groups = "drop")
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(where(is.numeric), sum, na.rm = TRUE)`.
## ℹ In group 1: `Latitude = 38.75397` and `Longitude = -76.98503`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
# aggregated_comp <- aggregated_comp |> select(!`Case Number`)
comp_zip_id <- comp_mt |> select(`Zip`,`Process Length`,`Case Number`)
zip_disp <- comp_prepivot |>
pivot_wider(
id_cols = `Case Number`,
names_from = `Case Disposition`,
values_from = `Case Disposition`,
values_fill = list(`Case Disposition` = 0),
values_fn = list(`Case Disposition` = function(x) 1)
)
zip_type <- comp_prepivot |>
pivot_wider(
id_cols = `Case Number`,
names_from = `Type of Complaint`,
values_from = `Type of Complaint`,
values_fill = list(`Type of Complaint` = 0),
values_fn = list(`Type of Complaint` = function(x) 1)
)
zip_pursuant <- comp_prepivot|>
pivot_wider(
id_cols = `Case Number`,
names_from = Pursuant,
values_from = Pursuant,
values_fill = list(Pursuant = 0),
values_fn = list(Pursuant = function(x) 1)
)
comp_zip_wide <- comp_zip_id |>
select(`Zip`, `Process Length`,`Case Number`) |>
distinct() |>
left_join(zip_disp, by = "Case Number") |>
left_join(zip_type, by = "Case Number") |>
left_join(zip_pursuant, by = "Case Number")
glimpse(comp_zip_wide)
## Rows: 10,789
## Columns: 41
## $ Zip <chr> "20877", "20871…
## $ `Process Length` <dbl> 2, 29, 40, 83, …
## $ `Case Number` <dbl> 48204, 48121, 4…
## $ `Status: Referred` <dbl> 1, 0, 0, 0, 0, …
## $ `Status: Favourable Settlement` <dbl> 0, 1, 0, 1, 1, …
## $ `Status: Withdrawn or incomplete` <dbl> 0, 0, 1, 0, 0, …
## $ `Status: Unfavourable Settlement` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: No jurisdiction` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Legal action` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: No Jurisdiction` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Complaint noted` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Refer to Code Enforcement` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Final walk through` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Lease provision honored` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Lease completion` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Case mistakenly opened` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Referred to a Federal agency` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Case Error` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Appealed to Circuit Court` <dbl> 0, 0, 0, 0, 0, …
## $ `Status: Citation Issued` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Quit and Vacate` <dbl> 1, 1, 1, 0, 0, …
## $ `Cause: Payment Dispute` <dbl> 0, 0, 0, 1, 0, …
## $ `Cause: Security Deposit` <dbl> 0, 0, 0, 0, 1, …
## $ `Cause: Lease Agreement` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Infestation` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Other` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Harassment` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Eviction` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: General Maintenance` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Other-Miscellaneous` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: No Jurisdiction` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Moisture damage` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Plumbing and Pipes` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Retaliatory Eviction` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Essential Appliances` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Management` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Homeless Prevention and Relocation Assistance` <dbl> 0, 0, 0, 0, 0, …
## $ `NA` <dbl> 0, 0, 0, 0, 0, …
## $ `Cause: Walk through Inspections` <dbl> 0, 0, 0, 0, 0, …
## $ Landlord <dbl> 1, 0, 0, 0, 0, …
## $ Tenant <dbl> 0, 1, 1, 1, 1, …
# Aggregating data by ZIP code
zip_summary <- comp_zip_wide |>
group_by(Zip) |>
summarize(
Total_Processes = n(),
Total_Process_Length = sum(`Process Length`, na.rm = TRUE),
Median_Process_Length = median(`Process Length`, na.rm = TRUE),
Total_Tenant_Cases = sum(Tenant, na.rm = TRUE),
Total_Landlord_Cases = sum(Landlord, na.rm = TRUE),
.groups = 'drop'
)
zip_summary <- zip_summary |>
mutate(
total_cases = Total_Tenant_Cases + Total_Landlord_Cases,
tenant_vs_landlord = (Total_Tenant_Cases / total_cases)
)
zip_comp_summary <- comp_mt %>%
group_by(Zip, `Type of Complaint`) %>%
summarize(
`Process Total` = sum(`Process Length`, na.rm = TRUE),
`Process Median` = median(`Process Length`, na.rm = TRUE),
Cases = n(),
`Tenant Cases` = sum(Pursuant == "Tenant", na.rm = TRUE),
`Landlord Cases` = sum(Pursuant == "Landlord", na.rm = TRUE)
) %>%
arrange(Zip, `Type of Complaint`)
## `summarise()` has grouped output by 'Zip'. You can override using the `.groups`
## argument.
# Assuming 'data' is your dataset
comp_zip_widest <- comp_zip_wide |> select(!c(`Process Length`, `Case Number`))
zip_totals <- comp_zip_widest %>%
group_by(Zip) |>
summarize(across(where(is.numeric), sum, na.rm = TRUE), .groups = "drop")
comp_zip_fin <- left_join(zip_totals, zip_summary, by = "Zip")
Create percentages:
head(comp_zip_fin)
## # A tibble: 6 × 46
## Zip `Status: Referred` Status: Favourable Settlemen…¹ Status: Withdrawn or…²
## <chr> <dbl> <dbl> <dbl>
## 1 20008 0 0 0
## 2 20017 0 0 0
## 3 20032 0 0 0
## 4 20314 0 0 0
## 5 20744 0 0 0
## 6 20745 0 0 0
## # ℹ abbreviated names: ¹`Status: Favourable Settlement`,
## # ²`Status: Withdrawn or incomplete`
## # ℹ 42 more variables: `Status: Unfavourable Settlement` <dbl>,
## # `Status: No jurisdiction` <dbl>, `Status: Legal action` <dbl>,
## # `Status: No Jurisdiction` <dbl>, `Status: Complaint noted` <dbl>,
## # `Status: Refer to Code Enforcement` <dbl>,
## # `Status: Final walk through` <dbl>, …
comp_zip_fin <- comp_zip_fin |>
mutate(across(
2:37,
~ . / sum(., na.rm = TRUE) * 100,
.names = "perc_{col}"
))
zip_codes <- st_read("/Users/gimle/Desktop/Data 110/Datasets 110/ZIPCODES_20240411")
## Reading layer `geo_export_36683266-22fc-450a-8b12-0aae59abf871' from data source `/Users/gimle/Desktop/Data 110/Datasets 110/ZIPCODES_20240411'
## using driver `ESRI Shapefile'
## Simple feature collection with 97 features and 4 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: -77.52768 ymin: 38.93424 xmax: -76.88764 ymax: 39.35426
## Geodetic CRS: WGS84(DD)
AGGREGATE DATA WITH ZIP & MAKE TROUBLED BY ZIP
zip_codes$mail_city <- str_to_title(tolower(zip_codes$mail_city))
zip_codes <- zip_codes |> rename(City = mail_city)
zip <- zip_codes |> rename(Zip = zipcode)
# Group by zip!
summary_troubled <- troubled_clean |>
group_by(Zip) |>
summarise(
Total_Units = sum(Unit.Count, na.rm = TRUE),
Units_Inspected = sum(Units.Inspected, na.rm = TRUE),
Average_Severity_Rate = weighted.mean(Severity.Index, Unit.Count, na.rm = TRUE)
)
# Calculate the number of units per Analysis Rating category
rating_summary <- troubled_clean |>
group_by(Zip, Analysis.Rating) |>
summarise(
Units = sum(Unit.Count, na.rm = TRUE),
.groups = 'drop'
) |>
pivot_wider(
names_from = Analysis.Rating,
values_from = Units,
values_fill = list(Units = 0)
) |>
rename_with(~ paste0(., "_Units"), everything())
rating_summary <- rating_summary |> rename(Zip = Zip_Units)
mean_value_l <- mean(unique_landlord_cases$`Process Length`, na.rm = TRUE) # na.rm removes NA values before calculation
median_value_l <- median(unique_landlord_cases$`Process Length`, na.rm = TRUE)
mean_value <- mean(unique_tenant_cases$`Process Length`, na.rm = TRUE) # na.rm removes NA values before calculation
median_value <- median(unique_tenant_cases$`Process Length`, na.rm = TRUE)
# Print the results
cat("Mean:", mean_value, "\n")
## Mean: 78.24682
cat("Median:", median_value, "\n")
## Median: 38
cat("Mean:", mean_value_l, "\n")
## Mean: 35.58834
cat("Median:", median_value_l, "\n")
## Median: 10
plot2 <- comp_mt |>
ggplot(aes(x=`Date Filed`, y= `Process Length`))+
geom_point(aes(color = `Home Type`))+
facet_wrap(~`Type of Complaint`)
plot2