library(tidyverse)
# Load the raw state-court case spreadsheet
DIR <- "C:/Users/sclee1/OneDrive/Documents/R/legalAnalytics/data/"
# The first row of the file describes the variables, so it is skipped
data <- read.csv(
  file = paste0(DIR, "stateCases.csv"),
  skip = 1
)
# A few cases list more than one case type in a single cell. Split that cell
# so that every row carries exactly one case type; a case with several types
# therefore shows up on several rows.
data <- data %>%
  # the original row number makes it easier to remove Cam's data later
  mutate(case_id = row_number()) %>%
  separate(col = CaseType, into = c("s1", "s2", "s3")) %>%
  gather(key = type, value = CaseType, s1:s3) %>%
  # slots with no case type are NA and are dropped
  filter(!is.na(CaseType)) %>%
  select(-type)
# Import the case-type lookup table (CaseType code -> CaseType_des label)
CaseType_description <- read.csv(paste0(DIR, "CaseType_description.csv")) %>%
  mutate(
    # delete explanations within (), i.e. everything from the first "(" on,
    # then trim the blank that preceded the "(" so labels carry no trailing
    # white space
    CaseType_des = str_trim(str_remove_all(CaseType_des, regex("\\(.*"))),
    # keep the join key as character: the case data holds CaseType as
    # character after separate(), so no factor/character coercion is needed
    CaseType = as.character(CaseType)
  )
# Attach the case-type description to every case row
data_merged <-
  data %>%
  # name the key explicitly instead of relying on a natural join
  left_join(CaseType_description, by = "CaseType")
# Structure of data
str(data_merged)
## 'data.frame': 698 obs. of 41 variables:
## $ DoNo : Factor w/ 667 levels "211-2015-CV-00001",..: 605 62 357 122 295 356 120 27 28 41 ...
## $ FileDate : Factor w/ 344 levels "01/03/2017 •",..: 199 231 236 342 319 125 306 135 135 170 ...
## $ PNames : Factor w/ 556 levels "1466 Bodwell Road, LLC",..: 397 20 173 185 420 16 17 410 221 458 ...
## $ DNames : Factor w/ 655 levels "10 Commerce Park North Condominium Association; Albireo Properties, LLC; Andover - 10 Commerce Park North, LLC;"| __truncated__,..: 480 378 474 94 583 196 33 524 92 407 ...
## $ ProSe : Factor w/ 5 levels "Both","Defendant",..: 3 2 3 5 3 3 2 2 2 2 ...
## $ PLaw : Factor w/ 389 levels "9","A. Gerard O'Neil, Jr.",..: 88 120 378 16 172 227 102 25 312 3 ...
## $ PFirm : Factor w/ 262 levels "9","Abrahamsen Ratchford, P.C.",..: 211 262 149 5 171 36 262 67 192 221 ...
## $ EqReq : Factor w/ 5 levels "0","9","No","Unknown",..: 3 5 5 5 5 5 5 5 5 5 ...
## $ MonReq : Factor w/ 184 levels "$0 ","$0.00 ",..: 111 79 87 111 60 110 47 45 111 83 ...
## $ LoC : int 7 0 11 3 0 2 2 0 0 0 ...
## $ DLaw : Factor w/ 339 levels "0","9","Abbott, Jr., Winslow Kirk; Bello, James A",..: 93 2 28 314 26 240 2 2 2 2 ...
## $ DFirm : Factor w/ 189 levels "","0","9","Andrew Eills Law Offices, PLLC; Sheehan, Phinney, Bass + Green, P.A.",..: 46 3 128 130 31 6 3 3 3 3 ...
## $ X3d : Factor w/ 44 levels "","0","Alitz, Jeffrey L; Lasna, Meredith M.",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ X3dLaw : Factor w/ 17 levels "0","9","David W. Johnston",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ X3dFirm : Factor w/ 14 levels "0","9","Devine, Millimet & Branch, P.A.",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ CtrCl : int 1 0 1 0 0 0 0 0 0 0 ...
## $ CrsCl : int 0 0 0 2 0 0 0 0 0 0 ...
## $ Rec : Factor w/ 6 levels "Ignatius","MacLeod; Borenstein",..: NA NA NA NA NA NA NA NA NA NA ...
## $ Rem : Factor w/ 7 levels "HRC","Made the appeal and then withdrew, so NA (thought you'd like to know… the guy sued NH and won!)",..: NA NA NA NA NA NA NA NA NA NA ...
## $ Judge : Factor w/ 49 levels "","9","Abramson",..: 22 37 14 37 37 23 37 37 37 37 ...
## $ OthMot : Factor w/ 325 levels "","Assented Motion to Continue",..: 166 84 306 312 90 207 86 82 325 164 ...
## $ MTime : Factor w/ 6 levels "","0","1","1; 2",..: 6 2 6 2 2 2 2 2 6 2 ...
## $ MTA : Factor w/ 9 levels "","0","1","2",..: 2 2 4 3 2 2 2 2 2 2 ...
## $ MTD : Factor w/ 14 levels "0","1","1; 3",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ DSJ : Factor w/ 7 levels "0","1","10","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PSJ : Factor w/ 6 levels "0","1","1;3",..: 1 1 2 1 1 1 1 1 1 1 ...
## $ DMIL : Factor w/ 10 levels "0","1","1; 10",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PMIL : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Stp : int 0 0 0 1 1 0 0 0 0 0 ...
## $ Arb : Factor w/ 3 levels "N/A","No","Yes": 2 NA 2 2 NA 2 2 NA NA NA ...
## $ TrialType : Factor w/ 6 levels "0","1","2","8",..: 2 5 5 5 5 5 5 5 5 5 ...
## $ DISP : Factor w/ 16 levels "0","1","10","2",..: 9 15 4 7 6 6 11 6 6 15 ...
## $ JMent : Factor w/ 7 levels "1","1; NA","2",..: 1 1 1 2 NA NA 1 NA NA 1 ...
## $ NOJ : Factor w/ 7 levels "0","1","1; 0",..: 2 2 2 2 1 1 2 1 1 2 ...
## $ Award : Factor w/ 87 levels "$1,700,000 ",..: 5 47 54 50 24 70 16 71 71 51 ...
## $ Eq : Factor w/ 6 levels "$34,000 ","0",..: 2 6 2 2 6 2 2 6 6 6 ...
## $ TermDate : Factor w/ 448 levels "1/10/2017","1/10/2018",..: 302 89 59 49 271 252 210 444 253 164 ...
## $ Enforce : Factor w/ 10 levels "0","1","1; 2",..: 7 4 3 3 3 9 8 8 8 8 ...
## $ case_id : int 1 2 3 4 5 6 7 8 9 10 ...
## $ CaseType : chr "4" "2" "2" "2" ...
## $ CaseType_des: chr "Contract - Other" "Contract - Collection" "Contract - Collection" "Contract - Collection" ...
# Column-by-column descriptive summary of the merged data
data_merged %>%
  summary()
## DoNo FileDate
## 212-2013-CV-00012: 3 2/24/2016 : 14
## 216-2011-CV-00105: 3 2/1/2016 : 12
## 217-2010-CV-05006: 3 10/31/2016: 9
## 217-2011-CV-00270: 3 5/2/2016 : 8
## 217-2011-CV-00730: 3 9/12/2016 : 8
## 217-2012-CV-00658: 3 1/25/2016 : 7
## (Other) :680 (Other) :640
## PNames
## Discover Bank : 20
## American Express Centurion Bank : 16
## Barclays Bank Delaware : 8
## Paugus Bay Plaza Condominium Association: 8
## Barcklay Bank Delaware : 7
## Concord Hospital : 6
## (Other) :633
## DNames
## Town of Gilford : 6
## AEO Associates, LLC; KMO Associates, LLC; KMO Associates, LP; Aaron Olson : 3
## Arthur Cleaves; Jacob Emerson; Tom Gerrish; Joseph Guerra; Mark Jadkowski; Majella Global Technologies Asia Pacific PTY LTD : 3
## Ballentine & Company, Inc.; Ballentine Partners, LLC; Roy Ballentine; Andrew McMorrow; Gregory Peterson; Kyle Schaffer; Claudia Shilo: 3
## BK Systems, Inc; Ernest Houle; Karlton Klardie : 3
## Bryan Gould; Thomas Janosz; N.H. Board of Registration of Funeral Directors and Embalmers; Susan Simonds : 3
## (Other) :677
## ProSe PLaw
## Both : 32 9 : 45
## Defendant:216 Robert L. O'Brien : 24
## Neither :386 O'Brien, Robert L : 19
## Plaintiff: 24 N/A : 12
## Some DF : 40 Arnold Rosenblatt : 10
## Michael J. Fontaine: 10
## (Other) :578
## PFirm EqReq
## 9 :164 0 : 2
## Schlee & Stillman, LLC : 24 9 : 14
## Zwicker & Associates, P.C. : 20 No : 94
## Welts, White, & Fontaine, P.C. : 16 Unknown: 7
## Cook, Little, Rosenblatt, & Manson, PLLC : 15 Yes :580
## McLane, Graf, Raulerson & Middleton, P.A.: 15 NA's : 1
## (Other) :444
## MonReq LoC DLaw
## 1 :383 Min. : 0.000 9 :188
## 0 :122 1st Qu.: 0.000 N/A : 83
## 9 : 11 Median : 0.000 Burt, Gary Michael: 5
## 25,000.00: 2 Mean : 3.378 Joseph D. Becher : 5
## $0 : 1 3rd Qu.: 4.000 Daniel J. Orroth : 4
## $0.00 : 1 Max. :56.000 Edward D. Philpot : 4
## (Other) :178 (Other) :409
## DFirm
## 9 :299
## N/A : 83
## Getman, Schulthess, Steere, & Poulin, P.A.: 11
## Civil Bureau : 8
## Mitchell Municipal Group, P.A. : 8
## Primmer, Piper, Eggleston, & Cramer, P.C. : 8
## (Other) :281
## X3d X3dLaw
## 0 :640 0 :667
## Andrew Howe; Martina Howe: 4 N/A : 8
## : 3 Ethan G. Wood : 4
## Cross Insurance Agency : 3 Donald A. Kennedy: 3
## Delaney O'Hara, Diana, : 3 Leigh S. Willey : 3
## Donald A. Kennedy : 3 Jamie N. Hage : 2
## (Other) : 42 (Other) : 11
## X3dFirm CtrCl CrsCl
## 0 :666 Min. :0.0000 Min. :0.00000
## N/A : 8 1st Qu.:0.0000 1st Qu.:0.00000
## 9 : 6 Median :0.0000 Median :0.00000
## Patrick Wood Law Office, PLLC : 4 Mean :0.1074 Mean :0.02436
## Devine, Millimet & Branch, P.A.: 3 3rd Qu.:0.0000 3rd Qu.:0.00000
## Hage Hodes, P.A. : 2 Max. :2.0000 Max. :2.00000
## (Other) : 9
## Rec Rem Judge
## Ignatius : 4 N/A :185 O'Neill :260
## MacLeod; Borenstein: 1 Removed : 22 Brown : 75
## N/a : 1 Transfer : 4 Abramson : 66
## N/A :192 Transferred: 2 McNamara : 58
## Na : 1 HRC : 1 Kissinger: 36
## O'Neill : 5 (Other) : 2 (Other) :151
## NA's :494 NA's :482 NA's : 52
## OthMot MTime MTA
## None :205 : 2 0 :615
## Motion for Entry of Final Judgment: 44 0 :486 1 : 66
## Motion for Voluntary Nonsuit : 16 1 : 30 2 : 10
## Assented to Motion to Continue : 14 1; 2: 5 3 : 2
## Motion to Strike : 12 1;2 : 6 : 1
## Motion to Amend Complaint : 9 2 :169 2;4 : 1
## (Other) :398 (Other): 3
## MTD DSJ PSJ DMIL PMIL
## 0 :601 0 :653 0 :667 0 :679 Min. : 0.000
## 1 : 32 1 : 9 1 : 17 10 : 5 1st Qu.: 0.000
## 3 : 20 10 : 21 1;3: 1 9 : 5 Median : 0.000
## 10 : 11 2 : 2 10 : 5 1 : 3 Mean : 0.149
## 2 : 9 2; 4: 1 3 : 7 1; 10 : 1 3rd Qu.: 0.000
## 6 : 9 3 : 11 6 : 1 1; 3 : 1 Max. :10.000
## (Other): 16 4 : 1 (Other): 4
## Stp Arb TrialType DISP JMent
## Min. :0.0000 N/A :193 0 : 17 3 :226 1 :142
## 1st Qu.:0.0000 No :110 1 : 89 9 :121 2 :102
## Median :0.0000 Yes : 58 2 : 54 8 : 83 x : 89
## Mean :0.5057 NA's:337 8 : 1 6 : 78 N/A : 23
## 3rd Qu.:1.0000 9 :535 1 : 73 9 : 21
## Max. :2.0000 N/A: 2 (Other):116 (Other): 3
## NA's : 1 NA's :318
## NOJ Award Eq TermDate
## 0 :362 0 :250 $34,000 : 1 Open : 19
## 9 :231 1 :216 0 :191 4/3/2017 : 6
## 1 : 89 9 :146 1 : 4 3/30/2015: 5
## 3 : 9 $143,552 : 3 2 : 2 4/13/2017: 5
## 2 : 3 $1,700,000 : 1 3 : 6 6/27/2016: 5
## (Other): 2 $1,700.74 : 1 9 :494 (Other) :657
## NA's : 2 (Other) : 81 NA's : 1
## Enforce case_id CaseType CaseType_des
## 0 :646 Min. : 1.0 Length:698 Length:698
## 1 : 19 1st Qu.:144.2 Class :character Class :character
## 3 : 16 Median :318.5 Mode :character Mode :character
## 1;3 : 4 Mean :321.9
## 2 : 4 3rd Qu.:492.8
## 1; 2 : 3 Max. :667.0
## (Other): 6
# Name patterns used to create def_cat (plt_cat): whether a business or a
# government body is involved as defendant (or plaintiff).
# Business markers, one alternative per element, joined with "|".
# NOTE(review): several alternatives are prefix matches (e.g. "\\sInc",
# "\\scomp", "bank") and can also hit personal names - confirm this is intended.
bus_symbols <- paste(
  c(
    "\\sL\\.?L\\.?C\\.?",
    "\\sP\\.?C\\.?",
    "\\sP\\.?A\\.?\\s",
    "\\sInc",
    "\\sL\\.?P\\.?",
    "\\sL\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?L\\.?C\\.?",
    "\\sd\\.?b\\.?a\\.?",
    "\\sd/b/a",
    "\\sp\\.?b\\.?c\\.?",
    "\\sg\\.?p\\.?",
    "\\scorp",
    "\\slimited",
    "\\sltd\\.?",
    "\\sco(\\.|$)",
    "\\scom(,\\s|\\s|$)",
    "\\scomp\\s?",
    "\\scompany\\s?",
    "\\scompanies\\.?",
    "\\spartnership",
    "\\ssole\\sproprietorship",
    "\\sassociation",
    "bank"
  ),
  collapse = "|"
)
# Government markers, matched as whole words only so that names such as
# "Estate of ..." or "Townsend" are not classified as government
gov_symbols <- "\\b(state|city|town)\\b"
# Build the analysis data set from the merged case data
data_processed <- data_merged %>%
  # split the docket number into county, year, type and case number
  # (DoNo itself is kept)
  separate(
    col = DoNo,
    into = c("county", "year", "type", "caseN"),
    remove = FALSE
  ) %>%
  # parse the filing / termination dates and derive the suit length in days
  mutate(
    FileDate = as.Date(FileDate, "%m/%d/%Y"),
    TermDate = as.Date(TermDate, "%m/%d/%Y"),
    lengthOfsuit = as.numeric(difftime(TermDate, FileDate, units = "days"))
  ) %>%
  # classify each side from its names: business markers are tried before
  # government markers, everything else gets the default label.
  # NOTE(review): the default is "Neither" for def_cat but "neither" for
  # plt_cat; left as is so existing values do not change.
  mutate(
    def_cat = case_when(
      str_detect(DNames, regex(bus_symbols, ignore_case = TRUE)) ~ "Bus",
      str_detect(DNames, regex(gov_symbols, ignore_case = TRUE)) ~ "Gov",
      TRUE ~ "Neither"
    ),
    plt_cat = case_when(
      str_detect(PNames, regex(bus_symbols, ignore_case = TRUE)) ~ "Bus",
      str_detect(PNames, regex(gov_symbols, ignore_case = TRUE)) ~ "Gov",
      TRUE ~ "neither"
    ),
    # is a business on both sides, on exactly one side, or on neither?
    business = case_when(
      def_cat == "Bus" & plt_cat == "Bus" ~ "both",
      def_cat == "Bus" | plt_cat == "Bus" ~ "only_one",
      TRUE ~ "neither"
    ),
    # same question for government bodies
    government = case_when(
      def_cat == "Gov" & plt_cat == "Gov" ~ "both",
      def_cat == "Gov" | plt_cat == "Gov" ~ "only_one",
      TRUE ~ "neither"
    )
  ) %>%
  # text labels for the coded disposition, trial type and judgment columns
  mutate(
    # codes without a label below (including multi-code cells) become NA
    DISP_des = case_when(
      DISP == 1 ~ "dismissed",
      DISP == 2 ~ "summary judgement",
      DISP == 3 ~ "settled",
      DISP == 4 ~ "jury verdict",
      DISP == 5 ~ "bench trial",
      DISP == 6 ~ "default judgment",
      DISP == 7 ~ "voluntary nonsuit",
      DISP == 8 ~ "other resolution",
      DISP == 9 ~ "unknown resolution",
      DISP == 10 ~ "pending"
    ),
    # unlabelled trial-type codes are passed through unchanged
    TrialType_des = case_when(
      TrialType == "1" ~ "jury trial",
      TrialType == "2" ~ "bench trial",
      TrialType == "9" ~ "no trial",
      TrialType == "0" ~ "still open",
      TRUE ~ as.character(TrialType)
    ),
    # every judgment value not listed here is labelled "Settlement"
    JMent_des = case_when(
      JMent == 1 ~ "Pltf",
      JMent == 2 ~ "Def",
      JMent == 3 ~ "Both parties",
      JMent == 9 ~ "still open",
      TRUE ~ "Settlement"
    ),
    # plt_winning will be a response var to predict the odds of the plaintiff
    # winning: "1" for plaintiff, "0" for defendant, NA otherwise
    plt_winning = case_when(
      JMent_des == "Pltf" ~ "1",
      JMent_des == "Def" ~ "0"
    )
  ) %>%
  # strip white space, "$" and "," from the money columns, then convert them
  # to numbers
  mutate(
    MonReq = as.numeric(str_remove_all(str_squish(MonReq), "\\$|\\,")),
    Award = as.numeric(str_remove_all(str_squish(Award), "\\$|\\,"))
  ) %>%
  # county (origin of case) is taken from the docket id, whereas Jurisdiction
  # (where it's tried) was identified by Chantalle; it is assigned here by
  # case_id range
  mutate(
    Jurisdiction = case_when(
      case_id <= 425 ~ "Belknap",
      between(case_id, 426, 618) ~ "Merrimack",
      case_id >= 619 ~ "Hills North"
    )
  ) %>%
  # Chantalle requested changes in case type after finding errors in the
  # students' work: first regroup by case-type code, then tidy two labels
  mutate(
    CaseType_des = case_when(
      CaseType %in% c("1", "2", "4", "24") ~ "Contract Claims",
      CaseType %in% c("7", "8", "9", "11", "12", "13", "37", "40") ~ "Negligence Claims other than Medical Malpractice",
      CaseType %in% c("14", "15", "16", "17") ~ "Real Property Cases",
      TRUE ~ CaseType_des
    ),
    CaseType_des = case_when(
      CaseType_des == "Declaratory Judgement Unknown" ~ "Declaratory Judgment",
      CaseType_des == "Negligence Claims other than Medical Malpractice" ~ "Negligence other than Medical Malpractice",
      TRUE ~ CaseType_des
    )
  ) %>%
  # simplify ProSe; values not listed here end up as NA
  mutate(
    ProSe_rev = case_when(
      ProSe %in% c("Defendant", "Plaintiff") ~ "only_one",
      ProSe == "Both" ~ "both",
      ProSe == "Neither" ~ "neither"
    )
  )
# Quick look at the processed columns
glimpse(data_processed)
## Observations: 698
## Variables: 56
## $ DoNo <fct> 217-2011-CV-00264, 211-2015-CV-00092, 216-2012-C...
## $ county <chr> "217", "211", "216", "211", "211", "216", "211",...
## $ year <chr> "2011", "2015", "2012", "2015", "2016", "2012", ...
## $ type <chr> "CV", "CV", "CV", "CV", "CV", "CV", "CV", "CV", ...
## $ caseN <chr> "00264", "00092", "00521", "00203", "00224", "00...
## $ FileDate <date> 2011-04-27, 2015-05-06, 2012-06-13, 2015-09-08,...
## $ PNames <fct> "Olde Province Commons, LLC", "American Express ...
## $ DNames <fct> "Nitro Fireworks Co., Inc; Charles Shivery", "Ke...
## $ ProSe <fct> Neither, Defendant, Neither, Some DF, Neither, N...
## $ PLaw <fct> "Donald C. Crandlemire", "Frederick B. Seitz, IV...
## $ PFirm <fct> "Shaheen & Gordon, P.A.", "Zwicker & Associates,...
## $ EqReq <fct> No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,...
## $ MonReq <dbl> 1.00, 4768.06, 485154.04, 1.00, 27112.01, 0.00, ...
## $ LoC <int> 7, 0, 11, 3, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ DLaw <fct> Edward D. Philpot, 9, Brian D. Duffy, Timothy K....
## $ DFirm <fct> "Edward D. Philpot, Jr., PLLC", "9", "Nixon Peab...
## $ X3d <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ X3dLaw <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ X3dFirm <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CtrCl <int> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CrsCl <int> 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Rec <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Rem <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Judge <fct> "McNamara", "O'Neill", "Garfunkel; McNamara", "O...
## $ OthMot <fct> Motion to Amend Brief Statement of Defenses; Mot...
## $ MTime <fct> 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, ...
## $ MTA <fct> 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ MTD <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ DSJ <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ PSJ <fct> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ DMIL <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ PMIL <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Stp <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Arb <fct> No, NA, No, No, NA, No, No, NA, NA, NA, NA, NA, ...
## $ TrialType <fct> 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...
## $ DISP <fct> 4, 8, 2, 3; 6, 3, 3, 6, 3, 3, 8, 6, 8, 6, 6, 6, ...
## $ JMent <fct> 1, 1, 1, 1; NA, NA, NA, 1, NA, NA, 1, 1, 1, 1, 1...
## $ NOJ <fct> 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, ...
## $ Award <dbl> 108628.00, 4069.06, 485154.04, 40000.00, 2200.00...
## $ Eq <fct> "0", "9", "0", "0", "9", "0", "0", "9", "9", "9"...
## $ TermDate <date> 2012-06-14, 2015-11-24, 2012-10-04, 2016-10-26,...
## $ Enforce <fct> 2; 3, 1; 3, 1; 2, 1; 2, 1; 2, 4, 3, 3, 3, 3, 3, ...
## $ case_id <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ CaseType <chr> "4", "2", "2", "2", "2", "19", "2", "4", "40", "...
## $ CaseType_des <chr> "Contract Claims", "Contract Claims", "Contract ...
## $ lengthOfsuit <dbl> 414, 202, 113, 414, 616, 466, 226, 196, 78, 310,...
## $ def_cat <chr> "Bus", "Neither", "Bus", "Bus", "Bus", "Neither"...
## $ plt_cat <chr> "Bus", "Bus", "Bus", "Bus", "Bus", "Bus", "neith...
## $ business <chr> "both", "only_one", "both", "both", "both", "onl...
## $ government <chr> "neither", "neither", "neither", "neither", "nei...
## $ DISP_des <chr> "jury verdict", "other resolution", "summary jud...
## $ TrialType_des <chr> "jury trial", "no trial", "no trial", "no trial"...
## $ JMent_des <chr> "Pltf", "Pltf", "Pltf", "Settlement", "Settlemen...
## $ plt_winning <chr> "1", "1", "1", NA, NA, NA, "1", NA, NA, "1", "1"...
## $ Jurisdiction <chr> "Belknap", "Belknap", "Belknap", "Belknap", "Bel...
## $ ProSe_rev <chr> "neither", "only_one", "neither", NA, "neither",...
#write.csv(data_processed, "C:/Users/sclee1/OneDrive/Documents/R/legalAnalytics/data/stateCases_spreadsheet.csv")
# Number of distinct cases per docket year
ggplot(distinct(data_processed, case_id, year), aes(x = year)) +
  geom_bar(position = "stack")
# The same counts, one horizontal panel per jurisdiction
ggplot(
  distinct(data_processed, case_id, year, Jurisdiction),
  aes(x = year, fill = Jurisdiction)
) +
  geom_bar(position = "stack") +
  facet_wrap(~Jurisdiction, ncol = 1) +
  coord_flip() +
  labs(x = NULL, y = "Number of cases")
# Distribution of suit length in days on a log scale; only positive lengths
# are kept
ggplot(filter(data_processed, lengthOfsuit > 0), aes(x = lengthOfsuit)) +
  geom_histogram() +
  scale_x_log10()
# Ten most common case types, without Restraining Order
data_processed %>%
  count(CaseType_des, sort = TRUE) %>%
  filter(CaseType_des != "Restraining Order") %>%
  head(n = 10) %>%
  # shorten labels to 30 characters and order the bars by frequency
  mutate(CaseType_des = fct_reorder(str_sub(CaseType_des, 1, 30), n)) %>%
  ggplot(aes(x = CaseType_des, y = n)) +
  geom_col(fill = "cornflowerblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Common Case Types in New Hampshire",
    x = NULL,
    y = "Number of Cases"
  )
# Ten most common case types, all case types included
data_processed %>%
  count(CaseType_des, sort = TRUE) %>%
  head(n = 10) %>%
  mutate(CaseType_des = fct_reorder(str_sub(CaseType_des, 1, 30), n)) %>%
  ggplot(aes(x = CaseType_des, y = n)) +
  geom_col() +
  coord_flip()
# Rows per disposition label, most frequent first
count(data_processed, DISP_des, sort = TRUE)
## # A tibble: 11 x 2
## DISP_des n
## <chr> <int>
## 1 settled 226
## 2 unknown resolution 121
## 3 other resolution 83
## 4 default judgment 78
## 5 dismissed 73
## 6 voluntary nonsuit 63
## 7 pending 18
## 8 summary judgement 16
## 9 <NA> 11
## 10 bench trial 6
## 11 jury verdict 3
# Rows per judgment label, most frequent first
count(data_processed, JMent_des, sort = TRUE)
## # A tibble: 5 x 2
## JMent_des n
## <chr> <int>
## 1 Settlement 432
## 2 Pltf 142
## 3 Def 102
## 4 still open 21
## 5 Both parties 1
# Rows per disposition / judgment combination, most frequent first
count(data_processed, DISP_des, JMent_des, sort = TRUE)
## # A tibble: 24 x 3
## DISP_des JMent_des n
## <chr> <chr> <int>
## 1 settled Settlement 226
## 2 unknown resolution Settlement 109
## 3 default judgment Pltf 69
## 4 dismissed Def 58
## 5 other resolution Pltf 48
## 6 voluntary nonsuit Settlement 38
## 7 other resolution Settlement 24
## 8 voluntary nonsuit Def 24
## 9 pending still open 18
## 10 dismissed Settlement 15
## # ... with 14 more rows
# Whether the plaintiff sought an equitable remedy
count(data_processed, EqReq, sort = TRUE)
## # A tibble: 6 x 2
## EqReq n
## <fct> <int>
## 1 Yes 580
## 2 No 94
## 3 9 14
## 4 Unknown 7
## 5 0 2
## 6 <NA> 1
# Monetary remedy sought by the plaintiff
count(data_processed, MonReq, sort = TRUE)
## # A tibble: 180 x 2
## MonReq n
## <dbl> <int>
## 1 1 383
## 2 0 124
## 3 9 11
## 4 35000 3
## 5 25000 2
## 6 1172. 1
## 7 1436. 1
## 8 1501. 1
## 9 1503. 1
## 10 1582. 1
## # ... with 170 more rows
# Requested amounts: 1 and 0 are not dollar values, so only amounts above 10
# are plotted
data_processed %>%
  filter(MonReq > 10) %>%
  ggplot(aes(MonReq)) +
  geom_histogram() +
  # `labels` is spelled out in full rather than relying on partial matching
  scale_x_log10(labels = scales::dollar)
# Awarded amounts: Award is numeric by now, so the placeholder values 0, 1
# and 9 are excluded as numbers rather than as strings
data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  ggplot(aes(Award)) +
  geom_histogram(binwidth = 0.5) +
  scale_x_log10(labels = scales::dollar)
# Requested amounts above 10, by whether an equitable remedy was sought
# (Yes / No only)
data_processed %>%
  filter(MonReq > 10, EqReq %in% c("Yes", "No")) %>%
  count(EqReq, MonReq, sort = TRUE)
## # A tibble: 177 x 3
## EqReq MonReq n
## <fct> <dbl> <int>
## 1 Yes 35000 3
## 2 Yes 25000 2
## 3 Yes 1172. 1
## 4 Yes 1436. 1
## 5 Yes 1501. 1
## 6 Yes 1503. 1
## 7 Yes 1582. 1
## 8 Yes 1670. 1
## 9 Yes 1671. 1
## 10 Yes 1698. 1
## # ... with 167 more rows
# Rows per county code (taken from the docket number)
count(data_processed, county, sort = TRUE)
## # A tibble: 8 x 2
## county n
## <chr> <int>
## 1 211 342
## 2 216 252
## 3 217 66
## 4 218 14
## 5 212 9
## 6 226 8
## 7 215 4
## 8 219 3
# Rows per jurisdiction
count(data_processed, Jurisdiction, sort = TRUE)
## # A tibble: 3 x 2
## Jurisdiction n
## <chr> <int>
## 1 Belknap 456
## 2 Merrimack 193
## 3 Hills North 49
# Rows per defendant category
count(data_processed, def_cat, sort = TRUE)
## # A tibble: 3 x 2
## def_cat n
## <chr> <int>
## 1 Neither 387
## 2 Bus 286
## 3 Gov 25
# Rows per plaintiff category
count(data_processed, plt_cat, sort = TRUE)
## # A tibble: 3 x 2
## plt_cat n
## <chr> <int>
## 1 neither 378
## 2 Bus 304
## 3 Gov 16
# Rows per ProSe value
count(data_processed, ProSe, sort = TRUE)
## # A tibble: 5 x 2
## ProSe n
## <fct> <int>
## 1 Neither 386
## 2 Defendant 216
## 3 Some DF 40
## 4 Both 32
## 5 Plaintiff 24
# Rows per PLaw value, most frequent first
count(data_processed, PLaw, sort = TRUE)
## # A tibble: 389 x 2
## PLaw n
## <fct> <int>
## 1 9 45
## 2 Robert L. O'Brien 24
## 3 O'Brien, Robert L 19
## 4 N/A 12
## 5 Arnold Rosenblatt 10
## 6 Michael J. Fontaine 10
## 7 Peter S. Cowan 9
## 8 Benjamin R. Roberge 8
## 9 Edward D. Philpot 8
## 10 Niederman, Jay M 8
## # ... with 379 more rows
# Length of complaint in pages on a log scale; zero-length entries are left out
ggplot(filter(data_processed, LoC != 0), aes(x = LoC)) +
  geom_histogram(binwidth = 0.1) +
  scale_x_log10()
# A majority had no trial; another large chunk left blank; 89 jury trials
# vs. 54 bench trials
count(data_processed, TrialType_des, sort = TRUE)
## # A tibble: 6 x 2
## TrialType_des n
## <chr> <int>
## 1 no trial 535
## 2 jury trial 89
## 3 bench trial 54
## 4 still open 17
## 5 N/A 2
## 6 8 1
There is a strong, roughly one-to-one positive association between the amount of money requested and the amount awarded. In addition, the size of the plaintiff’s request does not appear to reduce the likelihood of receiving an award.
# When $ was awarded: requested versus awarded amount, both on log scales
data_processed %>%
  filter(
    MonReq > 10,
    # Award is numeric, so the placeholder values 0, 1 and 9 are compared as
    # numbers instead of strings
    !Award %in% c(0, 1, 9)
  ) %>%
  ggplot(aes(Award, MonReq)) +
  geom_point() +
  scale_x_log10(labels = scales::dollar) +
  scale_y_log10(labels = scales::dollar)
# When $ was awarded versus when not
library(ggbeeswarm)
data_processed %>%
  # Award is numeric, so it is compared with numbers rather than strings
  mutate(Award_cat = if_else(Award == 0, "No Monetary Award", "Monetary Award")) %>%
  filter(
    MonReq > 10,
    # filter out undefined amount (1); the meaning of 9 is unclear, so it is
    # left out as well
    !Award %in% c(1, 9)
  ) %>%
  ggplot(aes(Award_cat, MonReq, color = Award_cat)) +
  geom_boxplot(notch = TRUE) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none") +
  labs(
    title = "Table 3: Effect of Monetary Request on Outcome and Award",
    x = "Outcome",
    y = "Plaintiff’s Monetary Request"
  )
# Median requested amount for cases with and without a monetary award
med_MonReqs <-
  data_processed %>%
  # Award is numeric, so it is compared with numbers rather than strings
  mutate(Award_cat = if_else(Award == 0, "No Monetary Award", "Monetary Award")) %>%
  filter(
    MonReq > 10,
    # filter out undefined amount (1); the meaning of 9 is unclear, so it is
    # left out as well
    !Award %in% c(1, 9)
  ) %>%
  group_by(Award_cat) %>%
  summarise(med_MonReq = median(MonReq))
med_MonReqs
## # A tibble: 2 x 2
## Award_cat med_MonReq
## <chr> <dbl>
## 1 Monetary Award 7170.
## 2 No Monetary Award 14963
# Requested amounts as a beeswarm, one panel per award outcome, with the
# group medians from med_MonReqs drawn as horizontal lines
data_processed %>%
  filter(
    MonReq > 10,
    # filter out undefined amount (1); the meaning of 9 is unclear, so it is
    # left out as well. Award is numeric, hence numeric comparisons.
    !Award %in% c(1, 9)
  ) %>%
  mutate(Award_cat = if_else(Award == 0, "No Monetary Award", "Monetary Award")) %>%
  ggplot(aes(x = "", MonReq, color = Award_cat)) +
  geom_quasirandom() +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none") +
  labs(
    title = "Table 3: Effect of Monetary Request on Outcome and Award",
    x = "Outcome",
    y = "Plaintiff’s Monetary Request"
  ) +
  geom_hline(data = med_MonReqs, aes(yintercept = med_MonReq, color = Award_cat)) +
  facet_wrap(~Award_cat)
Awards mostly range from $1,000 to $1 million, and half of them fall below about $14,000. Interpret with caution: only 85 of the 667 cases have an identified award value.
# median award by case type
data_processed %>%
  # Award is numeric, so the placeholder values 0, 1 and 9 are compared as
  # numbers instead of strings
  filter(!Award %in% c(0, 1, 9)) %>%
  group_by(CaseType_des) %>%
  summarise(median(Award, na.rm = TRUE))
## # A tibble: 11 x 2
## CaseType_des `median(Award, na.rm = TRU~
## <chr> <dbl>
## 1 "Complaint for Injunction/To Enjoin Foreclo~ 26481.
## 2 Consumer Protection Act 143552
## 3 Contract Claims 16115.
## 4 Declaratory Judgment 132700.
## 5 "Forfeiture " 2532.
## 6 Interpleder 9525.
## 7 Negligence other than Medical Malpractice 138009.
## 8 Plea of Indemnity 81649.
## 9 "Tort - Intentional " 143552
## 10 "Tort - Medical Malpractice " 125000
## 11 Wage Claim 5500
library(ggbeeswarm)
# Remove Cam's data using the row numbers stored in case_id early on:
# rows 426 through 618 are dropped
data_processed_HilCounty <- filter(
  data_processed,
  !(case_id > 425 & case_id < 619)
)
# Number of rows with an award above 10
summarise(filter(data_processed, Award > 10), caseN_award = n())
## caseN_award
## 1 85
# Median award; Award is numeric, so the placeholder values 0, 1 and 9 are
# excluded as numbers instead of strings
median_Award <-
  data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  summarise(award_median = median(Award, na.rm = TRUE)) %>%
  pull()
# Award histogram on a log scale with the median marked by a dashed line
data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  ggplot(aes(Award)) +
  geom_histogram(binwidth = 0.5, fill = "cornflowerblue") +
  geom_vline(linetype = "dashed", xintercept = median_Award) +
  scale_x_log10(labels = scales::dollar)
# Awards by county, restricted to county codes 211, 216 and 217.
# Award is numeric, so the placeholder values 0, 1 and 9 are excluded as
# numbers instead of strings.
data_processed %>%
  filter(
    !Award %in% c(0, 1, 9),
    county %in% c("211", "216", "217")
  ) %>%
  ggplot(aes(county, Award, color = county)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")
# Awards for the three most common case types; all other types are lumped
# into "Other" and then dropped
data_processed %>%
  mutate(CaseType_des = fct_lump(CaseType_des, 3)) %>%
  filter(
    !Award %in% c(0, 1, 9),
    CaseType_des != "Other"
  ) %>%
  ggplot(aes(CaseType_des, Award, color = CaseType_des)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")
# Awards by trial type (jury, bench or no trial).
# Award is numeric, so the placeholder values 0, 1 and 9 are excluded as
# numbers instead of strings in all three plots below.
data_processed %>%
  filter(
    !Award %in% c(0, 1, 9),
    TrialType_des %in% c("jury trial", "bench trial", "no trial")
  ) %>%
  ggplot(aes(TrialType_des, Award, color = TrialType)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")
# Awards by defendant category
data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  ggplot(aes(def_cat, Award, color = def_cat)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")
# Awards by plaintiff category
data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  ggplot(aes(plt_cat, Award, color = plt_cat)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")
# Awards by trial type, faceted by county (rows) and defendant category
# (columns). Award is numeric, so the placeholder values 0, 1 and 9 are
# excluded as numbers instead of strings.
data_processed %>%
  filter(
    !Award %in% c(0, 1, 9),
    TrialType_des %in% c("jury trial", "bench trial", "no trial"),
    county %in% c("211", "216", "217")
  ) %>%
  ggplot(aes(TrialType_des, Award, color = TrialType)) +
  geom_quasirandom(alpha = 0.7, size = 1.5) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar) +
  facet_grid(county ~ def_cat) +
  labs(subtitle = "def_cat on the horizontal axis and county on vertical axis")
The analysis includes the most common case types: 1) Contract Claims, 2) Tort - Automobile, and 3) Negligence Claims other than Medical Malpractice. Major findings are:
Case types are abbreviated in the plots.
# Reduce the number of JMent_des levels: keep only Settlement, Pltf and Def.
# NOTE(review): the plots and counts further down read
# `data_processed_simpleJment`, but nothing visible in this script creates
# it. This definition is inferred from the name and from the echoed output
# (CaseType_des is still character there, with more than two case types) -
# confirm against the original analysis.
data_processed_simpleJment <-
  data_processed %>%
  filter(JMent_des %in% c("Settlement", "Pltf", "Def"))
# reduce # of levels of JMent_des and CaseType_des: lump every case type
# except the two most common into "Other" and drop it, keep the three
# judgments above, and store JMent_des / def_cat as factors without unused
# levels
data_processed_simpleJment_simple_CaseType <-
  data_processed %>%
  mutate(CaseType_des = fct_lump(CaseType_des, 2)) %>%
  filter(
    JMent_des %in% c("Settlement", "Pltf", "Def"),
    CaseType_des != "Other"
  ) %>%
  mutate(
    JMent_des = as.factor(JMent_des),
    def_cat = as.factor(def_cat)
  ) %>%
  droplevels()
# Jurisdiction as a predictor: share of each judgment within a jurisdiction
ggplot(
  data_processed_simpleJment_simple_CaseType,
  aes(x = Jurisdiction, fill = JMent_des)
) +
  geom_bar(position = "fill") +
  labs(y = "Proportion") +
  coord_flip()
# Jurisdiction & CaseType_des as predictors: judgment shares per jurisdiction,
# one panel for each of four case types
data_processed_simpleJment %>%
  # spell out the abbreviated judgment labels for the legend
  mutate(
    JMent_des = as.character(JMent_des),
    JMent_des = case_when(
      JMent_des == "Def" ~ "Defendant",
      JMent_des == "Pltf" ~ "Plaintiff",
      TRUE ~ JMent_des
    )
  ) %>%
  filter(
    CaseType_des %in% c(
      "Contract Claims",
      "Negligence other than Medical Malpractice",
      "Declaratory Judgment",
      "Real Property Cases"
    )
  ) %>%
  ggplot(aes(x = Jurisdiction, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  facet_wrap(~CaseType_des) +
  labs(
    y = "Proportion",
    title = "Table 1: Judgment by Case Type and Jurisdiction",
    fill = "Judgment"
  )
# What's wrong with the bar in Hillsborough North & Negligence? It's all
# settlement! To understand Hillsborough N, list its case types by frequency.
count(
  filter(data_processed_simpleJment, Jurisdiction == "Hills North"),
  CaseType_des,
  sort = TRUE
)
## # A tibble: 10 x 2
## CaseType_des n
## <chr> <int>
## 1 Contract Claims 22
## 2 Declaratory Judgment 8
## 3 Negligence other than Medical Malpractice 8
## 4 Real Property Cases 3
## 5 Replevin 2
## 6 "Complaint for Injunction/To Enjoin Foreclosure " 1
## 7 Foreclosure - Complaint to Enjoin Foreclosure 1
## 8 Plea of Indemnity 1
## 9 "Tort - Medical Malpractice " 1
## 10 Wage Claim 1
# Docket years present among the Hills North rows
count(
  filter(data_processed, Jurisdiction == "Hills North"),
  year,
  sort = TRUE
)
## # A tibble: 1 x 2
## year n
## <chr> <int>
## 1 2016 49
# Same breakdown as the proportion chart, but with stacked bars so the number
# of cases behind each jurisdiction / case type combination is visible
data_processed_simpleJment %>%
  # spell out the abbreviated judgment labels for the legend
  mutate(
    JMent_des = as.character(JMent_des),
    JMent_des = case_when(
      JMent_des == "Def" ~ "Defendant",
      JMent_des == "Pltf" ~ "Plaintiff",
      TRUE ~ JMent_des
    )
  ) %>%
  filter(
    CaseType_des %in% c(
      "Contract Claims",
      "Negligence other than Medical Malpractice",
      "Declaratory Judgment",
      "Real Property Cases"
    )
  ) %>%
  ggplot(aes(Jurisdiction, fill = JMent_des)) +
  geom_bar(position = "stack") +
  coord_flip() +
  facet_wrap(~CaseType_des) +
  # stacked bars show counts, so the axis is labelled as a count rather than
  # as a proportion
  labs(
    y = "Number of cases",
    title = "Table 1: Judgment by Case Type and Jurisdiction",
    fill = "Judgment"
  )
# mosaic: judgment vs. jurisdiction
# cross-tabulate the two variables and print the table flat
tbl <- xtabs(
  formula = ~ JMent_des + Jurisdiction,
  data = data_processed_simpleJment_simple_CaseType
)
ftable(tbl)
## Jurisdiction Belknap Hills North Merrimack
## JMent_des
## Def 31 1 24
## Pltf 59 2 28
## Settlement 146 27 79
library(vcd)
# Shaded mosaic plot of tbl; JMent_des is shown under the name "Judgment"
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE,
  labeling_args = list(set_varnames = c(JMent_des = "Judgment"))
)
# mosaic: judgment vs. case type
# cross-tabulate the two variables and print the table flat
tbl <- xtabs(
  formula = ~ JMent_des + CaseType_des,
  data = data_processed_simpleJment_simple_CaseType
)
ftable(tbl)
## CaseType_des Contract Claims Negligence other than Medical Malpractice
## JMent_des
## Def 42 14
## Pltf 82 7
## Settlement 131 121
library(vcd)
# Shaded mosaic plot of judgment vs. case type with shortened axis labels
mosaic(
  tbl,
  shade = TRUE,
  legend = TRUE,
  labeling_args = list(
    set_varnames = c(
      JMent_des = "Judgment",
      CaseType_des = "Case Type"
    )
  ),
  # CaseType_des has exactly two levels after lumping (Contract Claims and
  # Negligence other than Medical Malpractice), so exactly two labels are
  # supplied, in level order
  set_labels = list(CaseType_des = c("Contract", "Negligence")),
  main = "Court data"
)