Import data

library(tidyverse)

# Load the raw case-level spreadsheet from the project data directory.
DIR <- "C:/Users/sclee1/OneDrive/Documents/R/legalAnalytics/data/"
# The first row of the file is a description of the variables, so it is
# skipped before the header row is read.
data <- read.csv(
  file = paste0(DIR, "stateCases.csv"),
  skip = 1
)

# A few cases list more than one case type in a single CaseType cell. Reshape
# the data so that each row carries exactly one case type; a case with more
# than one case type therefore shows up on more than one row.
data <- data %>%
  # The row number identifies the original case and makes it easier to remove
  # Cam's data later on.
  mutate(case_id = row_number()) %>%
  # Split the cell into up to three pieces, one per listed case type.
  separate(col = CaseType, into = c("s1", "s2", "s3")) %>%
  # Stack the pieces into a single CaseType column, dropping the empty slots.
  gather(key = "type", value = "CaseType", s1:s3, na.rm = TRUE) %>%
  select(-type)

# Import the CaseType code -> description lookup table and merge it onto the
# case data.
CaseType_description <- read.csv(paste0(DIR, "CaseType_description.csv")) %>%
  mutate(
    # Delete explanations within (), then trim the whitespace left in front of
    # the removed parenthesis; without the trim, labels keep a trailing space
    # (e.g. "Forfeiture ").
    CaseType_des = str_trim(str_remove_all(CaseType_des, regex("\\(.*"))),
    # CaseType in `data` is character after separate()/gather(), so keep the
    # lookup key character too instead of joining a factor to a character.
    CaseType = as.character(CaseType)
  )

data_merged <-
  data %>%
  # Name the join key explicitly rather than relying on the implicit
  # "all shared column names" default.
  left_join(CaseType_description, by = "CaseType")

# Structure of data
str(data_merged)
## 'data.frame':    698 obs. of  41 variables:
##  $ DoNo        : Factor w/ 667 levels "211-2015-CV-00001",..: 605 62 357 122 295 356 120 27 28 41 ...
##  $ FileDate    : Factor w/ 344 levels "01/03/2017 •",..: 199 231 236 342 319 125 306 135 135 170 ...
##  $ PNames      : Factor w/ 556 levels "1466 Bodwell Road, LLC",..: 397 20 173 185 420 16 17 410 221 458 ...
##  $ DNames      : Factor w/ 655 levels "10 Commerce Park North Condominium Association; Albireo Properties, LLC; Andover - 10 Commerce Park North, LLC;"| __truncated__,..: 480 378 474 94 583 196 33 524 92 407 ...
##  $ ProSe       : Factor w/ 5 levels "Both","Defendant",..: 3 2 3 5 3 3 2 2 2 2 ...
##  $ PLaw        : Factor w/ 389 levels "9","A. Gerard O'Neil, Jr.",..: 88 120 378 16 172 227 102 25 312 3 ...
##  $ PFirm       : Factor w/ 262 levels "9","Abrahamsen Ratchford, P.C.",..: 211 262 149 5 171 36 262 67 192 221 ...
##  $ EqReq       : Factor w/ 5 levels "0","9","No","Unknown",..: 3 5 5 5 5 5 5 5 5 5 ...
##  $ MonReq      : Factor w/ 184 levels "$0 ","$0.00 ",..: 111 79 87 111 60 110 47 45 111 83 ...
##  $ LoC         : int  7 0 11 3 0 2 2 0 0 0 ...
##  $ DLaw        : Factor w/ 339 levels "0","9","Abbott, Jr., Winslow Kirk; Bello, James A",..: 93 2 28 314 26 240 2 2 2 2 ...
##  $ DFirm       : Factor w/ 189 levels "","0","9","Andrew Eills Law Offices, PLLC; Sheehan, Phinney, Bass + Green, P.A.",..: 46 3 128 130 31 6 3 3 3 3 ...
##  $ X3d         : Factor w/ 44 levels "","0","Alitz, Jeffrey L; Lasna, Meredith M.",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ X3dLaw      : Factor w/ 17 levels "0","9","David W. Johnston",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ X3dFirm     : Factor w/ 14 levels "0","9","Devine, Millimet & Branch, P.A.",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ CtrCl       : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ CrsCl       : int  0 0 0 2 0 0 0 0 0 0 ...
##  $ Rec         : Factor w/ 6 levels "Ignatius","MacLeod; Borenstein",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ Rem         : Factor w/ 7 levels "HRC","Made the appeal and then withdrew, so NA (thought you'd like to know… the guy sued NH and won!)",..: NA NA NA NA NA NA NA NA NA NA ...
##  $ Judge       : Factor w/ 49 levels "","9","Abramson",..: 22 37 14 37 37 23 37 37 37 37 ...
##  $ OthMot      : Factor w/ 325 levels "","Assented Motion to Continue",..: 166 84 306 312 90 207 86 82 325 164 ...
##  $ MTime       : Factor w/ 6 levels "","0","1","1; 2",..: 6 2 6 2 2 2 2 2 6 2 ...
##  $ MTA         : Factor w/ 9 levels "","0","1","2",..: 2 2 4 3 2 2 2 2 2 2 ...
##  $ MTD         : Factor w/ 14 levels "0","1","1; 3",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ DSJ         : Factor w/ 7 levels "0","1","10","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PSJ         : Factor w/ 6 levels "0","1","1;3",..: 1 1 2 1 1 1 1 1 1 1 ...
##  $ DMIL        : Factor w/ 10 levels "0","1","1; 10",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ PMIL        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ Stp         : int  0 0 0 1 1 0 0 0 0 0 ...
##  $ Arb         : Factor w/ 3 levels "N/A","No","Yes": 2 NA 2 2 NA 2 2 NA NA NA ...
##  $ TrialType   : Factor w/ 6 levels "0","1","2","8",..: 2 5 5 5 5 5 5 5 5 5 ...
##  $ DISP        : Factor w/ 16 levels "0","1","10","2",..: 9 15 4 7 6 6 11 6 6 15 ...
##  $ JMent       : Factor w/ 7 levels "1","1; NA","2",..: 1 1 1 2 NA NA 1 NA NA 1 ...
##  $ NOJ         : Factor w/ 7 levels "0","1","1; 0",..: 2 2 2 2 1 1 2 1 1 2 ...
##  $ Award       : Factor w/ 87 levels "$1,700,000 ",..: 5 47 54 50 24 70 16 71 71 51 ...
##  $ Eq          : Factor w/ 6 levels "$34,000 ","0",..: 2 6 2 2 6 2 2 6 6 6 ...
##  $ TermDate    : Factor w/ 448 levels "1/10/2017","1/10/2018",..: 302 89 59 49 271 252 210 444 253 164 ...
##  $ Enforce     : Factor w/ 10 levels "0","1","1; 2",..: 7 4 3 3 3 9 8 8 8 8 ...
##  $ case_id     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ CaseType    : chr  "4" "2" "2" "2" ...
##  $ CaseType_des: chr  "Contract - Other" "Contract - Collection" "Contract - Collection" "Contract - Collection" ...

# Basic descriptive statistics for every column of the merged data
summary(data_merged)
##                 DoNo           FileDate  
##  212-2013-CV-00012:  3   2/24/2016 : 14  
##  216-2011-CV-00105:  3   2/1/2016  : 12  
##  217-2010-CV-05006:  3   10/31/2016:  9  
##  217-2011-CV-00270:  3   5/2/2016  :  8  
##  217-2011-CV-00730:  3   9/12/2016 :  8  
##  217-2012-CV-00658:  3   1/25/2016 :  7  
##  (Other)          :680   (Other)   :640  
##                                       PNames   
##  Discover Bank                           : 20  
##  American Express Centurion Bank         : 16  
##  Barclays Bank Delaware                  :  8  
##  Paugus Bay Plaza Condominium Association:  8  
##  Barcklay Bank Delaware                  :  7  
##  Concord Hospital                        :  6  
##  (Other)                                 :633  
##                                                                                                                                    DNames   
##  Town of Gilford                                                                                                                      :  6  
##  AEO Associates, LLC; KMO Associates, LLC; KMO Associates, LP; Aaron Olson                                                            :  3  
##  Arthur Cleaves; Jacob Emerson; Tom Gerrish; Joseph Guerra; Mark Jadkowski; Majella Global Technologies Asia Pacific PTY LTD          :  3  
##  Ballentine & Company, Inc.; Ballentine Partners, LLC; Roy Ballentine; Andrew McMorrow; Gregory Peterson; Kyle Schaffer; Claudia Shilo:  3  
##  BK Systems, Inc; Ernest Houle; Karlton Klardie                                                                                       :  3  
##  Bryan Gould; Thomas Janosz; N.H. Board of Registration of Funeral Directors and Embalmers; Susan Simonds                             :  3  
##  (Other)                                                                                                                              :677  
##        ProSe                      PLaw    
##  Both     : 32   9                  : 45  
##  Defendant:216   Robert L. O'Brien  : 24  
##  Neither  :386   O'Brien, Robert L  : 19  
##  Plaintiff: 24   N/A                : 12  
##  Some DF  : 40   Arnold Rosenblatt  : 10  
##                  Michael J. Fontaine: 10  
##                  (Other)            :578  
##                                        PFirm         EqReq    
##  9                                        :164   0      :  2  
##  Schlee & Stillman, LLC                   : 24   9      : 14  
##  Zwicker & Associates, P.C.               : 20   No     : 94  
##  Welts, White, & Fontaine, P.C.           : 16   Unknown:  7  
##  Cook, Little, Rosenblatt, & Manson, PLLC : 15   Yes    :580  
##  McLane, Graf, Raulerson & Middleton, P.A.: 15   NA's   :  1  
##  (Other)                                  :444                
##        MonReq         LoC                         DLaw    
##  1        :383   Min.   : 0.000   9                 :188  
##  0        :122   1st Qu.: 0.000   N/A               : 83  
##  9        : 11   Median : 0.000   Burt, Gary Michael:  5  
##  25,000.00:  2   Mean   : 3.378   Joseph D. Becher  :  5  
##  $0       :  1   3rd Qu.: 4.000   Daniel J. Orroth  :  4  
##  $0.00    :  1   Max.   :56.000   Edward D. Philpot :  4  
##  (Other)  :178                    (Other)           :409  
##                                         DFirm    
##  9                                         :299  
##  N/A                                       : 83  
##  Getman, Schulthess, Steere, & Poulin, P.A.: 11  
##  Civil Bureau                              :  8  
##  Mitchell Municipal Group, P.A.            :  8  
##  Primmer, Piper, Eggleston, & Cramer, P.C. :  8  
##  (Other)                                   :281  
##                         X3d                    X3dLaw   
##  0                        :640   0                :667  
##  Andrew Howe; Martina Howe:  4   N/A              :  8  
##                           :  3   Ethan G. Wood    :  4  
##  Cross Insurance Agency   :  3   Donald A. Kennedy:  3  
##  Delaney O'Hara, Diana,   :  3   Leigh S. Willey  :  3  
##  Donald A. Kennedy        :  3   Jamie N. Hage    :  2  
##  (Other)                  : 42   (Other)          : 11  
##                             X3dFirm        CtrCl            CrsCl        
##  0                              :666   Min.   :0.0000   Min.   :0.00000  
##  N/A                            :  8   1st Qu.:0.0000   1st Qu.:0.00000  
##  9                              :  6   Median :0.0000   Median :0.00000  
##  Patrick Wood Law Office, PLLC  :  4   Mean   :0.1074   Mean   :0.02436  
##  Devine, Millimet & Branch, P.A.:  3   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Hage Hodes, P.A.               :  2   Max.   :2.0000   Max.   :2.00000  
##  (Other)                        :  9                                     
##                   Rec               Rem            Judge    
##  Ignatius           :  4   N/A        :185   O'Neill  :260  
##  MacLeod; Borenstein:  1   Removed    : 22   Brown    : 75  
##  N/a                :  1   Transfer   :  4   Abramson : 66  
##  N/A                :192   Transferred:  2   McNamara : 58  
##  Na                 :  1   HRC        :  1   Kissinger: 36  
##  O'Neill            :  5   (Other)    :  2   (Other)  :151  
##  NA's               :494   NA's       :482   NA's     : 52  
##                                 OthMot     MTime          MTA     
##  None                              :205       :  2   0      :615  
##  Motion for Entry of Final Judgment: 44   0   :486   1      : 66  
##  Motion for Voluntary Nonsuit      : 16   1   : 30   2      : 10  
##  Assented to Motion to Continue    : 14   1; 2:  5   3      :  2  
##  Motion to Strike                  : 12   1;2 :  6          :  1  
##  Motion to Amend Complaint         :  9   2   :169   2;4    :  1  
##  (Other)                           :398              (Other):  3  
##       MTD        DSJ       PSJ           DMIL          PMIL       
##  0      :601   0   :653   0  :667   0      :679   Min.   : 0.000  
##  1      : 32   1   :  9   1  : 17   10     :  5   1st Qu.: 0.000  
##  3      : 20   10  : 21   1;3:  1   9      :  5   Median : 0.000  
##  10     : 11   2   :  2   10 :  5   1      :  3   Mean   : 0.149  
##  2      :  9   2; 4:  1   3  :  7   1; 10  :  1   3rd Qu.: 0.000  
##  6      :  9   3   : 11   6  :  1   1; 3   :  1   Max.   :10.000  
##  (Other): 16   4   :  1             (Other):  4                   
##       Stp           Arb      TrialType      DISP         JMent    
##  Min.   :0.0000   N/A :193   0  : 17   3      :226   1      :142  
##  1st Qu.:0.0000   No  :110   1  : 89   9      :121   2      :102  
##  Median :0.0000   Yes : 58   2  : 54   8      : 83   x      : 89  
##  Mean   :0.5057   NA's:337   8  :  1   6      : 78   N/A    : 23  
##  3rd Qu.:1.0000              9  :535   1      : 73   9      : 21  
##  Max.   :2.0000              N/A:  2   (Other):116   (Other):  3  
##                                        NA's   :  1   NA's   :318  
##       NOJ              Award            Eq           TermDate  
##  0      :362   0          :250   $34,000 :  1   Open     : 19  
##  9      :231   1          :216   0       :191   4/3/2017 :  6  
##  1      : 89   9          :146   1       :  4   3/30/2015:  5  
##  3      :  9   $143,552   :  3   2       :  2   4/13/2017:  5  
##  2      :  3   $1,700,000 :  1   3       :  6   6/27/2016:  5  
##  (Other):  2   $1,700.74  :  1   9       :494   (Other)  :657  
##  NA's   :  2   (Other)    : 81                  NA's     :  1  
##     Enforce       case_id        CaseType         CaseType_des      
##  0      :646   Min.   :  1.0   Length:698         Length:698        
##  1      : 19   1st Qu.:144.2   Class :character   Class :character  
##  3      : 16   Median :318.5   Mode  :character   Mode  :character  
##  1;3    :  4   Mean   :321.9                                        
##  2      :  4   3rd Qu.:492.8                                        
##  1; 2   :  3   Max.   :667.0                                        
##  (Other):  6

Clean

# Patterns used to create def_cat (plt_cat): whether a business or a government
# body is involved as defendant (or plaintiff). Both patterns are meant to be
# matched case-insensitively against the party-name columns.
#
# bus_symbols: one alternative per business marker (entity suffixes such as
# LLC / Inc / Corp, "d/b/a", "company", "association", "bank", ...). Most
# alternatives require leading whitespace so they only match as a separate
# token at the end of or inside a name.
bus_symbols <- paste(
  c(
    "\\sL\\.?L\\.?C\\.?",
    "\\sP\\.?C\\.?",
    "\\sP\\.?A\\.?\\s",
    "\\sInc",
    "\\sL\\.?P\\.?",
    "\\sL\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?L\\.?P\\.?",
    "\\sP\\.?L\\.?L\\.?C\\.?",
    "\\sd\\.?b\\.?a\\.?",
    "\\sd/b/a",
    "\\sp\\.?b\\.?c\\.?",
    "\\sg\\.?p\\.?",
    "\\scorp",
    "\\slimited",
    "\\sltd\\.?",
    "\\sco(\\.|$)",
    "\\scom(,\\s|\\s|$)",
    "\\scomp\\s?",
    "\\scompany\\s?",
    "\\scompanies\\.?",
    "\\spartnership",
    "\\ssole\\sproprietorship",
    "\\sassociation",
    "bank"
  ),
  collapse = "|"
)
# gov_symbols: match "state", "city" or "town" only as whole words. Without the
# word boundaries, names such as "Estate of ..." or "Townsend" would be
# classified as government parties.
gov_symbols <- "\\b(state|city|town)\\b"

# Build the analysis data set from the merged case data: split the docket
# number, parse dates, classify the parties, label the coded outcome columns,
# convert dollar columns to numbers, and assign jurisdiction / case-type /
# pro-se groupings.
data_processed <- data_merged %>%
  # Docket number -> county, year, type and case number (DoNo itself is kept).
  separate(
    col = DoNo,
    into = c("county", "year", "type", "caseN"),
    remove = FALSE
  ) %>%
  # Parse the filing and termination dates; length of suit is their
  # difference in days.
  mutate(
    FileDate = as.Date(FileDate, format = "%m/%d/%Y"),
    TermDate = as.Date(TermDate, format = "%m/%d/%Y"),
    lengthOfsuit = as.numeric(difftime(TermDate, FileDate, units = "days"))
  ) %>%
  # Party categories. The business pattern is tested first, so a name that
  # matches both patterns is labelled "Bus".
  # NOTE(review): the fallback label is "Neither" for def_cat but "neither"
  # for plt_cat; kept as-is because later code may rely on these values.
  mutate(
    def_cat = case_when(
      str_detect(DNames, regex(bus_symbols, ignore_case = TRUE)) ~ "Bus",
      str_detect(DNames, regex(gov_symbols, ignore_case = TRUE)) ~ "Gov",
      TRUE ~ "Neither"
    ),
    plt_cat = case_when(
      str_detect(PNames, regex(bus_symbols, ignore_case = TRUE)) ~ "Bus",
      str_detect(PNames, regex(gov_symbols, ignore_case = TRUE)) ~ "Gov",
      TRUE ~ "neither"
    ),
    # Is a business on both sides, on exactly one side, or on neither?
    business = case_when(
      def_cat == "Bus" & plt_cat == "Bus" ~ "both",
      def_cat == "Bus" | plt_cat == "Bus" ~ "only_one",
      TRUE ~ "neither"
    ),
    # Same question for government parties.
    government = case_when(
      def_cat == "Gov" & plt_cat == "Gov" ~ "both",
      def_cat == "Gov" | plt_cat == "Gov" ~ "only_one",
      TRUE ~ "neither"
    )
  ) %>%
  # Human-readable labels for the coded outcome columns.
  mutate(
    # Disposition: codes outside 1-10 (including multi-code cells) stay NA.
    DISP_des = case_when(
      DISP == 1 ~ "dismissed",
      DISP == 2 ~ "summary judgement",
      DISP == 3 ~ "settled",
      DISP == 4 ~ "jury verdict",
      DISP == 5 ~ "bench trial",
      DISP == 6 ~ "default judgment",
      DISP == 7 ~ "voluntary nonsuit",
      DISP == 8 ~ "other resolution",
      DISP == 9 ~ "unknown resolution",
      DISP == 10 ~ "pending"
    ),
    # Trial type: unrecognised codes are passed through unchanged.
    TrialType_des = case_when(
      TrialType == "1" ~ "jury trial",
      TrialType == "2" ~ "bench trial",
      TrialType == "9" ~ "no trial",
      TrialType == "0" ~ "still open",
      TRUE ~ as.character(TrialType)
    ),
    # Judgment: every value other than 1, 2, 3 or 9 (missing values included)
    # is labelled "Settlement".
    JMent_des = case_when(
      JMent == 1 ~ "Pltf",
      JMent == 2 ~ "Def",
      JMent == 3 ~ "Both parties",
      JMent == 9 ~ "still open",
      TRUE ~ "Settlement"
    ),
    # plt_winning will be a response var to predict the odds of the plaintiff
    # winning; it is NA unless the judgment went to one side.
    plt_winning = case_when(
      JMent_des == "Pltf" ~ "1",
      JMent_des == "Def" ~ "0",
      TRUE ~ NA_character_
    )
  ) %>%
  # Dollar columns: collapse white space, strip "$" and ",", then convert to
  # numeric.
  mutate(
    MonReq = as.numeric(str_remove_all(str_squish(MonReq), "\\$|\\,")),
    Award = as.numeric(str_remove_all(str_squish(Award), "\\$|\\,"))
  ) %>%
  # county (origin of case) is taken from the docket id, but Jurisdiction
  # (where it's tried) is identified by Chantalle and assigned here by
  # case_id range.
  mutate(
    Jurisdiction = case_when(
      case_id <= 425 ~ "Belknap",
      between(case_id, 426, 618) ~ "Merrimack",
      case_id >= 619 ~ "Hills North",
      TRUE ~ NA_character_
    )
  ) %>%
  # Chantalle requested changes in case type after finding errors in the
  # students' work: group selected codes, then shorten two of the labels.
  mutate(
    CaseType_des = case_when(
      CaseType %in% c("1", "2", "4", "24") ~ "Contract Claims",
      CaseType %in% c("7", "8", "9", "11", "12", "13", "37", "40") ~
        "Negligence Claims other than Medical Malpractice",
      CaseType %in% c("14", "15", "16", "17") ~ "Real Property Cases",
      TRUE ~ CaseType_des
    ),
    CaseType_des = case_when(
      CaseType_des == "Declaratory Judgement Unknown" ~ "Declaratory Judgment",
      CaseType_des == "Negligence Claims other than Medical Malpractice" ~
        "Negligence other than Medical Malpractice",
      TRUE ~ CaseType_des
    )
  ) %>%
  # Simplified ProSe: any other value (e.g. "Some DF") becomes NA.
  mutate(
    ProSe_rev = case_when(
      ProSe %in% c("Defendant", "Plaintiff") ~ "only_one",
      ProSe == "Both" ~ "both",
      ProSe == "Neither" ~ "neither",
      TRUE ~ NA_character_
    )
  )

# Compact overview of the processed data: one line per column with its type
# and first few values
glimpse(data_processed)
## Observations: 698
## Variables: 56
## $ DoNo          <fct> 217-2011-CV-00264, 211-2015-CV-00092, 216-2012-C...
## $ county        <chr> "217", "211", "216", "211", "211", "216", "211",...
## $ year          <chr> "2011", "2015", "2012", "2015", "2016", "2012", ...
## $ type          <chr> "CV", "CV", "CV", "CV", "CV", "CV", "CV", "CV", ...
## $ caseN         <chr> "00264", "00092", "00521", "00203", "00224", "00...
## $ FileDate      <date> 2011-04-27, 2015-05-06, 2012-06-13, 2015-09-08,...
## $ PNames        <fct> "Olde Province Commons, LLC", "American Express ...
## $ DNames        <fct> "Nitro Fireworks Co., Inc; Charles Shivery", "Ke...
## $ ProSe         <fct> Neither, Defendant, Neither, Some DF, Neither, N...
## $ PLaw          <fct> "Donald C. Crandlemire", "Frederick B. Seitz, IV...
## $ PFirm         <fct> "Shaheen & Gordon, P.A.", "Zwicker & Associates,...
## $ EqReq         <fct> No, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes, Yes,...
## $ MonReq        <dbl> 1.00, 4768.06, 485154.04, 1.00, 27112.01, 0.00, ...
## $ LoC           <int> 7, 0, 11, 3, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
## $ DLaw          <fct> Edward D. Philpot, 9, Brian D. Duffy, Timothy K....
## $ DFirm         <fct> "Edward D. Philpot, Jr., PLLC", "9", "Nixon Peab...
## $ X3d           <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ X3dLaw        <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ X3dFirm       <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CtrCl         <int> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ CrsCl         <int> 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Rec           <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Rem           <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, ...
## $ Judge         <fct> "McNamara", "O'Neill", "Garfunkel; McNamara", "O...
## $ OthMot        <fct> Motion to Amend Brief Statement of Defenses; Mot...
## $ MTime         <fct> 2, 0, 2, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, ...
## $ MTA           <fct> 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...
## $ MTD           <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ DSJ           <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ PSJ           <fct> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ DMIL          <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ PMIL          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Stp           <int> 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
## $ Arb           <fct> No, NA, No, No, NA, No, No, NA, NA, NA, NA, NA, ...
## $ TrialType     <fct> 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, ...
## $ DISP          <fct> 4, 8, 2, 3; 6, 3, 3, 6, 3, 3, 8, 6, 8, 6, 6, 6, ...
## $ JMent         <fct> 1, 1, 1, 1; NA, NA, NA, 1, NA, NA, 1, 1, 1, 1, 1...
## $ NOJ           <fct> 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, ...
## $ Award         <dbl> 108628.00, 4069.06, 485154.04, 40000.00, 2200.00...
## $ Eq            <fct> "0", "9", "0", "0", "9", "0", "0", "9", "9", "9"...
## $ TermDate      <date> 2012-06-14, 2015-11-24, 2012-10-04, 2016-10-26,...
## $ Enforce       <fct> 2; 3, 1; 3, 1; 2, 1; 2, 1; 2, 4, 3, 3, 3, 3, 3, ...
## $ case_id       <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1...
## $ CaseType      <chr> "4", "2", "2", "2", "2", "19", "2", "4", "40", "...
## $ CaseType_des  <chr> "Contract Claims", "Contract Claims", "Contract ...
## $ lengthOfsuit  <dbl> 414, 202, 113, 414, 616, 466, 226, 196, 78, 310,...
## $ def_cat       <chr> "Bus", "Neither", "Bus", "Bus", "Bus", "Neither"...
## $ plt_cat       <chr> "Bus", "Bus", "Bus", "Bus", "Bus", "Bus", "neith...
## $ business      <chr> "both", "only_one", "both", "both", "both", "onl...
## $ government    <chr> "neither", "neither", "neither", "neither", "nei...
## $ DISP_des      <chr> "jury verdict", "other resolution", "summary jud...
## $ TrialType_des <chr> "jury trial", "no trial", "no trial", "no trial"...
## $ JMent_des     <chr> "Pltf", "Pltf", "Pltf", "Settlement", "Settlemen...
## $ plt_winning   <chr> "1", "1", "1", NA, NA, NA, "1", NA, NA, "1", "1"...
## $ Jurisdiction  <chr> "Belknap", "Belknap", "Belknap", "Belknap", "Bel...
## $ ProSe_rev     <chr> "neither", "only_one", "neither", NA, "neither",...

Export spreadsheet data

#write.csv(data_processed, "C:/Users/sclee1/OneDrive/Documents/R/legalAnalytics/data/stateCases_spreadsheet.csv")

Explore

# Number of distinct cases per docket year
ggplot(
  distinct(data_processed, case_id, year),
  aes(x = year)
) +
  geom_bar(position = "stack")


# Same counts, split into one panel per jurisdiction
ggplot(
  distinct(data_processed, case_id, year, Jurisdiction),
  aes(x = year, fill = Jurisdiction)
) +
  geom_bar(position = "stack") +
  facet_wrap(~Jurisdiction, ncol = 1) +
  coord_flip() +
  labs(x = NULL, y = "Number of cases")


# Distribution of the length of suit (days, log scale); non-positive lengths
# are dropped because they cannot be shown on a log axis
ggplot(
  filter(data_processed, lengthOfsuit > 0),
  aes(x = lengthOfsuit)
) +
  geom_histogram() +
  scale_x_log10()


# Ten most common case types, without Restraining Order
data_processed %>%
  count(CaseType_des, sort = TRUE) %>%
  filter(CaseType_des != "Restraining Order") %>%
  head(n = 10) %>%
  # Truncate long labels to 30 characters and order the bars by frequency
  mutate(CaseType_des = fct_reorder(str_sub(CaseType_des, 1, 30), n)) %>%
  ggplot(aes(x = CaseType_des, y = n)) +
  geom_col(fill = "cornflowerblue") +
  coord_flip() +
  labs(
    title = "Top 10 Most Common Case Types in New Hampshire",
    x = NULL,
    y = "Number of Cases"
  )


# Ten most common case types, all types included
data_processed %>%
  count(CaseType_des, sort = TRUE) %>%
  head(n = 10) %>%
  mutate(CaseType_des = fct_reorder(str_sub(CaseType_des, 1, 30), n)) %>%
  ggplot(aes(x = CaseType_des, y = n)) +
  geom_col() +
  coord_flip()


# Frequency of each disposition label, most common first
count(data_processed, DISP_des, sort = TRUE)
## # A tibble: 11 x 2
##    DISP_des               n
##    <chr>              <int>
##  1 settled              226
##  2 unknown resolution   121
##  3 other resolution      83
##  4 default judgment      78
##  5 dismissed             73
##  6 voluntary nonsuit     63
##  7 pending               18
##  8 summary judgement     16
##  9 <NA>                  11
## 10 bench trial            6
## 11 jury verdict           3

# Frequency of each judgment label, most common first
count(data_processed, JMent_des, sort = TRUE)
## # A tibble: 5 x 2
##   JMent_des        n
##   <chr>        <int>
## 1 Settlement     432
## 2 Pltf           142
## 3 Def            102
## 4 still open      21
## 5 Both parties     1

# Cross-tabulation of disposition and judgment labels, most common first
count(data_processed, DISP_des, JMent_des, sort = TRUE)
## # A tibble: 24 x 3
##    DISP_des           JMent_des      n
##    <chr>              <chr>      <int>
##  1 settled            Settlement   226
##  2 unknown resolution Settlement   109
##  3 default judgment   Pltf          69
##  4 dismissed          Def           58
##  5 other resolution   Pltf          48
##  6 voluntary nonsuit  Settlement    38
##  7 other resolution   Settlement    24
##  8 voluntary nonsuit  Def           24
##  9 pending            still open    18
## 10 dismissed          Settlement    15
## # ... with 14 more rows

# Whether the plaintiff sought an equitable remedy
count(data_processed, EqReq, sort = TRUE)
## # A tibble: 6 x 2
##   EqReq       n
##   <fct>   <int>
## 1 Yes       580
## 2 No         94
## 3 9          14
## 4 Unknown     7
## 5 0           2
## 6 <NA>        1

# Whether (and how much of) a monetary remedy the plaintiff sought
count(data_processed, MonReq, sort = TRUE)
## # A tibble: 180 x 2
##    MonReq     n
##     <dbl> <int>
##  1     1    383
##  2     0    124
##  3     9     11
##  4 35000      3
##  5 25000      2
##  6  1172.     1
##  7  1436.     1
##  8  1501.     1
##  9  1503.     1
## 10  1582.     1
## # ... with 170 more rows

# Distribution of the requested amount (log scale). 1 and 0 are not dollar
# values, so only requests above 10 are plotted (this also drops the value 9).
data_processed %>%
  filter(MonReq > 10) %>%
  ggplot(aes(MonReq)) +
  geom_histogram() +
  # `labels` spelled out in full rather than relying on partial matching of
  # `label`
  scale_x_log10(labels = scales::dollar)


# Distribution of the awarded amount (log scale). Award is numeric at this
# point, so the coded values 0, 1 and 9 are excluded by number instead of by
# comparing against strings.
data_processed %>%
  filter(!Award %in% c(0, 1, 9)) %>%
  ggplot(aes(Award)) +
  geom_histogram(binwidth = 0.5) +
  scale_x_log10(labels = scales::dollar)


# Requested amounts by equitable-request status, restricted to clear Yes/No
# answers and requests above 10
count(
  filter(data_processed, EqReq %in% c("Yes", "No"), MonReq > 10),
  EqReq, MonReq,
  sort = TRUE
)
## # A tibble: 177 x 3
##    EqReq MonReq     n
##    <fct>  <dbl> <int>
##  1 Yes   35000      3
##  2 Yes   25000      2
##  3 Yes    1172.     1
##  4 Yes    1436.     1
##  5 Yes    1501.     1
##  6 Yes    1503.     1
##  7 Yes    1582.     1
##  8 Yes    1670.     1
##  9 Yes    1671.     1
## 10 Yes    1698.     1
## # ... with 167 more rows

# Rows per county code (taken from the docket number)
count(data_processed, county, sort = TRUE)
## # A tibble: 8 x 2
##   county     n
##   <chr>  <int>
## 1 211      342
## 2 216      252
## 3 217       66
## 4 218       14
## 5 212        9
## 6 226        8
## 7 215        4
## 8 219        3

# Rows per jurisdiction
count(data_processed, Jurisdiction, sort = TRUE)
## # A tibble: 3 x 2
##   Jurisdiction     n
##   <chr>        <int>
## 1 Belknap        456
## 2 Merrimack      193
## 3 Hills North     49

# Rows per defendant category
count(data_processed, def_cat, sort = TRUE)
## # A tibble: 3 x 2
##   def_cat     n
##   <chr>   <int>
## 1 Neither   387
## 2 Bus       286
## 3 Gov        25

# Rows per plaintiff category
count(data_processed, plt_cat, sort = TRUE)
## # A tibble: 3 x 2
##   plt_cat     n
##   <chr>   <int>
## 1 neither   378
## 2 Bus       304
## 3 Gov        16

# Rows per pro-se status
count(data_processed, ProSe, sort = TRUE)
## # A tibble: 5 x 2
##   ProSe         n
##   <fct>     <int>
## 1 Neither     386
## 2 Defendant   216
## 3 Some DF      40
## 4 Both         32
## 5 Plaintiff    24

# Rows per plaintiff's lawyer
count(data_processed, PLaw, sort = TRUE)
## # A tibble: 389 x 2
##    PLaw                    n
##    <fct>               <int>
##  1 9                      45
##  2 Robert L. O'Brien      24
##  3 O'Brien, Robert L      19
##  4 N/A                    12
##  5 Arnold Rosenblatt      10
##  6 Michael J. Fontaine    10
##  7 Peter S. Cowan          9
##  8 Benjamin R. Roberge     8
##  9 Edward D. Philpot       8
## 10 Niederman, Jay M        8
## # ... with 379 more rows

# Length of the complaint in pages (log scale); zero-length entries are dropped
ggplot(
  filter(data_processed, LoC != 0),
  aes(x = LoC)
) +
  geom_histogram(binwidth = 0.1) +
  scale_x_log10()


# A majority had no trial; 89 jury trials vs. 54 bench trials
count(data_processed, TrialType_des, sort = TRUE)
## # A tibble: 6 x 2
##   TrialType_des     n
##   <chr>         <int>
## 1 no trial        535
## 2 jury trial       89
## 3 bench trial      54
## 4 still open       17
## 5 N/A               2
## 6 8                 1

MonReq versus Award

There is a strong positive, roughly one-to-one association between the amount of money requested and the amount awarded. In addition, the amount a plaintiff requests doesn’t seem to reduce the likelihood of receiving an award.

# Requested vs. awarded amount (both on log scales) when $ was awarded
ggplot(
  filter(
    data_processed,
    MonReq > 10,
    !Award %in% c("0", "1", "9")
  ),
  aes(x = Award, y = MonReq)
) +
  geom_point() +
  scale_x_log10(labels = scales::dollar) +
  scale_y_log10(labels = scales::dollar)


# When $ was awarded versus when not
library(ggbeeswarm)

# Requested amount when $ was awarded versus when it was not
data_processed %>%
  # Filter out the undefined amount (1); I'm not sure what 9 means, so it is
  # left out as well
  filter(MonReq > 10, !Award %in% c("1", "9")) %>%
  mutate(
    Award_cat = if_else(Award == "0", "No Monetary Award", "Monetary Award")
  ) %>%
  ggplot(aes(x = Award_cat, y = MonReq, color = Award_cat)) +
  geom_boxplot(notch = TRUE) +
  scale_y_log10(labels = scales::dollar) +
  labs(
    title = "Table 3: Effect of Monetary Request on Outcome and Award",
    x = "Outcome",
    y = "Plaintiff’s Monetary Request"
  ) +
  theme(legend.position = "none")


# Median requested amount for each award outcome
med_MonReqs <- data_processed %>%
  # Filter out the undefined amount (1); I'm not sure what 9 means, so it is
  # left out as well
  filter(MonReq > 10, !Award %in% c("1", "9")) %>%
  mutate(
    Award_cat = if_else(Award == "0", "No Monetary Award", "Monetary Award")
  ) %>%
  group_by(Award_cat) %>%
  summarise(med_MonReq = median(MonReq))

med_MonReqs
## # A tibble: 2 x 2
##   Award_cat         med_MonReq
##   <chr>                  <dbl>
## 1 Monetary Award         7170.
## 2 No Monetary Award     14963

# Requested amounts as a beeswarm, one panel per award outcome, with the
# median request of each outcome drawn as a horizontal line
data_processed %>%
  # Filter out the undefined amount (1); I'm not sure what 9 means, so it is
  # left out as well
  filter(MonReq > 10, !Award %in% c("1", "9")) %>%
  mutate(
    Award_cat = if_else(Award == "0", "No Monetary Award", "Monetary Award")
  ) %>%
  ggplot(aes(x = "", y = MonReq, color = Award_cat)) +
  geom_quasirandom() +
  geom_hline(
    data = med_MonReqs,
    aes(yintercept = med_MonReq, color = Award_cat)
  ) +
  facet_wrap(~Award_cat) +
  scale_y_log10(labels = scales::dollar) +
  labs(
    title = "Table 3: Effect of Monetary Request on Outcome and Award",
    x = "Outcome",
    y = "Plaintiff’s Monetary Request"
  ) +
  theme(legend.position = "none")

Award by county, by NOS, by TrialType, and by Plt_Cat and Def_Cat

Awards mostly range from $1,000 to $1 million, and half of them fall below about $14,000. Interpret with caution: only 85 of the 667 cases have an identified award value.

# Median award by case type, leaving out the coded Award values 0, 1 and 9
summarise(
  group_by(
    filter(data_processed, !Award %in% c("0", "1", "9")),
    CaseType_des
  ),
  median(Award, na.rm = TRUE)
)
## # A tibble: 11 x 2
##    CaseType_des                                 `median(Award, na.rm = TRU~
##    <chr>                                                              <dbl>
##  1 "Complaint for Injunction/To Enjoin Foreclo~                      26481.
##  2 Consumer Protection Act                                          143552 
##  3 Contract Claims                                                   16115.
##  4 Declaratory Judgment                                             132700.
##  5 "Forfeiture "                                                      2532.
##  6 Interpleder                                                        9525.
##  7 Negligence other than Medical Malpractice                        138009.
##  8 Plea of Indemnity                                                 81649.
##  9 "Tort - Intentional "                                            143552 
## 10 "Tort - Medical Malpractice "                                    125000 
## 11 Wage Claim                                                         5500
  

library(ggbeeswarm)

# Remove Cam's data (case_id 426 through 618) using the row numbers created
# early on
data_processed_HilCounty <- filter(
  data_processed,
  case_id <= 425 | case_id >= 619
)

data_processed %>%
  filter(Award > 10) %>%
  summarise(caseN_award = n())
##   caseN_award
## 1          85

# Overall median award as a bare number (Award values 0, 1 and 9 excluded).
median_Award <-
  filter(data_processed, !(Award %in% c("0", "1", "9"))) %>%
  summarise(award_median = median(Award, na.rm = TRUE)) %>%
  pull(award_median)

# Histogram of awards on a log10 dollar axis; the dashed line marks the median.
filter(data_processed, !(Award %in% c("0", "1", "9"))) %>%
  ggplot(aes(x = Award)) +
  scale_x_log10(labels = scales::dollar) +
  geom_histogram(fill = "cornflowerblue", binwidth = 0.5) +
  geom_vline(xintercept = median_Award, linetype = "dashed")



# Awards by county (211, 216, 217 only), log10 dollar axis, one colour per county.
data_processed %>%
  filter(!(Award %in% c("0", "1", "9"))) %>%
  filter(county %in% c("211", "216", "217")) %>%
  ggplot(aes(x = county, y = Award, color = county)) +
  geom_quasirandom(size = 1.5, alpha = 0.7) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar)

# Awards by case type. Case types are lumped with fct_lump(..., 3) first,
# then the resulting "Other" bucket is dropped from the plot.
mutate(data_processed, CaseType_des = fct_lump(CaseType_des, 3)) %>%
  filter(!(Award %in% c("0", "1", "9"))) %>%
  filter(CaseType_des != "Other") %>%
  ggplot(aes(x = CaseType_des, y = Award, color = CaseType_des)) +
  geom_quasirandom(size = 1.5, alpha = 0.7) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar)


# Awards by trial type (jury trial, bench trial, no trial) on a log10 dollar axis.
# Award values 0, 1 and 9 are excluded.
# Colour is mapped to TrialType_des, the same variable shown on the x axis, so
# each swarm gets its own discrete colour, as in the sibling plots (county,
# def_cat, plt_cat). The legend is hidden because the axis labels the groups.
data_processed %>%
  filter(!Award %in% c("0", "1", "9"),
         TrialType_des %in% c("jury trial", "bench trial", "no trial")) %>%
  ggplot(aes(TrialType_des, Award, color = TrialType_des)) +
  geom_quasirandom(alpha = 0.7,
                   size = 1.5) +
  scale_y_log10(labels = scales::dollar) +
  theme(legend.position = "none")


# Awards by defendant category (def_cat), log10 dollar axis.
filter(data_processed, !(Award %in% c("0", "1", "9"))) %>%
  ggplot(aes(x = def_cat, y = Award, color = def_cat)) +
  geom_quasirandom(size = 1.5, alpha = 0.7) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar)

# Awards by plaintiff category (plt_cat), log10 dollar axis.
filter(data_processed, !(Award %in% c("0", "1", "9"))) %>%
  ggplot(aes(x = plt_cat, y = Award, color = plt_cat)) +
  geom_quasirandom(size = 1.5, alpha = 0.7) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar)


# Awards by trial type, faceted by county (rows) and def_cat (columns).
# Restricted to counties 211, 216, 217 and to the three named trial types;
# Award values 0, 1 and 9 are excluded.
# Colour follows TrialType_des (the x-axis variable) so every trial type has a
# discrete colour that is consistent across facets and with the other plots.
data_processed %>%
  filter(!Award %in% c("0", "1", "9"),
         TrialType_des %in% c("jury trial", "bench trial", "no trial"),
         county %in% c("211", "216", "217")) %>%
  ggplot(aes(TrialType_des, Award, color = TrialType_des)) +
  geom_quasirandom(alpha = 0.7,
                   size = 1.5) +
  theme(legend.position = "none") +
  scale_y_log10(labels = scales::dollar) +
  facet_grid(county ~ def_cat) +
  labs(subtitle = "def_cat on the horizontal axis and county on vertical axis")

Correlated variables with Judgment

The list of variables that are strongly associated with Judgment includes ProSe, nature of suit, def_cat (whether defendant is business, government or neither), and county.

Belknap Superior Court (211) tends to enter judgment in favor of the plaintiff more than expected by chance.

# Keep only the three main judgment outcomes and turn the grouping columns
# into factors; Jurisdiction gets an explicit level order.
data_processed_simpleJment <-
  filter(data_processed, JMent_des %in% c("Settlement", "Pltf", "Def")) %>%
  mutate(
    JMent_des = as.factor(JMent_des),
    def_cat = as.factor(def_cat),
    Jurisdiction = factor(
      Jurisdiction,
      levels = c("Hills North", "Merrimack", "Belknap")
    )
  )

# Judgment shares by `business`
ggplot(data_processed_simpleJment, aes(x = business, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Judgment shares by `government`
ggplot(data_processed_simpleJment, aes(x = government, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x government
tbl <- xtabs(~ JMent_des + government, data = data_processed_simpleJment)
ftable(tbl)
##            government both neither only_one
## JMent_des                                  
## Def                      0      94        8
## Pltf                     0     134        8
## Settlement               1     410       21

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`,
# with shading and legend switched on.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Judgment shares by ProSe_rev
ggplot(data_processed_simpleJment, aes(x = ProSe_rev, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x ProSe_rev
tbl <- xtabs(~ JMent_des + ProSe_rev, data = data_processed_simpleJment)
ftable(tbl)
##            ProSe_rev both neither only_one
## JMent_des                                 
## Def                     9      43       45
## Pltf                   14      17      109
## Settlement              8     309       83

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Judgment shares by case type, after lumping with fct_lump(..., 6)
mutate(data_processed_simpleJment,
       CaseType_des = fct_lump(CaseType_des, 6)) %>%
  ggplot(aes(x = CaseType_des, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Judgment shares by equitable-remedy request (rows with "Yes"/"No" only)
filter(data_processed_simpleJment, EqReq %in% c("Yes", "No")) %>%
  ggplot(aes(x = EqReq, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x EqReq, keeping just the "Yes" and "No" columns
tbl <- xtabs(~ JMent_des + EqReq, data = data_processed_simpleJment)[, c("Yes", "No")]
ftable(tbl)
##            EqReq Yes  No
## JMent_des               
## Def               80  20
## Pltf             122  17
## Settlement       360  53

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Judgment shares by defendant category
ggplot(data_processed_simpleJment, aes(x = def_cat, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x def_cat
tbl <- xtabs(~ JMent_des + def_cat, data = data_processed_simpleJment)
ftable(tbl)
##            def_cat Bus Gov Neither
## JMent_des                         
## Def                 26   5      71
## Pltf                42   5      95
## Settlement         210  14     208

library(vcd)
# Mosaic plot of the table currently in `tbl`, with the two dimensions
# relabelled for presentation.
mosaic(
  tbl,
  main = "Table 2: Correlation of Outcome with Type of Defendant",
  labeling_args = list(
    set_varnames = c(
      def_cat = "Type of Defendant",
      JMent_des = "Judgment"
    )
  ),
  shade = TRUE,
  legend = TRUE
)


# Judgment shares by plaintiff category
ggplot(data_processed_simpleJment, aes(x = plt_cat, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x plt_cat
tbl <- xtabs(~ JMent_des + plt_cat, data = data_processed_simpleJment)
ftable(tbl)
##            plt_cat Bus Gov neither
## JMent_des                         
## Def                 47   3      52
## Pltf                77   3      62
## Settlement         176   9     247

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Judgment shares by county, limited to counties 211, 216, 217 and 218
filter(data_processed_simpleJment, county %in% c(211, 216, 217, 218)) %>%
  ggplot(aes(x = county, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: judgment x county, keeping only the four county columns
tbl <- xtabs(~ JMent_des + county, data = data_processed_simpleJment)[, c("211", "216", "217", "218")]
ftable(tbl)
##            county 211 216 217 218
## JMent_des                        
## Def                59  35   7   1
## Pltf               88  46   7   1
## Settlement        181 166  52  12

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`,
# with shading and legend switched on.
mosaic(
  tbl,
  main = "Court data",
  legend = TRUE,
  shade = TRUE
)

Judgment by Jurisdiction and by Case Type

The analysis includes the most common case types: 1) Contract Claims, 2) Tort - Automobile, and 3) Negligence Claims other than Medical Malpractice. Major findings are:

Case types are abbreviated in the plots.

# Simplified data set: case types lumped with fct_lump(..., 2) (the "Other"
# bucket is then dropped), judgments limited to Settlement / Pltf / Def,
# grouping columns converted to factors, and unused factor levels removed.
data_processed_simpleJment_simple_CaseType <-
  mutate(data_processed, CaseType_des = fct_lump(CaseType_des, 2)) %>%
  filter(JMent_des %in% c("Settlement", "Pltf", "Def")) %>%
  filter(CaseType_des != "Other") %>%
  mutate(
    JMent_des = as.factor(JMent_des),
    def_cat = as.factor(def_cat)
  ) %>%
  droplevels()

# Judgment shares by Jurisdiction
ggplot(data_processed_simpleJment_simple_CaseType,
       aes(x = Jurisdiction, fill = JMent_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Judgment shares by Jurisdiction, one panel per case type.
# Judgment codes are spelled out ("Def" -> "Defendant", "Pltf" -> "Plaintiff")
# and only four case types are kept.
data_processed_simpleJment %>%
  mutate(
    JMent_des = case_when(
      JMent_des == "Def" ~ "Defendant",
      JMent_des == "Pltf" ~ "Plaintiff",
      TRUE ~ as.character(JMent_des)
    )
  ) %>%
  filter(
    CaseType_des %in% c(
      "Contract Claims",
      "Negligence other than Medical Malpractice",
      "Declaratory Judgment",
      "Real Property Cases"
    )
  ) %>%
  ggplot(aes(x = Jurisdiction, fill = JMent_des)) +
  geom_bar(position = "fill") +
  facet_wrap(~ CaseType_des) +
  coord_flip() +
  labs(
    y = "Proportion",
    title = "Table 1: Judgment by Case Type and Jurisdiction",
    fill = "Judgment"
  )


# The Hills North / Negligence bar is entirely settlements -- why?
# Look at which case types Hills North contains.
filter(data_processed_simpleJment, Jurisdiction == "Hills North") %>%
  count(CaseType_des, sort = TRUE)
## # A tibble: 10 x 2
##    CaseType_des                                          n
##    <chr>                                             <int>
##  1 Contract Claims                                      22
##  2 Declaratory Judgment                                  8
##  3 Negligence other than Medical Malpractice             8
##  4 Real Property Cases                                   3
##  5 Replevin                                              2
##  6 "Complaint for Injunction/To Enjoin Foreclosure "     1
##  7 Foreclosure - Complaint to Enjoin Foreclosure         1
##  8 Plea of Indemnity                                     1
##  9 "Tort - Medical Malpractice "                         1
## 10 Wage Claim                                            1

# Hills North rows counted by year, most frequent first
filter(data_processed, Jurisdiction == "Hills North") %>%
  count(year, sort = TRUE)
## # A tibble: 1 x 2
##   year      n
##   <chr> <int>
## 1 2016     49

# Judgment counts by Jurisdiction, one panel per case type.
# Same data preparation as the proportion version of this plot, but bars are
# stacked (position = "stack"), so bar length is the raw number of cases.
# The y label therefore says "Number of cases" rather than "Proportion".
data_processed_simpleJment %>%
  mutate(JMent_des = as.character(JMent_des),
         # spell out the abbreviated judgment codes for the legend
         JMent_des = case_when(
           JMent_des == "Def" ~ "Defendant",
           JMent_des == "Pltf" ~ "Plaintiff",
           TRUE ~ JMent_des
         )) %>%
  filter(CaseType_des %in% c("Contract Claims",
                             "Negligence other than Medical Malpractice",
                             "Declaratory Judgment",
                             "Real Property Cases")) %>%
  ggplot(aes(Jurisdiction, fill = JMent_des)) +
  geom_bar(position = "stack") +
  labs(y = "Number of cases") +
  coord_flip() +
  facet_wrap(~CaseType_des) +
  labs(title = "Table 1: Judgment by Case Type and Jurisdiction",
       fill = "Judgment")


# Judgment vs. jurisdiction: build the contingency table used by the mosaic plot
tbl <- xtabs(
  ~ JMent_des + Jurisdiction,
  data = data_processed_simpleJment_simple_CaseType
)
ftable(tbl)
##            Jurisdiction Belknap Hills North Merrimack
## JMent_des                                            
## Def                          31           1        24
## Pltf                         59           2        28
## Settlement                  146          27        79

library(vcd)
# Mosaic plot of the table currently in `tbl`; JMent_des is shown as "Judgment".
mosaic(
  tbl,
  main = "Court data",
  labeling_args = list(set_varnames = c(JMent_des = "Judgment")),
  shade = TRUE,
  legend = TRUE
)



# Judgment vs. case type: build the contingency table used by the mosaic plot
tbl <- xtabs(
  ~ JMent_des + CaseType_des,
  data = data_processed_simpleJment_simple_CaseType
)
ftable(tbl)
##            CaseType_des Contract Claims Negligence other than Medical Malpractice
## JMent_des                                                                        
## Def                                  42                                        14
## Pltf                                 82                                         7
## Settlement                          131                                       121

library(vcd)
# Mosaic plot of judgment vs. case type from the table currently in `tbl`.
# Variable names are relabelled for presentation, and the long case-type
# levels are abbreviated. CaseType_des has exactly two levels in this table
# ("Contract Claims" and "Negligence other than Medical Malpractice"), so
# exactly two replacement labels are supplied, in level order.
mosaic(tbl,
       shade = TRUE,
       legend = TRUE,
       labeling_args = list(set_varnames = c(JMent_des = "Judgment",
                                             CaseType_des = "Case Type")),
       set_labels = list(CaseType_des = c("Contract",
                                          "Negligence")),
       main = "Court data")

Variables correlated with disposition (DISP), the outcome of a case

The list of variables that are strongly associated with Disposition includes business (whether business is either party, both parties, or neither), ProSe, EqReq, nature of suit, def_cat (whether defendant is business, government or neither), plt_cat (whether plaintiff is business, government or neither).

# Restrict to five disposition categories.
# Possible extension: also include pending and summary judgment.
data_processed_simpleDISP <- filter(
  data_processed,
  DISP_des %in% c(
    "settled",
    "other resolution",
    "default judgment",
    "dismissed",
    "voluntary nonsuit"
  )
)

# Disposition shares by `business`
ggplot(data_processed_simpleDISP, aes(x = business, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x business
tbl <- xtabs(~ DISP_des + business, data = data_processed_simpleDISP)
ftable(tbl)
##                   business both neither only_one
## DISP_des                                        
## default judgment             13      14       51
## dismissed                     9      34       30
## other resolution              9      39       35
## settled                      47      79      100
## voluntary nonsuit            14      16       33

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Disposition shares by `government`
ggplot(data_processed_simpleDISP, aes(x = government, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x government
tbl <- xtabs(~ DISP_des + government, data = data_processed_simpleDISP)
ftable(tbl)
##                   government both neither only_one
## DISP_des                                          
## default judgment                0      75        3
## dismissed                       0      67        6
## other resolution                0      74        9
## settled                         0     215       11
## voluntary nonsuit               1      60        2

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Disposition shares by ProSe_rev
ggplot(data_processed_simpleDISP, aes(x = ProSe_rev, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x ProSe_rev
tbl <- xtabs(~ DISP_des + ProSe_rev, data = data_processed_simpleDISP)
ftable(tbl)
##                   ProSe_rev both neither only_one
## DISP_des                                         
## default judgment               0       9       66
## dismissed                     11      30       27
## other resolution              11      24       44
## settled                        3     181       28
## voluntary nonsuit              1      26       34

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Disposition shares by case type, after lumping with fct_lump(..., 6)
mutate(data_processed_simpleDISP,
       CaseType_des = fct_lump(CaseType_des, 6)) %>%
  ggplot(aes(x = CaseType_des, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Disposition shares by equitable-remedy request (rows with "Yes"/"No" only)
filter(data_processed_simpleDISP, EqReq %in% c("Yes", "No")) %>%
  ggplot(aes(x = EqReq, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x EqReq, keeping just the "Yes" and "No" columns
tbl <- xtabs(~ DISP_des + EqReq, data = data_processed_simpleDISP)[, c("Yes", "No")]
ftable(tbl)
##                   EqReq Yes  No
## DISP_des                       
## default judgment         66  10
## dismissed                50  20
## other resolution         58  23
## settled                 205  15
## voluntary nonsuit        55   8

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Disposition shares by defendant category
ggplot(data_processed_simpleDISP, aes(x = def_cat, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x def_cat
tbl <- xtabs(~ DISP_des + def_cat, data = data_processed_simpleDISP)
ftable(tbl)
##                   def_cat Bus Gov Neither
## DISP_des                                 
## default judgment           24   1      53
## dismissed                  19   4      50
## other resolution           23   7      53
## settled                   104   5     117
## voluntary nonsuit          23   2      38

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`.
mosaic(
  tbl,
  main = "Court data",
  shade = TRUE,
  legend = TRUE
)


# Disposition shares by plaintiff category
ggplot(data_processed_simpleDISP, aes(x = plt_cat, fill = DISP_des)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(y = "Proportion")


# Contingency table: disposition x plt_cat
tbl <- xtabs(~ DISP_des + plt_cat, data = data_processed_simpleDISP)
ftable(tbl)
##                   plt_cat Bus Gov neither
## DISP_des                                 
## default judgment           53   2      23
## dismissed                  29   2      42
## other resolution           30   2      51
## settled                    90   6     130
## voluntary nonsuit          38   2      23

library(vcd)
# Mosaic plot of the contingency table currently stored in `tbl`,
# with shading and legend switched on.
mosaic(
  tbl,
  main = "Court data",
  legend = TRUE,
  shade = TRUE
)

Appendix