The data used here were obtained from two different sources: the Federal Judicial Center and the Free Law Project.
# Import the merged case-level data set
data <- read.csv(file = "data/data_merged.csv")
# Structure of the data: column names, column types and a preview of values
str(data)
## 'data.frame': 4200 obs. of 51 variables:
## $ CIRCUIT : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DISTRICT : int 2 2 2 2 2 2 2 2 2 2 ...
## $ OFFICE : int 1 1 1 1 1 1 1 1 1 1 ...
## $ DOCKET : int 265 409 100015 100016 100018 100019 100020 100028 100030 100031 ...
## $ ORIGIN : int 4 4 1 1 5 1 1 5 1 1 ...
## $ FILEDATE : Factor w/ 2566 levels "1/1/2012","1/10/2001",..: 1107 1237 2 2 18 18 18 85 85 85 ...
## $ FILEYEAR : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
## $ FDATEUSE : Factor w/ 204 levels "1/1/2001","1/1/2002",..: 86 103 1 1 1 1 1 1 1 1 ...
## $ JURIS : int 3 4 1 1 4 3 4 4 4 4 ...
## $ NOS : int 840 190 190 190 190 440 110 365 365 365 ...
## $ TITLE : Factor w/ 33 levels "-8","0","10",..: 5 13 13 13 13 20 13 20 13 13 ...
## $ SECTION : Factor w/ 144 levels "-8","1","10",..: 10 20 27 27 20 73 22 138 20 20 ...
## $ SUBSECT : Factor w/ 73 levels "-8","1","12",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ RESIDENC : int -8 43 -8 -8 25 -8 52 55 15 15 ...
## $ CLASSACT : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ DEMANDED : int 0 0 0 0 0 0 0 0 0 0 ...
## $ FILEJUDG : logi NA NA NA NA NA NA ...
## $ FILEMAG : logi NA NA NA NA NA NA ...
## $ COUNTY : int 99999 33015 33013 33013 88888 88888 88888 88888 33007 33015 ...
## $ ARBIT : Factor w/ 4 levels "-8","E","M","V": 1 1 1 1 1 1 1 1 1 1 ...
## $ MDLDOCK : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ PLT : Factor w/ 3128 levels "-8","108 DEGREES. LLC",..: 2968 1362 2944 2944 1546 268 85 1813 2166 1767 ...
## $ DEF : Factor w/ 3369 levels "-8",", ET AL",..: 1024 1637 993 2405 3274 649 2845 76 118 119 ...
## $ TRANSDAT : logi NA NA NA NA NA NA ...
## $ TRANSOFF : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ TRANSDOC : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ TRANSORG : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ TERMDATE : Factor w/ 2560 levels "1/10/2002","1/10/2003",..: 1184 2184 1584 1191 1082 2161 2042 1013 1100 1043 ...
## $ TDATEUSE : Factor w/ 204 levels "1/1/2001","1/1/2002",..: 86 171 120 86 86 171 154 69 86 69 ...
## $ TRCLACT : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ TERMJUDG : logi NA NA NA NA NA NA ...
## $ TERMMAG : logi NA NA NA NA NA NA ...
## $ PROCPROG : int 5 4 2 2 2 2 5 2 1 2 ...
## $ DISP : int 5 13 18 18 14 3 6 14 10 12 ...
## $ NOJ : int 2 -8 0 0 -8 -8 2 -8 -8 -8 ...
## $ AMTREC : int 5 54 0 0 0 0 33 0 0 0 ...
## $ JUDGMENT : int 1 -8 -8 -8 -8 -8 2 -8 -8 -8 ...
## $ DJOINED : Factor w/ 1512 levels "","1/10/2005",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ PRETRIAL : Factor w/ 418 levels "","1/10/2012",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TRIBEGAN : Factor w/ 40 levels "","1/20/2016",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TRIALEND : Factor w/ 83 levels "","1/13/2004",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ TRMARB : int -8 -8 -8 -8 -8 -8 -8 -8 -8 -8 ...
## $ PROSE : int 0 0 0 0 0 1 2 0 0 0 ...
## $ IFP : Factor w/ 2 levels "-8","FP": 1 1 1 1 1 1 1 1 1 1 ...
## $ STATUSCD : Factor w/ 1 level "L": 1 1 1 1 1 1 1 1 1 1 ...
## $ TAPEYEAR : int 2001 2001 2001 2001 2001 2001 2001 2001 2001 2001 ...
## $ nature_of_suit: Factor w/ 44 levels "ADMINISTRATIVE PROCEDURE ACT/REVIEW OR APPEAL OF AGENCY DECISION",..: 43 29 29 29 29 28 19 36 36 36 ...
## $ busType_def : Factor w/ 5 levels "CORP","LLC","PARTNERSHIP",..: NA NA NA NA NA NA NA NA NA NA ...
## $ busType_plt : Factor w/ 4 levels "CORP","LLC","PARTNERSHIP",..: NA NA NA NA NA NA NA NA NA NA ...
## $ busType : Factor w/ 3 levels "Both","Neither",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ cause : Factor w/ 173 levels "","0.415277778",..: NA NA NA NA NA NA NA NA NA NA ...
# Preview the first six rows of the data
head(data, n = 6L)
## CIRCUIT DISTRICT OFFICE DOCKET ORIGIN FILEDATE FILEYEAR FDATEUSE JURIS
## 1 1 2 1 265 4 3/21/2001 2001 3/1/2001 3
## 2 1 2 1 409 4 4/1/2001 2001 4/1/2001 4
## 3 1 2 1 100015 1 1/10/2001 2001 1/1/2001 1
## 4 1 2 1 100016 1 1/10/2001 2001 1/1/2001 1
## 5 1 2 1 100018 5 1/12/2001 2001 1/1/2001 4
## 6 1 2 1 100019 1 1/12/2001 2001 1/1/2001 3
## NOS TITLE SECTION SUBSECT RESIDENC CLASSACT DEMANDED FILEJUDG FILEMAG
## 1 840 15 1114 -8 -8 -8 0 NA NA
## 2 190 28 1332 -8 43 -8 0 NA NA
## 3 190 28 1345 -8 -8 -8 0 NA NA
## 4 190 28 1345 -8 -8 -8 0 NA NA
## 5 190 28 1332 -8 25 -8 0 NA NA
## 6 440 42 1983 -8 -8 -8 0 NA NA
## COUNTY ARBIT MDLDOCK PLT
## 1 99999 -8 -8 VELCRO INDUSTRIES BV, ET AL
## 2 33015 -8 -8 HIGHDATA SOFTWARE
## 3 33013 -8 -8 USA
## 4 33013 -8 -8 USA
## 5 88888 -8 -8 KAPUR
## 6 88888 -8 -8 BICKFORD
## DEF TRANSDAT TRANSOFF TRANSDOC TRANSORG TERMDATE
## 1 FASTECH, ET AL NA -8 -8 -8 3/22/2001
## 2 KOTHANDAN NA -8 -8 -8 8/15/2001
## 3 EVANS NA -8 -8 -8 5/17/2001
## 4 PICUCCI NA -8 -8 -8 3/23/2001
## 5 WEBMANAGE TECHNOLOG, ET AL NA -8 -8 -8 3/1/2001
## 6 CONCORD DISTRICT CT NA -8 -8 -8 8/10/2001
## TDATEUSE TRCLACT TERMJUDG TERMMAG PROCPROG DISP NOJ AMTREC JUDGMENT
## 1 3/1/2001 -8 NA NA 5 5 2 5 1
## 2 8/1/2001 -8 NA NA 4 13 -8 54 -8
## 3 5/1/2001 -8 NA NA 2 18 0 0 -8
## 4 3/1/2001 -8 NA NA 2 18 0 0 -8
## 5 3/1/2001 -8 NA NA 2 14 -8 0 -8
## 6 8/1/2001 -8 NA NA 2 3 -8 0 -8
## DJOINED PRETRIAL TRIBEGAN TRIALEND TRMARB PROSE IFP STATUSCD TAPEYEAR
## 1 -8 0 -8 L 2001
## 2 -8 0 -8 L 2001
## 3 -8 0 -8 L 2001
## 4 -8 0 -8 L 2001
## 5 -8 0 -8 L 2001
## 6 -8 1 -8 L 2001
## nature_of_suit busType_def busType_plt busType cause
## 1 TRADEMARK <NA> <NA> Neither <NA>
## 2 OTHER CONTRACT ACTIONS <NA> <NA> Neither <NA>
## 3 OTHER CONTRACT ACTIONS <NA> <NA> Neither <NA>
## 4 OTHER CONTRACT ACTIONS <NA> <NA> Neither <NA>
## 5 OTHER CONTRACT ACTIONS <NA> <NA> Neither <NA>
## 6 OTHER CIVIL RIGHTS <NA> <NA> Neither <NA>
# Basic descriptive statistics for every column of the data.
# NOTE(review): -8 looks like a missing-value sentinel (DISP == "-8" is
# later labelled "missing") -- if so, the means reported for coded integer
# columns are not meaningful; confirm against the codebook.
summary(data)
## CIRCUIT DISTRICT OFFICE DOCKET ORIGIN
## Min. :1 Min. :2 Min. :1 Min. : 73 Min. : 1.000
## 1st Qu.:1 1st Qu.:2 1st Qu.:1 1st Qu.: 400138 1st Qu.: 1.000
## Median :1 Median :2 Median :1 Median : 800364 Median : 1.000
## Mean :1 Mean :2 Mean :1 Mean : 855885 Mean : 1.467
## 3rd Qu.:1 3rd Qu.:2 3rd Qu.:1 3rd Qu.:1200343 3rd Qu.: 2.000
## Max. :1 Max. :2 Max. :1 Max. :9900606 Max. :13.000
##
## FILEDATE FILEYEAR FDATEUSE JURIS
## 9/16/2002 : 10 Min. :2001 10/1/2002: 50 Min. :1.000
## 11/25/2003: 9 1st Qu.:2004 5/1/2002 : 40 1st Qu.:3.000
## 9/24/2002 : 8 Median :2008 9/1/2002 : 40 Median :3.000
## 11/23/2010: 7 Mean :2008 10/1/2003: 36 Mean :3.315
## 5/7/2004 : 7 3rd Qu.:2012 4/1/2013 : 36 3rd Qu.:4.000
## 11/23/2004: 6 Max. :2017 6/1/2005 : 34 Max. :4.000
## (Other) :4153 (Other) :3964
## NOS TITLE SECTION SUBSECT
## Min. :110.0 28 :2717 1332 :1147 -8 :1562
## 1st Qu.:350.0 42 : 656 1441 : 912 CV : 430
## Median :440.0 15 : 396 1983 : 355 PI : 269
## Mean :443.9 29 : 112 1331 : 322 BC : 249
## 3rd Qu.:446.0 17 : 63 1692 : 163 ED : 229
## Max. :899.0 47 : 55 2000 : 146 OC : 177
## (Other): 201 (Other):1155 (Other):1284
## RESIDENC CLASSACT DEMANDED FILEJUDG
## Min. :-8.000 Min. :-8.000 Min. : 0.00 Mode:logical
## 1st Qu.:-8.000 1st Qu.:-8.000 1st Qu.: 0.00 NA's:4200
## Median :-8.000 Median :-8.000 Median : 0.00
## Mean : 6.339 Mean :-7.916 Mean : 2.64
## 3rd Qu.:15.000 3rd Qu.:-8.000 3rd Qu.: 0.00
## Max. :64.000 Max. : 1.000 Max. :5000.00
##
## FILEMAG COUNTY ARBIT MDLDOCK
## Mode:logical Min. :33001 -8:4180 Min. : -8.000
## NA's:4200 1st Qu.:33011 E : 7 1st Qu.: -8.000
## Median :33013 M : 5 Median : -8.000
## Mean :46637 V : 8 Mean : 9.216
## 3rd Qu.:33019 3rd Qu.: -8.000
## Max. :99999 Max. :2320.000
##
## PLT DEF TRANSDAT
## USA : 82 -8 : 61 Mode:logical
## -8 : 20 TYCO INTERNATIONAL, ET AL : 34 NA's:4200
## DIRECTV, INC.: 20 USA : 34
## AMATUCCI : 17 GUTIERREZ, ET AL : 12
## WILSON : 15 NH DEPARTMENT OF HEALTH AND HU: 12
## JOHNSON : 13 SEALED : 11
## (Other) :4033 (Other) :4036
## TRANSOFF TRANSDOC TRANSORG TERMDATE TDATEUSE
## Min. :-8 Min. :-8 Min. :-8 2/4/2015 : 13 3/1/2006 : 43
## 1st Qu.:-8 1st Qu.:-8 1st Qu.:-8 3/3/2006 : 13 12/1/2011: 42
## Median :-8 Median :-8 Median :-8 10/14/2004: 9 4/1/2003 : 35
## Mean :-8 Mean :-8 Mean :-8 3/6/2006 : 9 10/1/2003: 34
## 3rd Qu.:-8 3rd Qu.:-8 3rd Qu.:-8 12/19/2011: 8 10/1/2006: 33
## Max. :-8 Max. :-8 Max. :-8 2/1/2016 : 7 3/1/2005 : 33
## (Other) :4141 (Other) :3980
## TRCLACT TERMJUDG TERMMAG PROCPROG
## Min. :-8.000 Mode:logical Mode:logical Min. : 1.000
## 1st Qu.:-8.000 NA's:4200 NA's:4200 1st Qu.: 2.000
## Median :-8.000 Median : 5.000
## Mean :-7.854 Mean : 4.229
## 3rd Qu.:-8.000 3rd Qu.: 5.000
## Max. : 3.000 Max. :13.000
##
## DISP NOJ AMTREC JUDGMENT
## Min. : 0.0 Min. :-8.000 Min. : 0.00 Min. :-8.00
## 1st Qu.: 6.0 1st Qu.:-8.000 1st Qu.: 0.00 1st Qu.:-8.00
## Median :13.0 Median : 0.000 Median : 0.00 Median : 0.00
## Mean :10.5 Mean :-3.177 Mean : 35.09 Mean :-3.25
## 3rd Qu.:13.0 3rd Qu.: 0.000 3rd Qu.: 0.00 3rd Qu.: 0.00
## Max. :20.0 Max. : 6.000 Max. :9999.00 Max. : 4.00
##
## DJOINED PRETRIAL TRIBEGAN TRIALEND
## :2163 :3717 :4160 :4118
## 7/1/2013 : 8 10/12/2011: 7 2/17/2016: 2 1/13/2004: 1
## 11/14/2011: 6 10/24/2011: 4 1/20/2016: 1 1/20/2016: 1
## 10/12/2006: 5 3/5/2012 : 4 1/4/2011 : 1 1/23/2007: 1
## 10/22/2004: 5 11/24/2009: 3 1/6/2015 : 1 1/27/2009: 1
## 5/14/2007 : 5 12/16/2011: 3 1/6/2016 : 1 1/27/2016: 1
## (Other) :2008 (Other) : 462 (Other) : 34 (Other) : 77
## TRMARB PROSE IFP STATUSCD TAPEYEAR
## Min. :-8 Min. :0.000 -8:3943 L:4200 Min. :2001
## 1st Qu.:-8 1st Qu.:0.000 FP: 257 1st Qu.:2005
## Median :-8 Median :0.000 Median :2009
## Mean :-8 Mean :0.221 Mean :2009
## 3rd Qu.:-8 3rd Qu.:0.000 3rd Qu.:2013
## Max. :-8 Max. :3.000 Max. :2018
##
## nature_of_suit busType_def busType_plt
## OTHER CONTRACT ACTIONS : 610 CORP : 877 CORP : 409
## OTHER CIVIL RIGHTS : 574 LLC : 390 LLC : 264
## CIVIL RIGHTS JOBS : 437 PARTNERSHIP: 30 PARTNERSHIP: 4
## OTHER PERSONAL INJURY : 356 PC : 6 PC : 9
## OTHER STATUTORY ACTIONS: 344 PLLC : 2 NA's :3514
## INSURANCE : 245 NA's :2895
## (Other) :1634
## busType cause
## Both : 275 42:1983 Civil Rights Act : 183
## Neither :2484 28:1332 Diversity-Personal Injury : 79
## Only one:1441 28:1332 Diversity-Breach of Contract : 78
## 28:1441 Petition for Removal - Employment Discrim: 70
## 28:1331 Federal Question: Other Civil Rights : 68
## (Other) :1250
## NA's :2472
The length of suit (in days) is calculated as the difference between `FILEDATE` and `TERMDATE`. DataCamp offers a video lecture on working with times and dates.
# Parse the month/day/year strings into Date objects
data$FILEDATE <- as.Date(data$FILEDATE, format = "%m/%d/%Y")
data$TERMDATE <- as.Date(data$TERMDATE, format = "%m/%d/%Y")
# Length of suit: days elapsed between filing and termination.
# Subtracting two Dates yields a difftime, converted here to plain days.
data$lengthOfsuit <- as.numeric(data$TERMDATE - data$FILEDATE, units = "days")
# Summarise the cases by nature of suit (case count and mean duration,
# rounded to whole days), longest-running first, and show the result as an
# interactive table with 10 rows per page.
library(dplyr)
library(DT)
# NOTE: the variable `table` shares its name with base::table(); calls to
# table() still resolve to the function.
table <- data %>%
  group_by(nature_of_suit) %>%
  summarise(
    numberOfcases = n(),
    avg_lengthOfsuit = round(mean(lengthOfsuit, na.rm = TRUE), digits = 0)
  ) %>%
  arrange(desc(avg_lengthOfsuit))
datatable(table, options = list(pageLength = 10))
# Step by step:
#   1. average the length of suit per nature of suit,
#   2. keep natures of suit with more than 50 cases,
#   3. keep the 10 with the longest average duration,
#   4. draw horizontal bars ordered by that average, one colour per category.
library(ggplot2)
data %>%
  group_by(nature_of_suit) %>%
  summarise(
    numberOfcases = n(),
    avg_lengthOfsuit = round(mean(lengthOfsuit, na.rm = TRUE), digits = 0)
  ) %>%
  arrange(desc(avg_lengthOfsuit)) %>%
  filter(numberOfcases > 50) %>%
  top_n(n = 10, wt = avg_lengthOfsuit) %>%
  ggplot(
    aes(
      x = reorder(nature_of_suit, avg_lengthOfsuit),
      y = avg_lengthOfsuit,
      fill = nature_of_suit
    )
  ) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(
    title = "Top 10 Nature of Suit by Length of Suit\n(Nature of Suit > 50 cases)",
    x = "Nature of Suit",
    y = "Length of Suit (days)"
  )
# Same top-10 chart as above, but with every bar drawn in a single colour.
library(ggplot2)
data %>%
  group_by(nature_of_suit) %>%
  summarise(
    numberOfcases = n(),
    avg_lengthOfsuit = round(mean(lengthOfsuit, na.rm = TRUE), 0)
  ) %>%
  filter(numberOfcases > 50) %>%
  top_n(10, avg_lengthOfsuit) %>%
  ggplot(aes(x = reorder(nature_of_suit, avg_lengthOfsuit), y = avg_lengthOfsuit)) +
  # A constant colour must be *set* on the geom, outside aes(). Inside aes()
  # the string "Orange" is treated as a one-level variable and the bars get
  # ggplot's default hue instead of orange.
  geom_col(fill = "orange", show.legend = FALSE) +
  labs(
    title = "Top 10 Nature of Suit by Length of Suit\n(Nature of Suit > 50 cases)",
    x = "Nature of Suit",
    y = "Length of Suit (days)"
  ) +
  coord_flip()
# Step by step
# Can you do the same for Causes of Action in place of Nature of Suit?
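`DISP` records the manner in which the case was disposed of. The codes fall into three groups:
Cases transferred or remanded: codes 0, 1, 10 and 11.
Dismissals: codes 2, 3, 12, 13 and 14.
Judgment on: codes 4-9 and 15-20.
See the CIVIL CODE sheets, under disposition, for an explanation of the three groups.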
# Recode the numeric disposition codes in DISP into descriptive labels.
# A single named lookup vector replaces one assignment per code, and the
# column is converted to character explicitly instead of relying on the
# implicit coercion triggered by the first string assignment (which would
# fail outright if DISP were a factor). Values without an entry in the
# lookup (including NA) are left unchanged, so re-running is harmless.
data$DISP <- local({
  disp_labels <- c(
    # Cases transferred or remanded
    "0"  = "transfer to another district",
    "1"  = "remanded to state court",
    "10" = "multi district litigation transfer",
    "11" = "remanded to U.S. Agency",
    # Dismissals
    "2"  = "want of prosecution",
    "3"  = "lack of jurisdiction",
    "12" = "voluntarily",
    "13" = "settled",
    "14" = "other, dismissals",
    # Judgment on
    "4"  = "default",
    "5"  = "consent",
    "6"  = "motion before trial",
    "7"  = "jury verdict",
    "8"  = "directed verdict",
    "9"  = "court trial",
    "15" = "award of arbitrator",
    "16" = "stayed pending bankruptcy",
    "17" = "other, judgement on",
    "18" = "statistical closing",
    "19" = "appeal affirmed",
    "20" = "appeal denied",
    # Sentinel for a missing disposition
    "-8" = "missing"
  )
  disp_codes <- as.character(data$DISP)
  has_label <- disp_codes %in% names(disp_labels)
  disp_codes[has_label] <- unname(disp_labels[disp_codes[has_label]])
  disp_codes
})
# Easy fix, but not ordered: one facet per disposition, showing the average
# length of suit for every disposition / nature-of-suit group that has more
# than 50 cases. reorder() ranks the categories over the whole data set, so
# bars are not necessarily sorted within an individual facet.
data %>%
  group_by(DISP, nature_of_suit) %>%
  summarise(
    numberOfcases = n(),
    avg_lengthOfsuit = round(mean(lengthOfsuit, na.rm = TRUE), 0)
  ) %>%
  # summarise() only peels off the last grouping key; drop the rest too
  ungroup() %>%
  filter(numberOfcases > 50) %>%
  ggplot(aes(x = reorder(nature_of_suit, avg_lengthOfsuit), y = avg_lengthOfsuit, fill = DISP)) +
  geom_col(show.legend = FALSE) +
  # The previous title ("Top 10 ...") was copied from the earlier chart;
  # nothing here is limited to 10 rows, and the > 50 filter applies to
  # disposition / nature-of-suit groups.
  labs(
    title = "Average Length of Suit by Nature of Suit and Disposition\n(Disposition / Nature of Suit groups > 50 cases)",
    x = NULL,
    y = "Length of Suit (days)"
  ) +
  coord_flip() +
  facet_wrap(~ DISP, scales = "free", ncol = 2)
# Ordered but long fix: build the summary once, sort it by average length of
# suit and store each row's rank in `order`, so the rank can later be used
# as the plotting position.
data_DISP <- data %>%
  group_by(DISP, nature_of_suit) %>%
  summarise(
    numberOfcases = n(),
    avg_lengthOfsuit = round(mean(lengthOfsuit, na.rm = TRUE), digits = 0)
  ) %>%
  ungroup() %>%
  arrange(avg_lengthOfsuit) %>%
  mutate(order = seq_len(n()))
data_DISP
## # A tibble: 374 x 5
## DISP nature_of_suit numberO~ avg_leng~ order
## <chr> <fctr> <int> <dbl> <int>
## 1 appeal denied PERSONAL INJURY ~ 1 0 1
## 2 consent INSURANCE 1 0 2
## 3 other, dismissals LABOR/MANAGEMENT~ 1 0 3
## 4 settled ANTITRUST 1 5.00 4
## 5 consent CIVIL RIGHTS ADA~ 1 6.00 5
## 6 remanded to state court OTHER FRAUD 3 7.00 6
## 7 statistical closing ARBITRATION 1 10.0 7
## 8 settled LABOR/MANAGEMENT~ 1 13.0 8
## 9 transfer to another district TORT PRODUCT LIA~ 1 13.0 9
## 10 statistical closing CIVIL RIGHTS ACC~ 1 14.0 10
## # ... with 364 more rows
# Faceted chart ordered within each facet: bars are positioned by the global
# rank stored in `order` (one discrete level per row), and the axis is then
# relabelled with the matching nature of suit.
data_DISP %>%
  filter(numberOfcases > 50) %>%
  ggplot(aes(x = as.factor(order), y = avg_lengthOfsuit, fill = DISP)) +
  geom_col(show.legend = FALSE) +
  # The previous title ("Top 10 ...") was copied from the earlier chart;
  # nothing here is limited to 10 rows, and the > 50 filter applies to
  # disposition / nature-of-suit groups.
  labs(
    title = "Average Length of Suit by Nature of Suit and Disposition\n(Disposition / Nature of Suit groups > 50 cases)",
    x = NULL,
    y = "Length of Suit (days)"
  ) +
  coord_flip() +
  facet_wrap(~ DISP, scales = "free", ncol = 2) +
  # Add categories to axis: replace each rank with its nature-of-suit label
  scale_x_discrete(
    breaks = data_DISP$order,
    labels = data_DISP$nature_of_suit,
    expand = c(0, 0)
  )