This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

These are my attempts at working with the judge database:

The first step is to load the data, clean it up, and take a first look at it.

library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw judge data from CSV, keeping text columns as character vectors
# rather than factors, then convert the result to a tibble so that printing it
# shows a short preview instead of every row.
# as_tibble() is used in place of tbl_df(), which is deprecated in current
# dplyr releases.
dirtyJudge <- read.csv("historydata/jb.csv", stringsAsFactors = FALSE)
dirtyJudge <- as_tibble(dirtyJudge)
#dirtyJudge


# Whew, the file is too big to preview. Let's select just the columns we're interested in.

# Keep only the eight columns of interest, giving each one a shorter name,
# then print a preview of the result.
managableJudge <- select(
  dirtyJudge,
  lastname   = Judge.Last.Name,
  firstname  = Judge.First.Name,
  middlename = Judge.Middle.Name,
  birth      = Birth.year,
  death      = Death.year,
  gender     = Gender,
  raceth     = Race.or.Ethnicity,
  court      = Court.Name
)
managableJudge
## Source: local data frame [3,532 x 8]
## 
##    lastname firstname middlename birth death gender   raceth
## 1    Abrams    Ronnie             1968    NA      F    White
## 2   Abruzzo   Matthew         T.  1889  1971      M    White
## 3   Acheson    Marcus     Wilson  1828  1906      M    White
## 4     Acker   William      Marsh  1927    NA      M    White
## 5  Ackerman    Harold     Arnold  1928  2009      M    White
## 6  Ackerman     James      Waldo  1926  1984      M    White
## 7    Acosta   Raymond         L.  1925    NA      M Hispanic
## 8     Adair J[ackson]      Leroy  1887  1956      M    White
## 9     Adams     Arlin     Marvin  1921    NA      M    White
## 10    Adams     Elmer      Bragg  1842  1916      M    White
## ..      ...       ...        ...   ...   ...    ...      ...
## Variables not shown: court (chr)
# Okay, more manageable, but still ugly.
# Let's look at the judges in chronological order by birth year
# Order the judges from earliest to latest birth year and preview the result.
chronologyJudge <- arrange(managableJudge, birth)
chronologyJudge
## Source: local data frame [3,532 x 8]
## 
##     lastname firstname middlename birth death gender raceth
## 1      Blair      John             1732  1800      M  White
## 2    Cushing   William             1732  1810      M  White
## 3    Drayton   William             1732  1790      M  White
## 4    Johnson    Thomas             1732  1819      M  White
## 5      Duane     James             1733  1797      M  White
## 6        Law   Richard             1733  1806      M  White
## 7     Sewall     David             1735  1825      M  White
## 8  Hopkinson   Francis             1737  1791      M  White
## 9  Pickering      John             1737  1805      M  White
## 10    Hobart      John      Sloss  1738  1805      M  White
## ..       ...       ...        ...   ...   ...    ...    ...
## Variables not shown: court (chr)
# Median birth year, ignoring judges with no recorded birth year.
median(chronologyJudge[["birth"]], na.rm = TRUE)
## [1] 1923
# median year of birth for judges = 1923
# Let's look at the median age.

# Add a column holding death year minus birth year. The result is NA wherever
# either year is missing, so those rows are dropped from the median below.
ageJudge <- mutate(managableJudge, ageJudge = death - birth)

median(ageJudge[["ageJudge"]], na.rm = TRUE)
## [1] 76
# Mean of death year minus birth year, skipping rows where it is NA.
mean(ageJudge[["ageJudge"]], na.rm = TRUE)
## [1] 75.24
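# The median age for the judges is 76 and the mean age is 75.24. Since this is death year minus birth year, it is really lifespan rather than age, and it only covers judges with a recorded death year.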
# that was fun. I guess.
#Let's look at how many female judges there are!
# First, we'll make sure there are no empty gender fields - organize by gender so I can look through.
# Sort the rows by the gender column so its values can be inspected by eye.
genderJudge <- arrange(managableJudge, gender)
genderJudge
## Source: local data frame [3,532 x 8]
## 
##    lastname firstname      middlename birth death gender           raceth
## 1    Abrams    Ronnie                  1968    NA      F            White
## 2     Aiken       Ann              L.  1951    NA      F            White
## 3   Aldrich       Ann                  1927  2010      F            White
## 4     Allen    Arenda Lauretta Wright  1960    NA      F African American
## 5     Allen  Florence       Ellinwood  1884  1966      F            White
## 6  Altonaga   Cecilia              M.  1962    NA      F         Hispanic
## 7   Alvarez   Micaela                  1958    NA      F         Hispanic
## 8   Ambrose   Donetta              W.  1945    NA      F            White
## 9      Amon     Carol          Bagley  1946    NA      F            White
## 10 Arguello Christine              M.  1955    NA      F         Hispanic
## ..      ...       ...             ...   ...   ...    ...              ...
## Variables not shown: court (chr)
# great. All M or F, add a filter and see ONLY female Judges

# Keep only the rows whose gender is recorded as "F" and preview them.
femaleJudge <- filter(managableJudge, gender == "F")
femaleJudge
## Source: local data frame [389 x 8]
## 
##    lastname firstname      middlename birth death gender           raceth
## 1    Abrams    Ronnie                  1968    NA      F            White
## 2     Aiken       Ann              L.  1951    NA      F            White
## 3   Aldrich       Ann                  1927  2010      F            White
## 4     Allen    Arenda Lauretta Wright  1960    NA      F African American
## 5     Allen  Florence       Ellinwood  1884  1966      F            White
## 6  Altonaga   Cecilia              M.  1962    NA      F         Hispanic
## 7   Alvarez   Micaela                  1958    NA      F         Hispanic
## 8   Ambrose   Donetta              W.  1945    NA      F            White
## 9      Amon     Carol          Bagley  1946    NA      F            White
## 10 Arguello Christine              M.  1955    NA      F         Hispanic
## ..      ...       ...             ...   ...   ...    ...              ...
## Variables not shown: court (chr)
# Great - but I still have to look through the output to see how many female or male judges there are. Give me a number and do the work for me!

# Count the number of judges recorded under each gender value.
gendersJudge <- summarize(
  group_by(managableJudge, gender),
  amountGender = n()
)
gendersJudge
## Source: local data frame [2 x 2]
## 
##   gender amountGender
## 1      F          389
## 2      M         3143
#what can I learn about the distribution of race/ethnicity?

# Count judges per race/ethnicity value and list the most common values first.
reJudge <- arrange(
  summarize(group_by(managableJudge, raceth), distribRE = n()),
  desc(distribRE)
)
reJudge
## Source: local data frame [12 x 2]
## 
##                  raceth distribRE
## 1                 White      3164
## 2      African American       201
## 3              Hispanic       116
## 4        Asian American        31
## 5                               9
## 6       American Indian         3
## 7  African Am./Hispanic         2
## 8   Pac. Isl./Asian Am.         2
## 9    Hispanic/Asian Am.         1
## 10       Hispanic/White         1
## 11      Pac. Isl./White         1
## 12      White/Asian Am.         1
#fascinating.
#next I'd like to play with race and gender - this will require me to mutate/spread the column to set a new variable. Ran out of time.