Updated PRC Aggregated Breaches

Breach Type

Org Class

(I substituted some of the Org Class values in the source data, though, to make the graphs easier to read.)

# Load the raw breach records. Text columns stay as plain strings so that each
# conversion below is explicit.
breaches <- read.csv("~/data/prc-2013.csv", stringsAsFactors = FALSE)

# Parse the date text (day, abbreviated month name, two-digit year) into Date.
breaches$Date <- as.Date(breaches$Date, format = "%d-%b-%y")

# Turn the grouping columns into factors in one pass.
breaches[c("Org.Class", "Loss.Type", "State", "Year")] <-
  lapply(breaches[c("Org.Class", "Loss.Type", "State", "Year")], factor)

# Inspect the distinct State values present in the data.
levels(breaches$State)
##  [1] ""                     "Alabama"              "Alaska"              
##  [4] "Arizona"              "Arkansas"             "California"          
##  [7] "Colorado"             "Connecticut"          "Delaware"            
## [10] "District Of Columbia" "Florida"              "Georgia"             
## [13] "Hawaii"               "Idaho"                "Illinois"            
## [16] "Indiana"              "Iowa"                 "Kansas"              
## [19] "Kentucky"             "London City of"       "Louisiana"           
## [22] "Maine"                "Maryland"             "Massachusetts"       
## [25] "Michigan"             "Minnesota"            "Mississippi"         
## [28] "Missouri"             "Montana"              "Nebraska"            
## [31] "Nevada"               "New Hampshire"        "New Jersey"          
## [34] "New Mexico"           "New York"             "Noord Holland"       
## [37] "North Carolina"       "North Dakota"         "Ohio"                
## [40] "Oklahoma"             "Oregon"               "Pennsylvania"        
## [43] "Puerto Rico"          "Rhode Island"         "South Carolina"      
## [46] "South Dakota"         "Tennessee"            "Texas"               
## [49] "Utah"                 "Vermont"              "Virginia"            
## [52] "Washington"           "West Virginia"        "Wisconsin"           
## [55] "Wyoming"

That's a bit of a mess. Some missing data, some non-US data. Let's just look at the US.

# Keep US records only: drop rows whose State is blank or is one of the two
# non-US regions that show up in the State levels.
breaches.us <- breaches[
  !is.element(breaches$State, c("", "London City of", "Noord Holland")),
]

# Tally breaches per year, split by organisation class, by loss type, and by
# both at once. The tallies land in a `freq` column, which the plots use as y.
by.year.org <- count(breaches.us, vars = c("Year", "Org.Class"))
by.year.type <- count(breaches.us, vars = c("Year", "Loss.Type"))
by.both <- count(breaches.us, vars = c("Year", "Org.Class", "Loss.Type"))

# Per the original note, neither NGO nor GOV has any CARD breaches, so those
# two Org.Class/Loss.Type facets would be absent from the grid. Pad by.both
# with zero-count rows for every year in the observed span so the facet grid
# comes out complete.
yrs <- range(as.numeric(levels(by.both$Year)))
yrs <- as.character(seq(yrs[1], yrs[2]))

by.both <- rbind(
  by.both,
  data.frame(Year = yrs, Org.Class = "GOV", Loss.Type = "CARD", freq = 0),
  data.frame(Year = yrs, Org.Class = "NGO", Loss.Type = "CARD", freq = 0)
)

# rbind() appends any Year value it has not seen before to the END of the
# factor's level set. A year inside the span that had no breaches would then
# be plotted after the latest year. Put the levels back in chronological order;
# union() keeps any level outside `yrs` rather than turning it into NA.
by.both$Year <- factor(by.both$Year, levels = union(yrs, levels(by.both$Year)))
# Breaches per year for each Org.Class / Loss.Type pairing, one facet per
# pairing. Bars are filled by Org.Class and the legend is hidden.
# theme_bw() comes first so the tweaks in theme() override it.
gg <- ggplot(by.both, aes(x = Year, y = freq)) +
  geom_bar(aes(fill = Org.Class), stat = "identity") +
  facet_wrap(~Org.Class + Loss.Type) +
  labs(x = "", y = "# Breaches") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
gg

plot of chunk unnamed-chunk-3

# Breaches per year, one facet per Loss.Type. Bars are filled by Loss.Type and
# the legend is hidden.
# theme_bw() comes first so the tweaks in theme() override it.
gg <- ggplot(by.year.type, aes(x = Year, y = freq)) +
  geom_bar(aes(fill = Loss.Type), stat = "identity") +
  facet_wrap(~Loss.Type) +
  labs(x = "", y = "# Breaches") +
  theme_bw() +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )
gg