library(plyr)
library(ggplot2)
library(RColorBrewer)

# Pull three columns out of data1 by position, then tally how many times each
# distinct combination of their values occurs (plyr::count adds a `freq`
# column holding that tally).
data2 <- data1[c(11, 88, 23)]
data3 <- count(data2)

# Keep only the combinations that occur at most 110 times
# (for correct ordering by median given limits on plot)
data4 <- data3[data3[["freq"]] <= 110, ]

# Number of rows of data4 per county (Cnty_ID), largest count first.
pop.maj1 <- sort(table(data4$Cnty_ID), decreasing = TRUE)

# Build the data frame from the table's values and names explicitly.
# On current R, sort() keeps the "table" class and data.frame() expands a table
# into <name>.Var1 / <name>.Freq columns with row names 1..n, so wrapping the
# table directly would leave no `num_crash` column and row.names() would give
# row indices instead of county ids.
pop.maj1 <- data.frame(
  num_crash = as.vector(pop.maj1),
  Cnty_ID = as.character(names(pop.maj1)),
  row.names = names(pop.maj1),
  stringsAsFactors = FALSE
)

head(pop.maj1)
##     num_crash Cnty_ID
## 101       424     101
## 220       416     220
## 71        415      71
## 178       414     178
## 227       406     227
## 161       401     161
# Attach each county's row count (num_crash) to the filtered data.
# all.x = TRUE keeps every row of data4 even if a county has no match;
# TRUE is spelled out because `T` is an ordinary, reassignable variable.
to_plo1 <- merge(data4, pop.maj1, by = 'Cnty_ID', all.x = TRUE)

# Turn the county id into a display label of the form "<id> (<count>)"
to_plo1$Cnty_ID <- paste0(to_plo1$Cnty_ID, ' (', to_plo1$num_crash, ')')

head(to_plo1)
##   Cnty_ID Prsn_Age Year freq num_crash
## 1  1 (56)       54 2011    1        56
## 2  1 (56)       45 2013    1        56
## 3  1 (56)       86 2014    1        56
## 4  1 (56)       51 2009    1        56
## 5  1 (56)       48 2013    1        56
## 6  1 (56)       57 2010    1        56
# Median Prsn_Age for each county label, one row per label,
# then sorted from the lowest median to the highest
med_pgs1 <- ddply(to_plo1, "Cnty_ID", summarize, median = median(Prsn_Age))
med_pgs1 <- med_pgs1[order(med_pgs1[["median"]]), ]

# Make the county label a factor whose levels follow med_pgs1's row order
# (ascending median), so the boxplots are drawn in that order.
# `labels` defaults to `levels`, so it does not need to be repeated.
to_plo1$Cnty_ID <- factor(to_plo1$Cnty_ID, levels = med_pgs1$Cnty_ID)

# Attach each county's median so it can be used as the fill value.
# There is deliberately no second merge with pop.maj1: num_crash is already a
# column of to_plo1, and Cnty_ID now holds "<id> (<count>)" labels that cannot
# match pop.maj1's raw ids, so that merge only produced an all-NA num_crash.y
# column and renamed num_crash to num_crash.x.
to_plo1 <- merge(to_plo1, med_pgs1, by = 'Cnty_ID', all.x = TRUE)

head(to_plo1)
##    Cnty_ID Prsn_Age Year freq num_crash median
## 1 108 (92)       34 2011    1        92     32
## 2 108 (92)       21 2012    1        92     32
## 3 108 (92)       16 2009    1        92     32
## 4 108 (92)       44 2014    1        92     32
## 5 108 (92)       25 2009    2        92     32
## 6 108 (92)       57 2011    1        92     32
######
# Horizontal boxplots of Prsn_Age for each county label; every box is filled
# according to that county's median, using the 11-colour Spectral palette.
diss_plo1 <- ggplot(
  data = to_plo1,
  mapping = aes(x = Cnty_ID, y = Prsn_Age, fill = median)
) +
  geom_boxplot(lwd = 0.3) +
  ylim(0, 105) +
  coord_flip() +
  theme_bw() +
  scale_fill_gradientn(colours = brewer.pal(n = 11, name = 'Spectral')) +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_text(size = 16)
  )
print(diss_plo1)

######
# Bar chart of the per-county counts held in pop.maj1

# Freeze the bar order to pop.maj1's row order by using the ids, as they
# appear, as the factor levels (labels default to the levels)
to_plo1 <- pop.maj1
to_plo1$Cnty_ID <- factor(pop.maj1$Cnty_ID, levels = pop.maj1$Cnty_ID)

diss_plo <- ggplot(
  data = to_plo1,
  mapping = aes(x = Cnty_ID, y = num_crash, fill = num_crash)
) +
  geom_bar(lwd = 0.3, stat = 'identity') +
  theme_bw() +
  scale_fill_gradientn(colours = brewer.pal(n = 11, name = 'Spectral')) +
  ylab('Number of Crashes') +
  theme(
    legend.position = "none",
    axis.title.x = element_blank(),
    # county ids are written vertically along the x axis
    axis.text.x = element_text(size = 8, angle = 90, hjust = 1, vjust = 0)
  )
print(diss_plo)

###


# Slopegraphs drawn with plot.qual(), which is sourced from a GitHub gist.

# library() stops with an error if devtools is not installed; require() would
# only warn and return FALSE, letting the script fail later at source_gist().
library(devtools)

# Pin the gist to the SHA-1 that was reported when it was last sourced, so the
# script refuses to run remote code that has changed since then.
source_gist('5281518', sha1 = '5003a8077c070fcd75fd92a56d04c1e241336f4b')
## Sourcing https://gist.githubusercontent.com/fawda123/5281518/raw/29a1ab398df96658fdedd966870fb95351c55d20/plot_qual.r

# NOTE(review): dat8 is not created anywhere in this part of the script; it
# must already exist when these lines run — confirm where it is built.

# First plot: no margins, serif font
par(mar=numeric(4),family='serif')
plot.qual(dat8,rs.ln=c(3,15))
## Loading required package: scales

# Second plot: room for a title above the plot, custom line colours
par(mar=c(0,0,1,0),family='serif')
plot.qual(dat8,y.locs=c(0.05,1),ln.cl=c('lightblue','purple','green'),
          main='County Wise Slopegraph for Texas')