Secretary of Education Report

January 24, 2014

For more information, see also this post.

Grab the Data

# Read the second HTML table on the Wikipedia page for the U.S. Secretary of
# Education, skipping its first row and forcing the first two columns to
# character.
# NOTE(review): the URL uses plain http; if the site now redirects to https,
# readHTMLTable() may be unable to fetch it directly -- confirm before re-running.
URL <- "http://en.wikipedia.org/wiki/United_States_Secretary_of_Education"
Table2 <- readHTMLTable(
    doc = URL,
    which = 2,
    skip.rows = 1,
    colClasses = c("character", "character")
)

# Tidy the scraped table into one row per secretary.
# All row/column indices below are tied to the layout of the scraped table at
# the time the report was written.

# Drop scraped row 4 and the columns that are not needed (1, 2, 4 and 7).
tab <- Table2[-4, -c(1, 2, 4, 7)]

# Fill in the president column (column 4) by hand for rows where the scrape
# left it unusable -- presumably because the source cell spans several rows
# (TODO confirm against the page). Order matters: row 3 is built from rows 2
# and 4 after those have been set.
tab[, 4] <- as.character(tab[, 4])
tab[2, 4] <- tab[1, 4]
tab[4, 4] <- "George H. Bush"
tab[3, 4] <- paste(tab[c(2, 4), 4], collapse = "/")
tab[7, 4] <- tab[6, 4]
tab[] <- lapply(tab, as.character)

# Prepend the first secretary, whose row is not part of the retained rows.
tab <- rbind(
    c("Shirley M. Hufstedler", "November 30, 1979",
        "January 20, 1981", "Jimmy Carter"),
    tab
)

# Hand-coded flag: only the second secretary is marked as a K-12 educator.
tab$k_12_educator <- c(FALSE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
    FALSE)
names(tab)[1:4] <- c("Secretary_of_Ed.", "Took_Office", "Left_Office",
    "President")
rownames(tab) <- NULL

# Parse "Month day, year" strings into Date objects (month names are parsed
# according to the current locale).
tab[, 3] <- as.Date(tab[, 3], format = "%B %d, %Y")
tab[, 2] <- as.Date(tab[, 2], format = "%B %d, %Y")

# Row 9 is treated as still in office: use today's date as the end date.
tab[9, 3] <- Sys.Date()

# Days in office. Difference the Date columns directly and pin the unit to
# days: going through strptime() yields local-time values, so spans crossing
# a daylight-saving change come out as fractional days, and difftime()'s
# default units = "auto" does not guarantee days.
tab$days <- as.numeric(difftime(tab[, 3], tab[, 2], units = "days"))

# Party of the appointing president: default to Republican, then mark the
# rows served under Democratic presidents.
tab$President_Party <- "Republican"
tab$President_Party[c(1, 6, 9)] <- "Democrat"

The Secretaries of Education

# Display copy of the table: whole-number days and every column rendered as
# text, printed as an HTML table.
tab2 <- tab
tab2[["days"]] <- round(tab2[["days"]], digits = 0)
tab2[] <- lapply(tab2, function(column) as.character(column))
print(xtable(tab2), type = "html")
Secretary_of_Ed. Took_Office Left_Office President k_12_educator days President_Party
1 Shirley M. Hufstedler 1979-11-30 1981-01-20 Jimmy Carter FALSE 417 Democrat
2 Terrel H. Bell 1981-01-22 1985-01-20 Ronald Reagan TRUE 1459 Republican
3 William J. Bennett 1985-02-06 1988-09-20 Ronald Reagan FALSE 1322 Republican
4 Lauro F. Cavazos 1988-09-20 1990-12-12 Ronald Reagan/George H. Bush FALSE 813 Republican
5 Lamar Alexander 1991-03-22 1993-01-20 George H. Bush FALSE 670 Republican
6 Richard W. Riley 1993-01-21 2001-01-20 Bill Clinton FALSE 2921 Democrat
7 Roderick R. Paige 2001-01-20 2005-01-20 George W. Bush FALSE 1461 Republican
8 Margaret Spellings 2005-01-20 2009-01-20 George W. Bush FALSE 1461 Republican
9 Arne Duncan 2009-01-21 2014-01-24 Barack Obama FALSE 1829 Democrat


# Summarise days in office by K-12 educator status, overall (stats1) and
# within each president's party (stats2). Percentages are relative to the
# total number of days across all secretaries.
# Uses `%>%` and as_tibble() in place of the defunct `%.%` pipe and the
# deprecated tbl_df().
tab2 <- as_tibble(tab)
time <- sum(tab2$days)  # total days in office across all rows

# Note: the percentage column is named Per_time_by_party here as well, even
# though this summary is by educator status; the name is kept because the
# plotting code refers to it.
stats1 <- tab2 %>%
    group_by(k_12_educator) %>%
    summarise(
        time_by_educ_status = sum(days),
        Per_time_by_party = 100 * time_by_educ_status / time
    )

# summarise() only peels off the last grouping variable, so ungroup()
# explicitly to hand downstream code an ungrouped table.
stats2 <- tab2 %>%
    group_by(President_Party, k_12_educator) %>%
    summarise(
        time_by_party = sum(days),
        Per_time_by_party = 100 * time_by_party / time
    ) %>%
    ungroup()

# Bar chart of the percentage of total time in office, one bar per K-12
# educator status; the single dummy x position has its tick and label hidden.
ggplot(data = stats1, mapping = aes(x = factor(1), fill = k_12_educator)) +
    geom_bar(mapping = aes(weight = Per_time_by_party), position = "dodge") +
    scale_fill_manual(name = "K - 12\nEducator", values = c("grey25", "red")) +
    labs(x = "K-12 Educator", y = "Percent of Time") +
    theme_basic() +
    theme(axis.ticks.x = element_blank(), axis.text.x = element_blank()) +
    scale_y_continuous(limits = c(0, 90), expand = c(0, 0))

plot of chunk unnamed-chunk-3

# Render the educator-status summary as an HTML table.
stats1_table <- xtable(stats1)
print(stats1_table, type = "html")
k_12_educator time_by_educ_status Per_time_by_party
1 FALSE 10894.00 88.19
2 TRUE 1459.00 11.81


# Add a zero-valued Democrat / TRUE row to the party summary -- presumably so
# the Democrat facet still shows a (zero-height) TRUE bar -- then plot the
# percentage of time by educator status, faceted by party.
x <- data.frame("Democrat", TRUE, 0, 0, stringsAsFactors = FALSE)
names(x) <- names(stats2)

# Convert stats2 to a plain data frame before rbind(): binding onto a grouped
# tibble can carry over grouping metadata that no longer matches the rows.
stats3 <- rbind(as.data.frame(stats2), x)

ggplot(stats3, aes(x = k_12_educator, fill = k_12_educator)) +
    geom_bar(aes(weight = Per_time_by_party), position = "dodge", size = 2) +
    ylab("Percent of Time") + xlab("K-12 Educator") +
    facet_grid(~President_Party) +
    scale_fill_manual(values = c("grey25", "red"), name = "K - 12\nEducator") +
    theme_basic() +
    scale_y_continuous(expand = c(0, 0), limits = c(0, 50))

plot of chunk unnamed-chunk-4

# Render the party-by-educator-status summary as an HTML table.
stats2_table <- xtable(stats2)
print(stats2_table, type = "html")
President_Party k_12_educator time_by_party Per_time_by_party
1 Democrat FALSE 5167.00 41.83
2 Republican FALSE 5727.00 46.36
3 Republican TRUE 1459.00 11.81