For more info also see this post.
# Scrape the Wikipedia list of U.S. Secretaries of Education and tidy it into
# `tab`, one row per secretary.
URL <- "http://en.wikipedia.org/wiki/United_States_Secretary_of_Education"

# Read table number 2 from the page, skipping row 1; a character class is
# supplied for two columns.
Table2 <- readHTMLTable(
  URL,
  colClasses = c("character", "character"),
  skip.rows = 1,
  which = 2
)

# Drop row 4 and columns 1, 2, 4 and 7.
tab <- Table2[-4, c(-1, -2, -4, -7)]

# Column 4 (named "President" below) is patched by hand: row 2 copies row 1,
# row 4 is set explicitly, row 3 becomes "<row 2>/<row 4>", and row 7 copies
# row 6. Presumably the scraped table leaves these cells without a usable
# value -- confirm against the live page.
tab[, 4] <- as.character(tab[, 4])
tab[2, 4] <- tab[1, 4]
tab[4, 4] <- "George H. Bush"
tab[3, 4] <- paste(tab[c(2, 4), 4], collapse = "/")
tab[7, 4] <- tab[6, 4]

# Make every column plain character before binding a hand-written first row.
tab[] <- lapply(tab, as.character)
tab <- rbind(
  c("Shirley M. Hufstedler", "November 30, 1979", "January 20, 1981",
    "Jimmy Carter"),
  tab
)

# Hand-coded flag, one value per row: only row 2 is marked TRUE.
tab$k_12_educator <- c(FALSE, TRUE, rep(FALSE, 7))

names(tab)[1:4] <- c(
  "Secretary_of_Ed.", "Took_Office", "Left_Office", "President"
)
rownames(tab) <- NULL
# Parse the office dates, compute each tenure in days, and tag the party of
# the president for each row.

# Columns 2 and 3 hold the start and end of each term as text such as
# "January 20, 1981"; convert both to Date.
tab[, 3] <- as.Date(tab[, 3], format = "%B %d, %Y")
tab[, 2] <- as.Date(tab[, 2], format = "%B %d, %Y")

# Row 9's end date is replaced with today's date (presumably the sitting
# secretary, whose end date does not parse -- confirm against the table).
tab[9, 3] <- Sys.Date()

# Tenure length in days, computed directly on the Date columns with the unit
# fixed to days. Re-parsing the dates as local date-times (as strptime() does)
# makes spans that cross a daylight-saving change come out fractional, and
# leaving `units` on "auto" does not guarantee the result is in days.
tab$days <- as.numeric(difftime(tab[, 3], tab[, 2], units = "days"))

# Default every row to Republican, then mark rows 1, 6 and 9 as Democrat.
tab$President_Party <- "Republican"
tab$President_Party[c(1, 6, 9)] <- "Democrat"
# Build a display copy of `tab`: round the day counts to whole numbers, turn
# every column into character, and print the result as an HTML table.
tab2 <- tab
tab2$days <- round(tab2$days)
tab2[] <- lapply(tab2, as.character)
print(xtable(tab2), type = "html")
| | Secretary_of_Ed. | Took_Office | Left_Office | President | k_12_educator | days | President_Party |
|---|---|---|---|---|---|---|---|
| 1 | Shirley M. Hufstedler | 1979-11-30 | 1981-01-20 | Jimmy Carter | FALSE | 417 | Democrat |
| 2 | Terrel H. Bell | 1981-01-22 | 1985-01-20 | Ronald Reagan | TRUE | 1459 | Republican |
| 3 | William J. Bennett | 1985-02-06 | 1988-09-20 | Ronald Reagan | FALSE | 1322 | Republican |
| 4 | Lauro F. Cavazos | 1988-09-20 | 1990-12-12 | Ronald Reagan/George H. Bush | FALSE | 813 | Republican |
| 5 | Lamar Alexander | 1991-03-22 | 1993-01-20 | George H. Bush | FALSE | 670 | Republican |
| 6 | Richard W. Riley | 1993-01-21 | 2001-01-20 | Bill Clinton | FALSE | 2921 | Democrat |
| 7 | Roderick R. Paige | 2001-01-20 | 2005-01-20 | George W. Bush | FALSE | 1461 | Republican |
| 8 | Margaret Spellings | 2005-01-20 | 2009-01-20 | George W. Bush | FALSE | 1461 | Republican |
| 9 | Arne Duncan | 2009-01-21 | 2014-01-24 | Barack Obama | FALSE | 1829 | Democrat |
# Wrap `tab` as a dplyr table and take the total number of days over all rows;
# the total is the denominator for the percentages below.
tab2 <- tbl_df(tab)
time <- sum(tab2$days)

# Days served, and percent of total days, by K-12 educator status.
stats1 <- summarise(
  group_by(tab2, k_12_educator),
  time_by_educ_status = sum(days),
  Per_time_by_party = 100 * time_by_educ_status / time
)

# Days served, and percent of total days, by party and K-12 educator status.
stats2 <- summarise(
  group_by(tab2, President_Party, k_12_educator),
  time_by_party = sum(days),
  Per_time_by_party = 100 * time_by_party / time
)
# Bar chart of percent of time by K-12 educator status: one bar per status,
# dodged side by side at a single x position, with the x-axis text and ticks
# removed.
ggplot(stats1, aes(x = factor(1), fill = k_12_educator)) +
  geom_bar(aes(weight = Per_time_by_party), position = "dodge") +
  scale_fill_manual(
    values = c("grey25", "red"),
    name = "K - 12\nEducator"
  ) +
  ylab("Percent of Time") +
  xlab("K-12 Educator") +
  theme_basic() +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 90))

# The same summary as an HTML table.
print(xtable(stats1), type = "html")
| | k_12_educator | time_by_educ_status | Per_time_by_party |
|---|---|---|---|
| 1 | FALSE | 10894.00 | 88.19 |
| 2 | TRUE | 1459.00 | 11.81 |
# Append a zero-valued Democrat / TRUE row to `stats2`, named with the same
# columns, and store the result as `stats3`.
x <- setNames(data.frame("Democrat", TRUE, 0, 0), names(stats2))
stats3 <- rbind(stats2, x)
# Bar chart of percent of time by K-12 educator status, with one facet per
# president's party.
ggplot(stats3, aes(x = k_12_educator, fill = k_12_educator)) +
  geom_bar(
    aes(weight = Per_time_by_party),
    position = "dodge",
    size = 2
  ) +
  ylab("Percent of Time") +
  xlab("K-12 Educator") +
  facet_grid(~President_Party) +
  scale_fill_manual(
    values = c("grey25", "red"),
    name = "K - 12\nEducator"
  ) +
  theme_basic() +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 50))

# The party-by-status summary (without the padding row) as an HTML table.
print(xtable(stats2), type = "html")
| | President_Party | k_12_educator | time_by_party | Per_time_by_party |
|---|---|---|---|---|
| 1 | Democrat | FALSE | 5167.00 | 41.83 |
| 2 | Republican | FALSE | 5727.00 | 46.36 |
| 3 | Republican | TRUE | 1459.00 | 11.81 |