library(rvest)
library(tm)
library(wordcloud)
# Scrape a speech transcript, clean it with tm, then plot the most frequent
# terms as a bar chart and as a word cloud.

# Download the page and keep only the text inside its <blockquote> elements.
htmlPage <- read_html("https://www.huffpost.com/entry/i-have-a-dream-speech-text_n_809993")
speech_text <- html_nodes(htmlPage, "blockquote")
speech_text <- html_text(speech_text)

# Fail loudly if the selector matched nothing (e.g. the page layout changed),
# rather than plotting an empty corpus further down.
if (length(speech_text) == 0) {
  stop("No <blockquote> text found on the page.", call. = FALSE)
}

# One corpus document per extracted <blockquote>.
vs <- VectorSource(speech_text)
corpus <- Corpus(vs)

# Lower-case first: removeWords() is case-sensitive and stopwords("english")
# is all lower-case, so capitalised stop words ("The", "And") would otherwise
# survive and later be counted once TermDocumentMatrix() lower-cases terms.
corpus <- tm_map(corpus, content_transformer(tolower))
# Drop stop words before punctuation so entries that contain an apostrophe
# (e.g. "don't") still match the stop-word list.
corpus <- tm_map(corpus, removeWords, stopwords("english"))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
# Collapse whitespace last; the removals above leave runs of blanks behind.
corpus <- tm_map(corpus, stripWhitespace)

# Term frequencies summed over all documents, most frequent first.
tdm <- as.matrix(TermDocumentMatrix(corpus))
fre <- sort(rowSums(tdm), decreasing = TRUE)

# head() instead of fre[1:20] so that fewer than 20 terms does not produce
# NA bars.
barplot(head(fre, 20), las = 2, col = rainbow(10))

# Word cloud of up to 80 terms, most frequent placed first, 35% rotated.
wordcloud(
  corpus,
  min.freq = 1,
  max.words = 80,
  random.order = FALSE,
  rot.per = 0.35,
  colors = brewer.pal(8, "Dark2")
)
| Ozone | Solar.R | Wind | Temp | Month | Day |
|---|---|---|---|---|---|
| 41 | 190 | 7.4 | 67 | 5 | 1 |
| 36 | 118 | 8.0 | 72 | 5 | 2 |
| 12 | 149 | 12.6 | 74 | 5 | 3 |
| 18 | 313 | 11.5 | 62 | 5 | 4 |
| NA | NA | 14.3 | 56 | 5 | 5 |
| 28 | NA | 14.9 | 66 | 5 | 6 |
| 23 | 299 | 8.6 | 65 | 5 | 7 |
| 19 | 99 | 13.8 | 59 | 5 | 8 |
| 8 | 19 | 20.1 | 61 | 5 | 9 |
| NA | 194 | 8.6 | 69 | 5 | 10 |
| Sepal.Length | Sepal.Width | Petal.Length | Petal.Width | Species |
|---|---|---|---|---|
| 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| 5.4 | 3.9 | 1.7 | 0.4 | setosa |
| 4.6 | 3.4 | 1.4 | 0.3 | setosa |
| 5.0 | 3.4 | 1.5 | 0.2 | setosa |
| 4.4 | 2.9 | 1.4 | 0.2 | setosa |
| 4.9 | 3.1 | 1.5 | 0.1 | setosa |
Dietterich (1997); Dietterich (2000); Sebastiani (2002); Blum and Langley (1997); Michie et al. (1994)
Blum, Avrim L, and Pat Langley. 1997. “Selection of Relevant Features and Examples in Machine Learning.” Artificial Intelligence 97 (1-2): 245–71.
Dietterich, Thomas G. 1997. “Machine-Learning Research.” AI Magazine 18 (4): 97–136.
———. 2000. “Ensemble Methods in Machine Learning.” In International Workshop on Multiple Classifier Systems, 1–15. Springer.
Michie, Donald, David J Spiegelhalter, CC Taylor, and others. 1994. “Machine Learning.” Neural and Statistical Classification 13 (1994): 1–298.
Sebastiani, Fabrizio. 2002. “Machine Learning in Automated Text Categorization.” ACM Computing Surveys (CSUR) 34 (1): 1–47.