# Load the three English corpora into character vectors, one element per line.
#
# Each file is read through a connection opened explicitly in binary mode
# ("rb"). When readLines() is given a bare path it opens the file in text
# mode, and on Windows an embedded Ctrl-Z (0x1A) byte can then be treated as
# end-of-file, silently truncating the read. Binary mode reads the whole file;
# readLines() accepts LF, CRLF and CR line endings in either mode.
read_corpus <- function(path) {
  con <- file(path, open = "rb")
  # Close the connection even if readLines() fails part-way through.
  on.exit(close(con), add = TRUE)
  # encoding = "UTF-8" only marks the strings as UTF-8 (no re-encoding);
  # skipNul = TRUE skips embedded NUL bytes instead of cutting the line short.
  readLines(con, encoding = "UTF-8", skipNul = TRUE)
}

blogs <- read_corpus("en_US.blogs.txt")
news <- read_corpus("en_US.news.txt")
twitter <- read_corpus("en_US.twitter.txt")
##      File   Lines    Words Characters
## 1   Blogs  899288 37546250  206824505
## 2    News 1010242 34762395  203223159
## 3 Twitter 2360148 30093413  162096241
# Draw side-by-side histograms of characters per line for each corpus.
#
# nchar() returns the character count of every line; one histogram is drawn
# per source, each with its own fill colour and a shared x-axis label.
line_lengths <- list(
  Blogs = nchar(blogs),
  News = nchar(news),
  Twitter = nchar(twitter)
)
fill_colors <- c(
  Blogs = "lightblue",
  News = "lightgreen",
  Twitter = "lightpink"
)

# par() returns the previous settings; keep them so the 1x3 layout does not
# leak into plots drawn later in the session.
old_par <- par(mfrow = c(1, 3))
for (source_name in names(line_lengths)) {
  hist(
    line_lengths[[source_name]],
    main = paste(source_name, "Line Length"),
    col = fill_colors[[source_name]],
    xlab = "Characters"
  )
}
par(old_par)
