# Read one corpus file into a character vector, one element per line.
#
# The file is read through a connection opened in binary mode ("rb") rather
# than by passing the path straight to readLines(). A bare path is opened in
# text mode, where on Windows a stray 0x1A (SUB) byte is treated as
# end-of-file and the read is silently truncated. Binary mode guards against
# that; readLines() still accepts LF, CRLF or CR as the line terminator.
#
# path: path to the text file to read.
# Returns a character vector marked as UTF-8, with embedded NULs skipped.
read_corpus <- function(path) {
  con <- file(path, open = "rb")
  # Close the connection even if readLines() fails part-way through.
  on.exit(close(con), add = TRUE)
  readLines(con, encoding = "UTF-8", skipNul = TRUE)
}

# Load the three English corpora (blogs, news, Twitter).
blogs <- read_corpus("en_US.blogs.txt")
news <- read_corpus("en_US.news.txt")
twitter <- read_corpus("en_US.twitter.txt")
##      File   Lines    Words Characters
## 1   Blogs  899288 37546250  206824505
## 2    News 1010242 34762395  203223159
## 3 Twitter 2360148 30093413  162096241
# Draw the per-line character-count distribution of each corpus side by side.
#
# par() returns the previous values of the settings it changes; keep them so
# the 1x3 layout does not leak into plots drawn after this section.
old_par <- par(mfrow = c(1, 3))
hist(nchar(blogs), main = "Blogs Line Length", col = "lightblue", xlab = "Characters")
hist(nchar(news), main = "News Line Length", col = "lightgreen", xlab = "Characters")
hist(nchar(twitter), main = "Twitter Line Length", col = "lightpink", xlab = "Characters")
# Restore the graphics layout that was in effect before the panel was drawn.
par(old_par)