# Load the three en_US corpora. Each object is a character vector holding one
# element per line of its source file.
blogs <- readLines(con = "en_US.blogs.txt")
news <- readLines(con = "en_US.news.txt")
# skipNul = TRUE tells readLines() to skip embedded NUL bytes while reading.
twitter <- readLines(con = "en_US.twitter.txt", skipNul = TRUE)

# Show the first six blog lines as a quick sanity check of the import.
print(head(blogs, n = 6L))
## [1] "In the years thereafter, most of the Oil fields and platforms were named after pagan “gods”."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                        
## [2] "We love you Mr. Brown."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                              
## [3] "Chad has been awesome with the kids and holding down the fort while I work later than usual! The kids have been busy together playing Skylander on the XBox together, after Kyan cashed in his $$$ from his piggy bank. He wanted that game so bad and used his gift card from his birthday he has been saving and the money to get it (he never taps into that thing either, that is how we know he wanted it so bad). We made him count all of his money to make sure that he had enough! It was very cute to watch his reaction when he realized he did! He also does a very good job of letting Lola feel like she is playing too, by letting her switch out the characters! She loves it almost as much as him."
## [4] "so anyways, i am going to share some home decor inspiration that i have been storing in my folder on the puter. i have all these amazing images stored away ready to come to life when we get our home."                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                             
## [5] "With graduation season right around the corner, Nancy has whipped up a fun set to help you out with not only your graduation cards and gifts, but any occasion that brings on a change in one's life. I stamped the images in Memento Tuxedo Black and cut them out with circle Nestabilities. I embossed the kraft and red cardstock with TE's new Stars Impressions Plate, which is double sided and gives you 2 fantastic patterns. You can see how to use the Impressions Plates in this tutorial Taylor created. Just one pass through your die cut machine using the Embossing Pad Kit is all you need to do - super easy!"                                                                                    
## [6] "If you have an alternative argument, let's hear it! :)"
# Strip a fixed set of ASCII punctuation from each line and lower-case it.
#
# lines: character vector, one element per line of text.
# Returns a character vector of the same length as `lines`.
#
# Only the characters . , ' @ ? ! : ; ( ) $ # - are removed; anything else
# (for example "&" or typographic quotes) is left in place.
clean_lines <- function(lines) {
  # A single character-class pass removes the same characters as thirteen
  # separate gsub() sweeps would. Inside [...] every listed character is
  # literal; "-" is placed last so it cannot be read as a range.
  tolower(gsub("[.,'@?!:;()$#-]", "", lines))
}

# Each corpus is cleaned in place (the cleaned lines overwrite the raw ones)
# and then split into one character vector of tokens per line.
# NOTE: splitting on a single literal space yields an empty-string token
# wherever two spaces are adjacent or a line starts with a space.
blogs <- clean_lines(blogs)
USblogs <- strsplit(blogs, " ", fixed = TRUE)

news <- clean_lines(news)
USnews <- strsplit(news, " ", fixed = TRUE)

twitter <- clean_lines(twitter)
UStwitter <- strsplit(twitter, " ", fixed = TRUE)

# Show the tokens of the first six tweets.
print(head(UStwitter))
## [[1]]
##  [1] "how"     "are"     "you"     "btw"     "thanks"  "for"     "the"    
##  [8] "rt"      "you"     "gonna"   "be"      "in"      "dc"      "anytime"
## [15] "soon"    "love"    "to"      "see"     "you"     "been"    "way"    
## [22] "way"     "too"     "long"   
## 
## [[2]]
##  [1] "when"    "you"     "meet"    "someone" "special" "youll"   "know"   
##  [8] "your"    "heart"   "will"    "beat"    "more"    "rapidly" "and"    
## [15] "youll"   "smile"   "for"     "no"      "reason" 
## 
## [[3]]
## [1] "theyve"  "decided" "its"     "more"    "fun"     "if"      "i"      
## [8] "dont"   
## 
## [[4]]
##  [1] "so"      "tired"   "d"       "played"  "lazer"   "tag"     "&"      
##  [8] "ran"     "a"       "lot"     "d"       "ughh"    "going"   "to"     
## [15] "sleep"   "like"    "in"      "5"       "minutes"
## 
## [[5]]
##  [1] "words"    "from"     "a"        "complete" "stranger" "made"    
##  [7] "my"       "birthday" "even"     "better"  
## 
## [[6]]
##  [1] "first"    "cubs"     "game"     "ever"     "wrigley"  "field"   
##  [7] "is"       "gorgeous" "this"     "is"       "perfect"  "go"      
## [13] "cubs"     "go"
# Flatten a list of per-line token vectors into one character vector of words.
#
# tokens: list of character vectors, as returned by strsplit().
# Returns a character vector with every empty-string token removed.
#
# strsplit() on a single space emits "" wherever two spaces are adjacent or a
# line starts with a space; those are not words and would otherwise be counted
# as one of the most frequent "words" in the corpus.
flatten_tokens <- function(tokens) {
  words <- unlist(tokens)
  words[nzchar(words)]
}

LUStwitter <- flatten_tokens(UStwitter)
LUSnews <- flatten_tokens(USnews)
LUSblogs <- flatten_tokens(USblogs)

# Pool the three corpora into a single vector of words.
a <- c(LUSblogs, LUSnews, LUStwitter)
print(length(a))
## [1] 101902277
# NOTE(review): the count above was recorded before empty tokens were dropped;
# re-run to refresh it.

# Count occurrences of each distinct word, most frequent first.
tb <- sort(table(a), decreasing = TRUE)


print("Most Common")
## [1] "Most Common"
print(head(tb, 50))
# NOTE(review): the listing below was recorded before empty tokens were
# dropped. The blank-named entry (463792, between "but" and "he") is presumably
# the empty string and should no longer appear once this is re-run.
## a
##     the      to     and       a      of      in       i     for      is 
## 4715745 2748637 2397666 2366710 2003693 1637295 1585368 1094776 1069257 
##    that     you      it      on    with     was      my      at      be 
## 1029769  917391  883542  813625  711784  622710  597806  566700  545439 
##    this    have     are      as     but              he     not      we 
##  529864  528273  488017  479008  470763  463792  419689  405024  390920 
##    from      so      me     all    they    will      by      or    said 
##  382676  375423  358473  325474  314026  313814  312871  308455  304414 
##    your    just     his      an   about     its     out      up     one 
##  300643  300440  300163  297221  294537  293603  290995  287553  284646 
##    what      if    like     has    when 
##  269501  268396  266934  259346  258872

# Bar chart of the ten most frequent words.
barplot(head(tb, 10), col = "black")

Later, we will conduct statistical analysis and train models to eventually build the app.