# Shell: long listing (with allocated blocks, human-readable sizes) of the blogs corpus.
ls -l -a -s -h ./final/en_US/en_US.blogs.txt
# R: report the size of the blogs corpus as a one-column data frame keyed by path.
blogs_info <- file.info("./final/en_US/en_US.blogs.txt")
blogs_info[, "size", drop = FALSE]
## size
## ./final/en_US/en_US.blogs.txt 210160014
# Shell: count the lines in the Twitter corpus.
wc -l "./final/en_US/en_US.twitter.txt"
# R: read the whole Twitter corpus and report how many lines it holds.
con <- file("./final/en_US/en_US.twitter.txt", open = "r")
length(readLines(con))
## [1] 2360148
close(con)
# Shell: print the longest line of each English corpus file.
# The awk program must be wrapped in plain ASCII single quotes; typographic
# quotes are not recognised by the shell. `max` starts out unset, which awk
# treats as 0 in the numeric comparison.
for corpus in twitter blogs news; do
  awk '{ if (length($0) > max) { max = length($0); maxline = $0 } } END { print maxline }' \
    "./final/en_US/en_US.${corpus}.txt"
done
# Return the number of characters in the longest line of the file at `path`.
#
# The file is opened in text mode and read fully into memory. The connection is
# closed on exit, even when reading fails part-way through.
max_line_length <- function(path) {
  con <- file(path, "r")
  on.exit(close(con), add = TRUE)
  max(nchar(readLines(con)))
}

# Longest line in each of the three English corpora.
max_blog <- max_line_length("./final/en_US/en_US.blogs.txt")
max_blog
## [1] 40835
max_twitter <- max_line_length("./final/en_US/en_US.twitter.txt")
max_twitter
## [1] 213
max_news <- max_line_length("./final/en_US/en_US.news.txt")
max_news
## [1] 5760
# Shell: ratio of tweets containing "love" to tweets containing "hate".
# No space is allowed after `=` in a shell assignment; with a space the variable
# is not set and the command substitution's output is run as a command instead.
# `strings` is kept from the original pipeline (presumably to strip non-printable
# bytes before grep sees them -- confirm).
love_num=$(strings ./final/en_US/en_US.twitter.txt | grep -c love)
hate_num=$(strings ./final/en_US/en_US.twitter.txt | grep -c hate)
# Pass the counts in as awk variables and divide, matching the R computation
# of love_num / hate_num.
awk -v love="$love_num" -v hate="$hate_num" 'BEGIN { print love / hate; exit }'
# R: ratio of tweets mentioning "love" to tweets mentioning "hate" (case-sensitive).
# The corpus is read once, and both searches use a literal substring match so
# that a tweet beginning or ending with the word is counted as well.
con <- file("./final/en_US/en_US.twitter.txt", "r")
tweets <- readLines(con)
close(con)
love_counts <- grepl("love", tweets, fixed = TRUE)
love_num <- sum(love_counts)
hate_counts <- grepl("hate", tweets, fixed = TRUE)
hate_num <- sum(hate_counts)
love_num / hate_num
# NOTE(review): the figure below was recorded with the earlier ".love." /
# ".hate." regex patterns, which skip tweets that start or end with the word;
# re-run to refresh it.
## [1] 3.964942
# Shell: print every tweet that mentions "biostats".
# The path is given relative to the project root, like the other corpus paths in this file.
strings ./final/en_US/en_US.twitter.txt | grep biostats
# R: show every tweet that mentions "biostats" (case-sensitive).
con <- file("./final/en_US/en_US.twitter.txt", "r")
text <- readLines(con)
close(con)
# Literal substring match: a regex of ".biostats." would demand a character on
# both sides of the word and so miss tweets that start or end with it.
text[grepl("biostats", text, fixed = TRUE)]
# NOTE(review): output recorded with the earlier ".biostats." pattern; re-run to
# confirm no additional tweets are matched.
## [1] "i know how you feel.. i have biostats on tuesday and i have yet to study =/"
# Shell: print every tweet containing the chess/kickboxing quote.
# The pattern must be wrapped in plain ASCII double quotes; typographic quotes
# are passed through as literal characters and the words become separate arguments.
strings ./final/en_US/en_US.twitter.txt | grep "A computer once beat me at chess, but it was no match for me at kickboxing"
# R: pull every tweet that contains the chess/kickboxing quote (case-sensitive).
con <- file("./final/en_US/en_US.twitter.txt", open = "r")
text <- readLines(con)
close(con)
text[grep(
  "A computer once beat me at chess, but it was no match for me at kickboxing",
  text,
  ignore.case = FALSE
)]
## [1] "A computer once beat me at chess, but it was no match for me at kickboxing"
## [2] "A computer once beat me at chess, but it was no match for me at kickboxing"
## [3] "A computer once beat me at chess, but it was no match for me at kickboxing"