We can extract data from Facebook and analyze it later. Social media mining is one of the most interesting areas of data science.
library(Rfacebook)
## Warning: package 'Rfacebook' was built under R version 3.3.3
## Loading required package: httr
## Warning: package 'httr' was built under R version 3.3.3
## Loading required package: rjson
## Loading required package: httpuv
##
## Attaching package: 'Rfacebook'
## The following object is masked from 'package:methods':
##
## getGroup
# You need to use your own key, which can be obtained from the Facebook developer page
# My_App_ID <- "xxxxxxxxxxxxxxxxxxx"
# My_App_Secret <- "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
#fb_oauth <- fbOAuth(app_id=My_App_ID, app_secret=My_App_Secret,extended_permissions = TRUE)
#save(fb_oauth, file="fb_oauth")
# Restore the OAuth token object `fb_oauth` from the file of the same name.
load("fb_oauth")
# Fetch the pages liked by the authenticated user ("me").
my_likes.df <- getLikes(user = "me", token = fb_oauth)
# Show up to 10 randomly chosen liked-page names. The size is capped at the
# number of available names because sample() raises an error when asked for
# more elements than the vector holds.
sample(my_likes.df$names, min(10, length(my_likes.df$names)))
## [1] "NASA Robotic Mining Competition"
## [2] "Drones"
## [3] "Google Brain"
## [4] "Machine Learning Weekly"
## [5] "Data Science Machine Learning AI"
## [6] "Artificial Intelligence and Machine Learning"
## [7] "DeepLearning"
## [8] "Sports on Facebook"
## [9] "Quantum World: Awaken Your Mind"
## [10] "Machine Learning Academy"
# Keep only the page-name and page-ID columns of the likes table, then print it.
names.id <- my_likes.df[, c("names", "id")]
names.id
## names
## 1 Idealab
## 2 TechCrunch
## 3 Larry Bossidy
## 4 Michael Crichton
## 5 Wine Tasting In Paris
## 6 Wine tasting
## 7 XHIT
## 8 XHIT Daily
## 9 HASfit
## 10 The Fitness Café
## 11 FITNESS
## 12 HIPHOP DANCE
## 13 RobotShop
## 14 RobotDigg Equip Makers
## 15 Binary ROBOT
## 16 IEEE Robotics & Automation Society
## 17 NASA Robotic Mining Competition
## 18 Robotics
## 19 Robot MarketPlace
## 20 3D Robotics Inc.
## 21 roBOt Festival
## 22 Drones
## 23 Daz 3D
## 24 Bridge design & engineering
## 25 Jazz
## 26 WE Love Hip-Hop And RnB
## 27 MZHipHop
## 28 Modern dance
## 29 The World of Art, Classical Music and Philosophy
## 30 Classical Philosophy
## 31 Classic FM
## 32 Classical Music Humor
## 33 Classical Music
## 34 Classical (music)
## 35 Life at Google
## 36 Quantum World: Awaken Your Mind
## 37 Google Brain
## 38 Google Brain
## 39 Magic Leap
## 40 MIT MediaLab
## 41 MIT Media Lab
## 42 Collective Learning group at The MIT Media Lab
## 43 MIT Technology Review
## 44 Massachusetts Institute of Technology (MIT)
## 45 Institute for Advanced Study
## 46 The Science Scoop
## 47 The New York Times - Science
## 48 Science & Technology
## 49 Latest science and technology news
## 50 Deep learning
## 51 Udacity Machine Learning
## 52 Centre for Computational Statistics and Machine Learning
## 53 Artificial Intelligence and Machine Learning
## 54 Quant ML - Trading through Machine Learning
## 55 Machine Learning for Finance and Economics
## 56 DeepLearning
## 57 Computer Vision and Machine Learning- AI, AR, Robotics
## 58 Machine Learning with Python
## 59 Machine Learning, Raspberry Pi, Matlab, FPGA & VLSI Training
## 60 Data Science Machine Learning AI
## 61 Machine Learning and Text Analytics
## 62 Machine Learning For Beginners.
## 63 Big Data Analytics Apache Hadoop & Spark Machine Learning Pune Mumbai
## 64 Machine Learning and Computational Biology Lab
## 65 Center for Statistics and Machine Learning, Princeton University
## 66 NanoNets: Machine Learning API
## 67 Future of Machine Learning
## 68 Machine Learning Weekly
## 69 Machine Learning Laboratory
## 70 Deep Learning - Artificial Intelligence
## 71 Machine Learning Academy
## 72 Machine Learning Works
## 73 Machine Learning 101
## 74 Data Science, AI, Machine Learning & Big Data Analytics Community
## 75 Learning Machine
## 76 Machine Learning
## 77 The Learning Machine
## 78 MachineLearning
## 79 Machine Learning & Deep Learning Community
## 80 Machine Learning
## 81 I Love Machine Learning
## 82 Machine Learning Mastery
## 83 Machine learning
## 84 USA TODAY
## 85 China Times
## 86 ABC News
## 87 CBS News
## 88 Fox News
## 89 BBC News
## 90 CNN
## 91 Sports on Facebook
## 92 Tennis
## 93 Ritz Crackers
## 94 Yani Tseng
## 95 Yen-Hsun Rendy Lu
## id
## 1 263061880115
## 2 8062627951
## 3 110173555672749
## 4 214772491870366
## 5 571821909496510
## 6 105569919477264
## 7 485605814826807
## 8 1876516215937717
## 9 100449070050103
## 10 1734754566754138
## 11 13419577441
## 12 570533303104242
## 13 54731600766
## 14 602715833080162
## 15 343598849176130
## 16 158313264304982
## 17 79294804182
## 18 587600401261351
## 19 196408181981
## 20 112109168870108
## 21 109059962448362
## 22 683457958405316
## 23 72341345539
## 24 222627554421070
## 25 108166409205738
## 26 393863943975302
## 27 144346524016
## 28 107532252603348
## 29 274978509612136
## 30 114150845268795
## 31 6569469259
## 32 207019572653107
## 33 161059243906300
## 34 106224942746592
## 35 152260356535
## 36 294983143966345
## 37 1511998815761829
## 38 180042075526639
## 39 356157614473764
## 40 116168171726654
## 41 103772022994388
## 42 426925744087638
## 43 17043549797
## 44 126533127390327
## 45 347067291979857
## 46 172214092811872
## 47 105307012882667
## 48 103952640066535
## 49 622412621113892
## 50 190201717731858
## 51 1878084399080396
## 52 262087203880417
## 53 1642994685977845
## 54 1679321298972872
## 55 1640854809485834
## 56 894581557318208
## 57 586315654780340
## 58 259397054392010
## 59 1488228494542182
## 60 202980179723917
## 61 412256645563752
## 62 1845771139002927
## 63 382846871837859
## 64 361569250631783
## 65 1581073412215592
## 66 256410704776873
## 67 640766646041443
## 68 353808101468759
## 69 912345572197856
## 70 472081616289751
## 71 358318127900332
## 72 118335501896735
## 73 1797133990613322
## 74 1388055868128655
## 75 716181448412695
## 76 119762488098825
## 77 1435521286672700
## 78 1728577810728579
## 79 569123369961174
## 80 107675749255490
## 81 1687667801535470
## 82 1429846323896563
## 83 112440992104486
## 84 13652355666
## 85 109276079099293
## 86 86680728811
## 87 131459315949
## 88 15704546335
## 89 228735667216
## 90 5550296508
## 91 359129892456
## 92 105650876136555
## 93 205915571664
## 94 92299242927
## 95 57507133459
# Print the row(s) whose page name matches "ABC News" so the page ID can be read off.
subset(names.id, grepl("ABC News", names))
## names id
## 86 ABC News 86680728811
# Fetch 10 posts from the page with ID 86680728811 (the ABC News row found above).
ABC_News_FB <- getPage(
  86680728811,
  token = fb_oauth,
  n = 10
)
## 10 posts
# Print posts 1, 2 and 5: an "[index] " prefix followed by the message text
# wrapped at 60 columns.
for (i in c(1, 2, 5)) {
  cat("[", i, "] ", sep = "")
  writeLines(strwrap(ABC_News_FB$message[i], width = 60))
}
## [1] NA
## [2] President Trumps second travel ban proposal is still being
## largely blocked by the courts after the latest decision was
## handed down today.
## [5] Bodycam footage captures a firefighter being engulfed in
## flames after a propane tank exploded. The firefighter was
## treated for burns and released the next day. abcnews.com
An introductory article on text mining in R was published in R News (Feinerer, 2008). The main structure for managing documents in tm is a so-called Corpus, representing a collection of text documents. In machine learning and natural language processing, a topic model is a type of statistical model for discovering the abstract “topics” that occur in a collection of documents.
library(tm)
## Loading required package: NLP
##
## Attaching package: 'NLP'
## The following object is masked from 'package:httr':
##
## content
library(topicmodels)
## Warning: package 'topicmodels' was built under R version 3.3.3
# Build a cleaned corpus from the ABC News post messages and convert it into a
# document-term matrix with no empty documents (as required by LDA).

# Some posts have no text and come back with an NA message; drop them so they
# do not enter the corpus as documents.
post_text <- ABC_News_FB$message
post_text <- post_text[!is.na(post_text)]
myCorpus <- Corpus(VectorSource(post_text))
myCorpus <- tm_map(myCorpus, content_transformer(tolower))

# Remove URLs: "http" followed by any run of non-whitespace characters.
removeURL <- function(x) gsub("http[^[:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeURL))

# Remove everything that is neither a letter nor whitespace
# (i.e. digits and punctuation).
removeNumPunct <- function(x) gsub("[^[:alpha:][:space:]]*", "", x)
myCorpus <- tm_map(myCorpus, content_transformer(removeNumPunct))

# Collapse the runs of whitespace left behind by the removals above.
myCorpus <- tm_map(myCorpus, stripWhitespace)
myCorpusCopy <- myCorpus

# The tm control option is `wordLengths` (plural). The previous spelling
# `wordLength` was silently ignored, so the default minimum length of 3 was
# applied and one- and two-letter words were dropped.
tdm <- TermDocumentMatrix(myCorpus, control = list(wordLengths = c(1, Inf)))
dtm <- as.DocumentTermMatrix(tdm)

# Total number of term occurrences in each document.
rowTotals <- apply(dtm, 1, sum)
# Keep only documents that still contain at least one term after cleaning.
dtm.new <- dtm[rowTotals > 0, ]
# Fit an LDA model with 7 topics. The seed is fixed so that the fitted topics
# are reproducible from run to run.
lda <- LDA(dtm.new, k = 7, control = list(seed = 1234))
# Take the first 5 terms of every topic (one column per topic) ...
term <- terms(lda, 5)
# ... and collapse each column into a single comma-separated string.
term <- apply(term, MARGIN = 2, paste, collapse = ", ")
cat("The Topic examples are:")
## The Topic examples are:
term
## Topic 1
## "and, accusations, accuser, administration, against"
## Topic 2
## "car, she, escapes, florida, just"
## Topic 3
## "the, about, after, and, boycott"
## Topic 4
## "against, appeals, ban, circuit, court"
## Topic 5
## "against, announce, attorneys, constitution, donald"
## Topic 6
## "the, after, being, firefighter, was"
## Topic 7
## "violations, about, against, alleged, appear"
library(RColorBrewer)
library(wordcloud)
## Warning: package 'wordcloud' was built under R version 3.3.3
# Draw a word cloud from the names of the liked pages, coloured with the
# 8-colour "Dark2" ColorBrewer palette.
pal2 <- brewer.pal(n = 8, name = "Dark2")
words <- my_likes.df$names
wordcloud(
  words,
  scale = c(4, 0.5),
  min.freq = 1,
  max.words = Inf,
  colors = pal2
)
There are many more functions you can use to extract data from Facebook; please see the Rfacebook reference manual: https://cran.r-project.org/web/packages/Rfacebook/Rfacebook.pdf.
# Fetch 10 posts from the group with ID 1388055868128655.
AI_Group_FB <- getGroup(
  1388055868128655,
  token = fb_oauth,
  n = 10
)
## 10 posts
# Print group posts 1, 2 and 10: an "[index] " prefix followed by the message
# text wrapped at 60 columns.
for (i in c(1, 2, 10)) {
  cat("[", i, "] ", sep = "")
  writeLines(strwrap(AI_Group_FB$message[i], width = 60))
}
## [1] NA
## [2] Come and join our new community group for
## #ArtificialIntelligence #AI #MachineLearning #DataScience
## #London #startups
## [10] <U+202A>Please vote for Cognitionx http://pitchatpalace.com/vote/?
## #ArtificialIntelligence #machinelearning #startup #London
## #DataScience