install.packages("rmarkdown") # Make cool dynamic documents
install.packages("knitr") # Run R Code Chunks
install.packages("ggplot2") # For plotting
install.packages("DT") # Interactive HTML tables
install.packages("tidyverse") # Tidy Universe
install.packages("RCurl")
install.packages("XML")
install.packages("twitteR")
install.packages("ROAuth")

# Download the raw HTML of the page at this URL as a single string.
berkleySurveyHtml <- getURL("https://datascience.berkeley.edu/what-is-big-data/")

# Strip every <script>...</script> span from the markup before parsing.
htmlParsed <- gsub("<script.*?/script>", "", berkleySurveyHtml)

# Parse the cleaned string as HTML (not XML) using internal nodes.
htmlDoc <- xmlParse(htmlParsed, isHTML = TRUE, useInternalNodes = TRUE)
## Tag nav invalid
## Tag article invalid
## Tag article invalid
## Tag article invalid
## Tag article invalid
## Tag nav invalid
# Select every anchor whose text starts with "@"; each one is treated as a
# Twitter handle link.
tNodes <- getNodeSet(htmlDoc, "//a[starts-with(text(),'@')]")

# For each anchor, the name is read from the node located two siblings before
# the anchor's parent element.
expertNames <- vapply(
  tNodes,
  function(tNode) {
    wrapper <- xmlParent(tNode)
    nameNode <- getSibling(getSibling(wrapper, after = FALSE), after = FALSE)
    xmlValue(nameNode)
  },
  character(1)
)

# Keep the first handle-like token of each anchor's text. The character class
# includes "_" because Twitter screen names may contain underscores, and
# str_extract() yields one string per anchor instead of the list returned by
# str_extract_all(), so a stray second token can no longer break the
# data frame assignment.
handles <- str_extract(
  trimws(vapply(tNodes, xmlValue, character(1))),
  "[a-zA-Z0-9@_]+"
)

# Build the table in one step rather than growing it row by row; this also
# avoids the all-NA placeholder row when no anchors are found.
df <- data.frame(Name = expertNames, Handle = handles, stringsAsFactors = FALSE)
datatable(df)
##
## Attaching package: 'twitteR'
## The following objects are masked from 'package:dplyr':
##
## id, location
## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:twitteR':
##
## id
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## [1] "Using direct authentication"
## Warning in sample.int(length(x), size, replace, prob): '.Random.seed' is
## not an integer vector but of type 'NULL', so ignored
# Drop the one handle that is excluded from the lookup.
validDF <- filter(df, Handle != "@DelMonteCIO")

# Look up each remaining handle with getUser(), convert the result to a data
# frame, and let ldply() stack the per-handle results into one data frame.
UsersDF <- ldply(
  validDF$Handle,
  function(screenName) as.data.frame(getUser(user = screenName))
)
# Produce the table cell content for one profile image URL.
#
# src: a single value (the caller passes one profile image URL at a time).
# Returns an empty string when `src` is NA; otherwise the result of collapsing
# the template vector below, which as written is also an empty string.
#
# NOTE(review): the non-NA branch never uses `src`. The template vector
# presumably once held HTML markup wrapped around the URL -- confirm against
# the original document before relying on this column.
imageFn <- function(src) {
  if (!is.na(src)) {
    return(paste(c(""), collapse = ""))
  }
  ""
}
# Build the profile cell for every user. vapply() keeps the column a plain
# character vector (lapply() would create a list column) and USE.NAMES = FALSE
# stops the URLs from being attached as element names.
UsersDF$profile <- vapply(
  UsersDF$profileImageUrl,
  imageFn,
  character(1),
  USE.NAMES = FALSE
)

# Pick the columns to display and give them their presentation names in the
# same step.
UsersDF1 <- UsersDF %>%
  select(
    Name = name,
    Since = created,
    NumberOfFollowers = followersCount,
    ScreenId = screenName,
    Location = location,
    Language = lang,
    ListedCount = listedCount,
    ProfileImage = profile,
    Desc = description
  )

kable(UsersDF1)
| Name | Since | NumberOfFollowers | ScreenId | Location | Language | ListedCount | ProfileImage | Desc |
|---|---|---|---|---|---|---|---|---|
| Philip Ashlock | 1154578214 | 3737 | philipashlock | 40.69, -73.96 | en | 395 | Digital civic infrastructure & open gov’t. Architect @usdatagov. Co-founder @Open311, @MyUSA, @cfaCommons, @CivicAgency. Views c/o objective reality ;) | |
| Jon Bruner | 1228959564 | 5566 | JonBruner | San Francisco, CA | en | 530 | AI and bots at @OreillyMedia. Pulls out all the stops. Expert at getting sidetracked. https://t.co/WdJUV0eEGq | |
| Michael Cavaretta | 1241791201 | 12919 | mjcavaretta | Michigan | en | 1479 | Director, Analytics Infrastructure, Ford Motor Co. Opinions are mine. Interests: #BigData, #Datascience, #machinelearning, #dataviz, #IoT, #mdm, #analytics | |
| Drew Conway | 1230595425 | 19065 | drewconway | Brooklyn, NY | en | 1247 | Data nerd, hacker, student of conflict. Founder and CEO of @alluvium | |
| Rohan Deuskar | 1193432007 | 830 | RohanD | New York City | en | 72 | CEO/cofounder of Stylitics. Creator of AI & data-driven outfitting engine for fashion & retail. Experimenting ethically on chatbots. NYC Fashion Fellow 2014. | |
| John Foreman | 1322064496 | 13872 | John4man | Atlanta | en | 910 | VP Product Management @MailChimp. Author of Data Smart book (https://t.co/7JVRug8fZQ). | |
| Data Science Central | 1205904389 | 157895 | analyticbridge | Seattle, WA | en | 5914 | Co-founded by Vincent Granville and part of the DSC community, our focus is on data science, ML, AI, deep learning, dataviz, Hadoop, IoT, and BI. | |
| Annette Greiner | 1274564699 | 93 | annettegreiner | Berkeley, California | en | 0 | Designs and builds web science gateways at NERSC, Lawrence Berkeley Natl Lab, represents the lab to the W3C, and chairs W3C HPCweb community group. | |
| Seth Grimes | 1224852523 | 12118 | SethGrimes | Takoma Park, Maryland, USA | en | 1263 | Help organizations find value in enterprise, online & social data, as a consultant & industry analyst. Run https://t.co/5QkELYXcko & https://t.co/LRB5I9IOKD. | |
| Joel Gurin | 1292342557 | 2456 | JoelGurin | Washington, DC | en | 251 | President and Founder of Center for Open Data Enterprise, author of Open Data Now. Working to put #OpenData to the best possible use. | |
| Quentin Hardy | 1235931183 | 31330 | qhardy | San Francisco | en | 1681 | Former: Deputy Tech Editor, The New York Times. Now: Head of Editorial, Google Cloud. The pizza thing holds. | |
| Harlan Harris | 1232380398 | 3649 | HarlanH | Brooklyn, NY | en | 391 | Director of Data Science at @WeWork in NYC, co-founder emeritus of @DataScienceDC Meetup and @DataCommunityDC. | |
| Jessica Kirkpatrick | 1247112614 | 3678 | berkeleyjess | San Francisco, CA | en | 267 | Dr. Jessica Ann Kirkpatrick, Astrophysicist Data Scientist @Hired_HQ (formerly @Yammer & @InstaEDU). Also @AAS_Women @data4america @techiesproject | |
| David Leonhardt | 1279496673 | 95529 | DLeonhardt | Washington, DC | en | 3211 | Op-Ed columnist, The New York Times | |
| Hilary Mason | 1171228944 | 88916 | hmason | NYC | en | 4831 | Founder at @FastForwardLabs. Data Scientist in Residence at @accel. I data and cheeseburgers. | |
| Sharmila Mulligan | 1278044869 | 484 | ShahaniMulligan | | en | 30 | CEO & Founder of ClearStory Data | |
| Sean Patrick Murphy | 1247887201 | 798 | sayhitosean | NYC | en | 58 | Entrepreneur, data scientist, educator, and dancer. Attending HackerSchool Winter 2014 in NYC | |
| Chris Neumann | 1233508452 | 1232 | ckneumann | San Francisco, CA | en | 130 | EIR at @500Startups. Founder of @DataHero. Employee #1 at @AsterData. I empower people by getting rid of the fear of data. | |
| Cathy O’Neil | 1314745328 | 14244 | mathbabedotorg | New York | en | 698 | Mathematician, data nerd, blogger, writer, loud mouth | |
| KDnuggets | 1233855446 | 79826 | kdnuggets | Brookline, MA, USA | en | 5138 | Covering #Analytics, #BigData, #DataMining, #DataScience #MachineLearning, #DeepLearning. Founded by Gregory Piatetsky-Shapiro. | |
| DataKind | 1326053066 | 22737 | DataKind | New York, NY | en | 1465 | Harnessing the power of data science in the service of humanity. We have Chapters in Bangalore, Dublin, San Francisco, Singapore, the UK and Washington DC. | |
| Jake Porway | 1236016849 | 12353 | jakeporway | Brooklyn / Oakland | en | 857 | Believer in tech+data for beautiful purposes \|\| Director @DataKind \|\| TV nerd @NatGeoChannel \|\| Fiercely optimistic | |
| Kyle Rush | 1213668491 | 4046 | kylerush | New York, NY | en | 279 | VP of Engineering for @Casper. Formerly @HillaryClinton, @BarackObama, @Optimizely, @NewYorker. | |
| Anno Saxenian | 1237058109 | 1022 | annosax | SF Bay Area | en | 63 | UC Berkeley School of Information | |
| Josh Schwartz | 1339706854 | 1077 | joshuadschwartz | New York, NY | en | 86 | Chief of Eng and Data Science @chartbeat. Sometimes speaker. Rare tweeter. I spend my time thinking about media, machine learning, and pretentious coffee. | |
| Peter Skomoroch | 1207767955 | 36822 | peteskomoroch | San Francisco | en | 2378 | Co-Founder & CEO @Skipflag. Machine learning, AI, social computing. Former Principal Data Scientist @LinkedIn, Engineering @AOL, @MITLL, https://t.co/dMc4qAeQfK | |
| Anna Smith | 1281990730 | 1123 | OMGannaks | | en | 97 | star trek: learn from the future. analytics engineer @renttherunway, former data scientist @bitly | |
| Ryan Swanstrom | 1385868879 | 20 | swgoof | South Dakota, USA | en | 3 | currently tweets @ryanswanstrom | |
| Shashi Upadhyay | 1238901745 | 515 | shashiSF | | en | 41 | Founder & CEO of Lattice Engines. Learning about learning | |
| Mark van Rijmenam | 1265806877 | 13393 | VanRijmenam | Sydney, New South Wales | en | 2608 | Founder @Datafloq \| Author: Think Bigger - https://t.co/0J8bfgORE0 \| Speaker \| Global Top 10 #BigData Influencer \| Blockchain \| AI \| Investor \| PhD Candidate | |
| Hal Varian | 1275922785 | 2739 | halvarian | California | en | 110 | Chief Economist at Google. | |
| John Myles White | 1215706219 | 18764 | johnmyleswhite | San Francisco, CA | en | 1035 | Research scientist at Facebook working on statistical computing. Julia developer. Tweets reflect my views only. | |
| Brian Wilt | 1246899421 | 1058 | brianwilt | San Francisco, CA | en | 64 | Data @ Facebook. Former head of data @Jawbone. @Stanford and @MIT physics. | |
| Raymond Yee | 1173633570 | 1967 | rdhyee | Berkeley, CA, USA | en | 186 | Liberates books at @unglueit. Thinks about APIs, data, and Python. Taught data and APIs at @BerkeleyISchool. Bach fanatic. Soli Deo Gloria. |
#tweets <- userTimeline(user = "@JonBruner ", n = 2, includeRts = FALSE, retryOnRateLimit = 2)
#tweets