Read File

# Show the working directory; every relative path below resolves against it.
getwd()
## [1] "C:/Users/Administrator/Desktop"
#setwd("C:/Users/Administrator")

# Download the sample purchase log into the working directory.
download.file('https://raw.githubusercontent.com/ywchiu/rtibame/master/Data/purchase.csv', destfile='purchase.csv')

# Base R reader: returns a plain data.frame.
purchase <- read.csv('purchase.csv')
class(purchase)
## [1] "data.frame"

# readr reader with an explicit type for the Time column. The file is read
# from the same relative path it was downloaded to, so the script no longer
# depends on one particular user's Desktop directory.
# NOTE(review): the format string carries a time of day but col_date() yields
# a Date column, so the time part is presumably discarded -- confirm whether
# col_datetime() was intended.
library(readr)
purchase <- read_csv('purchase.csv',
    col_types = cols(Time = col_date(format = "%Y-%m-%d %H:%M:%S")))
## Warning: Missing column names filled in: 'X1' [1]
View(purchase)

Write File

# Load the built-in iris data set and open it in the data viewer.
data("iris")
View(iris)

# Export the same table twice: comma-separated, then tab-separated.
write.csv(iris, file = 'iris.csv')
write.table(iris, file = 'iris.tsv', sep = '\t')

Write RData

# Round-trip `iris` through an .RData file: save it, remove it from the
# workspace, then restore it under its original name with load().
# The object is passed unnamed because save() has no `x` parameter; the
# objects to store are taken from `...`.
save(iris, file = 'iris.RData')
rm(iris)
load('iris.RData')


# Fit a simple linear regression on the first Anscombe pair and print it.
data("anscombe")
fit <- lm(y1 ~ x1, data = anscombe)
fit
## 
## Call:
## lm(formula = y1 ~ x1, data = anscombe)
## 
## Coefficients:
## (Intercept)           x1  
##      3.0001       0.5001

# Persist the fitted model and read it back. The model is passed unnamed
# because save() has no `x` parameter; the objects to store come from `...`.
save(fit, file = 'lm.RData')
load('lm.RData')

# Use the model to predict y1 for a new observation with x1 = 5.
predict(fit, data.frame(x1= 5))
##        1 
## 5.500545

Read Excel

# Source of the workbook used in this section.
report_url <- 'https://raw.githubusercontent.com/ywchiu/rtibame/master/Data/FinancialReport.xlsx'

# Fetch the workbook into the working directory when it is not already there.
# mode = 'wb' keeps the binary .xlsx intact on Windows.
if (!file.exists('FinancialReport.xlsx')) {
  download.file(report_url, destfile = 'FinancialReport.xlsx', mode = 'wb')
}

library(readxl)
# Read from the working directory instead of a hard-coded Desktop path.
FinancialReport <- read_excel('FinancialReport.xlsx')
View(FinancialReport)

# Per-column summary statistics.
summary(FinancialReport)
##       年度           股本         財報評分          收盤       
##  Min.   :1999   Min.   : 767   Min.   :59.00   Min.   : 42.60  
##  1st Qu.:2003   1st Qu.:2027   1st Qu.:89.00   1st Qu.: 62.50  
##  Median :2007   Median :2583   Median :92.00   Median : 71.00  
##  Mean   :2007   Mean   :2249   Mean   :88.24   Mean   : 83.75  
##  3rd Qu.:2011   3rd Qu.:2592   3rd Qu.:94.00   3rd Qu.: 97.00  
##  Max.   :2015   Max.   :2643   Max.   :96.00   Max.   :167.00  
##       平均             漲跌            漲跌__1          營業收入   
##  Min.   : 52.40   Min.   :-88.500   Min.   :-53.00   Min.   : 731  
##  1st Qu.: 56.40   1st Qu.: -5.500   1st Qu.: -8.10   1st Qu.:2030  
##  Median : 67.40   Median :  6.500   Median :  8.80   Median :3174  
##  Mean   : 82.29   Mean   :  4.235   Mean   : 11.77   Mean   :3576  
##  3rd Qu.:104.00   3rd Qu.: 20.100   3rd Qu.: 28.00   3rd Qu.:4271  
##  Max.   :147.00   Max.   : 96.000   Max.   :135.00   Max.   :8435  
##     營業毛利       營業利益       業外損益         稅後淨利   
##  Min.   : 315   Min.   : 128   Min.   :-43.70   Min.   : 145  
##  1st Qu.: 765   1st Qu.: 613   1st Qu.:  4.97   1st Qu.: 651  
##  Median :1417   Median :1044   Median : 35.00   Median : 999  
##  Mean   :1639   Mean   :1238   Mean   : 50.67   Mean   :1179  
##  3rd Qu.:2071   3rd Qu.:1592   3rd Qu.: 62.10   3rd Qu.:1616  
##  Max.   :4104   Max.   :3200   Max.   :304.00   Max.   :3066  
##       ROA             EPS        
##  Min.   : 3.93   Min.   : 0.830  
##  1st Qu.:15.50   1st Qu.: 3.450  
##  Median :18.40   Median : 4.140  
##  Mean   :17.15   Mean   : 4.969  
##  3rd Qu.:19.40   3rd Qu.: 6.240  
##  Max.   :24.70   Max.   :11.820

Read JSON File

# Download the sample Facebook feed into the working directory.
download.file(
  url      = 'https://raw.githubusercontent.com/ywchiu/rtibame/master/Data/fb.json',
  destfile = 'fb.json'
)

#install.packages('jsonlite')
library(jsonlite)

# Parse the downloaded JSON file into R objects and print the result.
json_data <- fromJSON(txt = 'fb.json')
print(json_data)
## $data
##        from.name from.id
## 1      Tom Brady     X12
## 2 Peyton Manning     X18
##                                                                                           actions
## 1 http://www.facebook.com/X999/posts/Y999, http://www.facebook.com/X999/posts/Y999, Comment, Like
## 2 http://www.facebook.com/X998/posts/Y998, http://www.facebook.com/X998/posts/Y998, Comment, Like
##               updated_time             created_time
## 1 2010-08-02T21:27:44+0000 2010-08-02T21:27:44+0000
## 2 2010-08-02T21:27:44+0000 2010-08-02T21:27:44+0000
##                    message   type        id
## 1 Looking forward to 2010! status X999_Y999
## 2     Where's my contract? status X998_Y998

Read XML

#install.packages('XML')
library(XML)

# Address of the XML document to load.
url <- 'http://download.post.gov.tw/post/download/Xml_10510.xml'

# Convert the XML document at that address into a data frame.
zipcode <- xmlToDataFrame(url)

使用Rvest

#install.packages('rvest')
library(rvest)
## Loading required package: xml2
## 
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
## 
##     guess_encoding

# Fetch the Apple Daily real-time news listing and parse it as UTF-8 HTML.
newsurl <- 'http://www.appledaily.com.tw/realtimenews/section/new/'
apple <- read_html(x = newsurl, encoding = 'utf-8')

# Printing the parsed document shows only its top-level nodes.
print(apple)
## {xml_document}
## <html lang="zh-TW">
## [1] <head>\n<meta http-equiv="Content-Type" content="text/html; charset= ...
## [2] <body id="article" class="all">\r\n    <div class="wrapper">\r\n     ...
#as.character(apple)

使用httr

library(httr)

# Issue a plain HTTP GET for the same listing page and print the response
# object (status line, headers summary and the start of the body).
newsurl <- 'http://www.appledaily.com.tw/realtimenews/section/new/'
print(GET(url = newsurl))
## Response [http://www.appledaily.com.tw/realtimenews/section/new/]
##   Date: 2017-09-17 09:31
##   Status: 200
##   Content-Type: text/html; charset=utf-8
##   Size: 70.7 kB
## <!DOCTYPE html>
## <!--[if lt IE 7 ]> <html lang="zh-TW" class="ie6 ielt8"> <![endif]-->
## <!--[if IE 7 ]>    <html lang="zh-TW" class="ie7 ielt8"> <![endif]-->
## <!--[if IE 8 ]>    <html lang="zh-TW" class="ie8"> <![endif]-->
## <!--[if (gte IE 9)|!(IE)]><!--> <html lang="zh-TW"> <!--<![endif]-->
## <head>
##   <meta charset="utf-8" />
##   <title>蘋果即時新聞|蘋果日報|Apple Daily</title>
##   <meta name="description" content="蘋果日報網站提供即時、快速、豐富的最新時事動態,包含國際、社會、娛樂、政...
##   <meta name="keywords" content="蘋果日報,Apple Daily,台灣,壹傳媒,Apple, Animatio...
## ...

GET PCHome Product

# Product API address, assembled from pieces purely for readability; the
# concatenated result is the exact request string used by this tutorial.
prod <- paste0(
  'http://ecapi.pchome.com.tw/ecshop/prodapi/v2/prod/DMAX20-A9008C1PB-000',
  '&fields=Seq,Id,Name,Nick,Store,PreOrdDate,SpeOrdDate,Price,Discount,Pic,',
  'Weight,ISBN,Qty,Bonus,isBig,isSpec,isCombine,isDiy,isRecyclable,isCarrier,',
  'isMedical,isBigCart,isSnapUp,isDescAndIntroSync,isFoodContents,isHuge,',
  'isEnergySubsidy&_callback=jsonp_prod&1505621520?_callback=jsonp_prod'
)

# Parse the response with the HTML reader, then dump it back to a string to
# inspect what the server returned.
prod_page <- read_html(prod)
as.character(prod_page)
## [1] "<!DOCTYPE html PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">\n<html><body><p>try{jsonp_prod({\"DMAX20-A9008C1PB-000\":{\"Seq\":18507941,\"Id\":\"DMAX20-A9008C1PB-000\",\"Name\":\"\\u82f1\\u570b Gtech Multi \\u9ad8\\u6548\\u80fd\\u7121\\u7dda\\u624b\\u6301\\u5f0f\\u5438\\u5875\\u5668 (\\u9650\\u91cf\\u798f\\u5229\\u54c1)\",\"Nick\":\"\\u82f1\\u570b Gtech Multi \\u9ad8\\u6548\\u80fd\\u5438\\u529b\\u4e0d\\u8870\\u5f31 \\u7121\\u7dda\\u624b\\u6301\\u5145\\u96fb\\u5f0f\\u5438\\u5875\\u5668-\\u9650\\u91cf\\u798f\\u5229\\u54c1\",\"Store\":\"DMAX20\",\"PreOrdDate\":\"\",\"SpeOrdDate\":\"\",\"Price\":{\"M\":14880,\"P\":5980},\"Discount\":0,\"Pic\":{\"B\":\"\\/items\\/DMAX20A9008C1PB\\/000001_1502424919.jpg\",\"S\":\"\\/items\\/DMAX20A9008C1PB\\/000002_1502424919.jpg\"},\"Weight\":3,\"ISBN\":\"\",\"Qty\":20,\"Bonus\":0,\"isBig\":0,\"isSpec\":0,\"isCombine\":0,\"isDiy\":0,\"isRecyclable\":0,\"isCarrier\":0,\"isMedical\":0,\"isBigCart\":1,\"isSnapUp\":0,\"isDescAndIntroSync\":0,\"isFoodContents\":0,\"isHuge\":0,\"isEnergySubsidy\":0}});}catch(e){if(window.console){console.log(e);}}</p></body></html>\n"

Use POST

library(httr)

# Timetable search endpoint and the form fields it is sent.
url <- 'https://www.thsrc.com.tw/tw/TimeTable/SearchResult'
payload <- list(
StartStation='977abb69-413a-4ccf-a109-0272c24fd490',
EndStation='a7a04c89-900b-4798-95a3-c01c455622f4',
SearchDate='2017/09/18',
SearchTime='08:00',
SearchWay='DepartureInMandarin'
)

# Send the fields as a URL-encoded form. Certificate verification is switched
# off for THIS request only by passing a config object directly. The previous
# code passed set_config(...), which changes the session-wide httr options
# (turning verification off for every later request) and hands POST the old
# configuration rather than the new one.
res <- POST(url, body = payload, encode = "form",
            config = config(ssl_verifypeer = 0L))
res
## Response [https://www.thsrc.com.tw/tw/TimeTable/SearchResult]
##   Date: 2017-09-17 09:31
##   Status: 200
##   Content-Type: text/html; charset=utf-8
##   Size: 98.4 kB
## 
## <!DOCTYPE html>
## <html lang="zh-tw">
## 
## 
## <link rel="canonical" href="https://www.thsrc.com.tw"/>
## 
## <meta charset="utf-8" />
## <link rel="shortcut icon" href="/Content/favicon/favicon.ico" />
## <link rel="apple-touch-icon" href="/Content/favicon/57_57.png" sizes="57...
## ...

台鐵

# Fetch and parse the TRA timetable search page.
url <- 'http://twtraffic.tra.gov.tw/twrail/EasySearch.aspx'
res <- read_html(x = url)

Magrittr

# Nested-call form: last 3 of the first 6 iris rows, summed over Sepal.Length.
sum(tail(head(iris), n = 3)[['Sepal.Length']])
## [1] 15

# Same computation with a named intermediate.
a <- tail(head(iris), n = 3)
a[['Sepal.Length']]
## [1] 4.6 5.0 5.4

# Same computation as a left-to-right pipeline.
library(magrittr)
iris %>%
  head() %>%
  tail(3) %>%
  .[['Sepal.Length']] %>%
  sum()
## [1] 15

DOM Tree

# Minimal document used to demonstrate CSS selectors.
html_sample <- '<html>
<body>
  <h1 id="title">Hello World</h1>
  <a href="#" class="link">This is link1</a>
  <a href="# link2" class="link">This is link2</a>
</body>
</html>'

# Select by tag name: h1
html_text(html_nodes(read_html(html_sample), 'h1'))
## [1] "Hello World"
# Descendant selector: h1 inside body
html_text(html_nodes(read_html(html_sample), 'body h1'))
## [1] "Hello World"
# Select by tag name: a
html_text(html_nodes(read_html(html_sample), 'a'))
## [1] "This is link1" "This is link2"
# Select by id, written with a leading '#'
html_text(html_nodes(read_html(html_sample), '#title'))
## [1] "Hello World"
# Select by class, written with a leading '.'
html_text(html_nodes(read_html(html_sample), '.link'))
## [1] "This is link1" "This is link2"
# Tag and class combined: a.link
html_text(html_nodes(read_html(html_sample), 'a.link'))
## [1] "This is link1" "This is link2"
# Read an attribute (href) instead of the node text
html_attr(html_nodes(read_html(html_sample), 'a'), 'href')
## [1] "#"       "# link2"

Get AppleDaily List

library(rvest)

# Listing page of Apple Daily real-time news.
url <- 'http://www.appledaily.com.tw/realtimenews/section/new/'

# Show the raw markup of the first anchor matched by '.rtddt a'.
as.character(html_nodes(read_html(url), '.rtddt a')[1])
## [1] "<a href=\"/realtimenews/article/life/20170917/1203646/%E9%80%99%E6%AC%BE%E5%85%AC%E5%85%B1%E5%B7%A5%E7%A8%8B%EF%BC%81%E3%80%80%E6%B0%B4%E7%AE%A1%E9%96%8B%E9%97%9C%E8%A3%9D%E5%9C%A8%E4%BA%BA%E8%A1%8C%E9%81%93%E4%B8%8A\" target=\"_blank\">\r\n                                        <time>17:29</time><h2>生活</h2>\r\n                                        <h1><font color=\"#383c40\">這款公共工程! 水管開關裝在人行道上(0)</font></h1>\r\n                                    </a>"
# Headline text: the <h1> found inside each anchor matched by '.rtddt a'.
html_text(html_nodes(html_nodes(read_html(url), '.rtddt a'), 'h1'))
##  [1] "這款公共工程! 水管開關裝在人行道上(0)"           
##  [2] "古董縫紉機超迷你 百年歷史仍可縫衣(0)"             
##  [3] "醫師假開刀還隔空門診、查房 長庚認了(0)"           
##  [4] "北市府環境教育營 24日訪公館台大農場、...(3)"      
##  [5] "響應世界清潔日 新北漁村舉辦微笑海灣活動(2)"       
##  [6] "【蘋果LIVE】洪宸宇單局3K 1上桃猿...(3558)"        
##  [7] "因應美國要求 科威特傳將驅逐北韓大使(8)"           
##  [8] "【不想靠爸片】老爸是「股神」 他卻選擇做...(60558)"
##  [9] "竹北鳳岡路工廠火警 廠內濃煙幸無人傷亡(87)"        
## [10] "維修冷氣不慎踩空 屋主4樓摔落鐵皮屋頂(234)"        
## [11] "洗牌太大聲引注意 警破家庭麻將賭場   (319)"        
## [12] "擺脫借錢度日 他靠菜市場婆媽來牽成(182)"           
## [13] "男逼少女吃30條死金魚 最高法院判10年(402)"         
## [14] "王祖賢24年前舊照出土 「打趴一群整形鬼...(1690)"   
## [15] "澎湖警小隊長槓海管處 內政部改口肯定(699)"         
## [16] " 愛國女星被打槍!聽聞「線上演員」定義驚...(1958)"  
## [17] "【真的變漂亮】許純美增胖20公斤模樣曝光...(194606)" 
## [18] "<U+200B>「要好好面對自己」 女醫告男友10年前...(2274)"
## [19] "范冰冰鴿子蛋出自De Beers 求婚鑽...(363)"          
## [20] "華碩ZF V與Verizon合作 首度推...(173)"             
## [21] "三星挑戰台積電龍頭地位 宣示5年後要當晶...(414)"   
## [22] "富邦主場轉戰天母 嘟爸潘威倫對上壽星陳明...(367)"  
## [23] "巴西新援絕殺 巴塞隆納4連勝領跑西甲(177)"          
## [24] "小姊弟被反鎖屋內 2分鐘內接連墜樓(3171)"           
## [25] "<U+200B>熱心警記憶佳 助迷途婦返家  (124)"         
## [26] "台中港漫沙塵暴 環保局找到禍首重罰(7085)"          
## [27] "灶君爺誕辰信徒齊來賀壽 五指山灶君堂免費...(234)"  
## [28] "貨輪硝酸外洩 台中港急處置(700)"                   
## [29] "【壹週刊】妙禪痴迷勞斯萊斯藏5祕密!Se..."          
## [30] "兌現媽媽生前承諾 7旬婦攜母遺照環島(280)"
# Download and parse the listing ONCE, keeping the anchors matched by
# '.rtddt a'. Every column below is extracted from this single snapshot, so
# titles, categories and times cannot drift apart if the live page changes
# between requests (the page used to be fetched three separate times).
rtddt <- read_html(url) %>% html_nodes('.rtddt a')

# Category label: the <h2> inside each anchor.
category <- rtddt %>%
  html_nodes('h2') %>%
  html_text()

# Publication time: the <time> inside each anchor (printed for inspection).
rtddt %>% html_nodes('time') %>% html_text()
##  [1] "17:29" "17:26" "17:25" "17:23" "17:23" "17:20" "17:19" "17:16"
##  [9] "17:14" "17:13" "17:12" "17:11" "17:11" "17:08" "17:05" "17:05"
## [17] "17:04" "17:03" "17:02" "17:00" "17:00" "16:57" "16:55" "16:54"
## [25] "16:54" "16:53" "16:53" "16:52" "16:45" "16:45"

# Headline: the <h1> inside each anchor.
title <- rtddt %>%
  html_nodes('h1') %>%
  html_text()

time <- rtddt %>%
  html_nodes('time') %>%
  html_text()

# Assemble one row per news item and preview the first rows.
newsdf <- data.frame(title = title, category = category, dt= time)
head(newsdf)
##                                          title category    dt
## 1      這款公共工程! 水管開關裝在人行道上(0)     生活 17:29
## 2        古董縫紉機超迷你 百年歷史仍可縫衣(0)     生活 17:26
## 3      醫師假開刀還隔空門診、查房 長庚認了(0)     生活 17:25
## 4 北市府環境教育營 24日訪公館台大農場、...(3)     生活 17:23
## 5  響應世界清潔日 新北漁村舉辦微笑海灣活動(2)     生活 17:23
## 6   【蘋果LIVE】洪宸宇單局3K 1上桃猿...(3558)     體育 17:20
# Example of a site-relative article link as it appears in the listing.
article_url <- '/realtimenews/article/local/20170917/1205418/旅客逾時未集合 旅行團報警找人'

# Prefixing the site root turns the relative link into an absolute one.
domain <- 'http://www.appledaily.com.tw'
paste0(domain, article_url)
## [1] "http://www.appledaily.com.tw/realtimenews/article/local/20170917/1205418/旅客逾時未集合 旅行團報警找人"

# Do the same for the href of every anchor in the listing.
url <- paste0(domain, html_attr(rtddt, 'href'))

# Listing table: headline, category, time and absolute link per item.
applenews <- data.frame(
  title    = title,
  category = category,
  time     = time,
  url      = url
)

head(applenews)
##                                          title category  time
## 1      這款公共工程! 水管開關裝在人行道上(0)     生活 17:29
## 2        古董縫紉機超迷你 百年歷史仍可縫衣(0)     生活 17:26
## 3      醫師假開刀還隔空門診、查房 長庚認了(0)     生活 17:25
## 4 北市府環境教育營 24日訪公館台大農場、...(3)     生活 17:23
## 5  響應世界清潔日 新北漁村舉辦微笑海灣活動(2)     生活 17:23
## 6   【蘋果LIVE】洪宸宇單局3K 1上桃猿...(3558)     體育 17:20
##                                                                                                                        url
## 1             http://www.appledaily.com.tw/realtimenews/article/life/20170917/1203646/這款公共工程! 水管開關裝在人行道上
## 2               http://www.appledaily.com.tw/realtimenews/article/life/20170917/1205622/古董縫紉機超迷你 百年歷史仍可縫衣
## 3             http://www.appledaily.com.tw/realtimenews/article/life/20170917/1205634/醫師假開刀還隔空門診、查房 長庚認了
## 4 http://www.appledaily.com.tw/realtimenews/article/life/20170917/1205629/北市府環境教育營 24日訪公館台大農場、松山奉天宮
## 5         http://www.appledaily.com.tw/realtimenews/article/life/20170917/1205631/響應世界清潔日 新北漁村舉辦微笑海灣活動
## 6       http://www.appledaily.com.tw/realtimenews/article/sports/20170917/1205525/【蘋果LIVE】洪宸宇單局3K 1上桃猿0:0中信
# Base R writer, labelled "BIG5" by the author.
# NOTE(review): presumably this relies on the platform's native encoding
# being Big5 (Traditional-Chinese Windows) -- confirm.
write.csv(x = applenews, file = 'applenews.csv')

# UTF-8 with a byte-order mark, so Excel detects the encoding.
# The destination is passed positionally: the second parameter is called
# `path` in older readr and `file` in current readr (where `path` is
# deprecated), and the positional form works with both.
write_excel_csv(applenews, 'applnews_with_bom.csv')

UDN NEWS

library(rvest)

# Parse the first page of the UDN breaking-news list.
page <- read_html('https://udn.com/news/breaknews/1')

# One character vector per field, each selected by tag or class.
title <- html_text(html_nodes(page, 'h2'))
dt    <- html_text(html_nodes(page, '.dt'))
view  <- html_text(html_nodes(page, '.view'))
cate  <- html_text(html_nodes(page, '.cate'))

# Combine the fields column-wise and preview the result.
udn <- data.frame(dt, title, view, cate)
head(udn)

Mobile01

# Parse the Mobile01 new-topics page.
url <- 'https://www.mobile01.com/newtopics.php'
page <- read_html(url)

# Text of every node carrying each class.
subject <- html_text(html_nodes(page, '.subject'))
reply   <- html_text(html_nodes(page, '.reply'))
authur  <- html_text(html_nodes(page, '.authur'))

# The first element of each vector is dropped before building the table
# (presumably it is the table header row -- verify against the page).
mobile01 <- data.frame(
  title  = subject[-1],
  reply  = reply[-1],
  author = authur[-1]
)
head(mobile01)
##                                                    title reply
## 1            SUZUKI GSR 125一週年開箱文SUZUKI(250cc以下)     0
## 2                         apple官網的分期扣款方式?iPhone     0
## 3 50歲以前退休之資產配置(現狀與規劃),請指教投資與理財     2
## 4             詢問桃園中壢附近貼燈膜的店VESPA(250cc以下)     0
## 5       Note 8要如何使用Apple Music呢?SAMSUNG (Android)     0
## 6                               lg跟三星的比較高畫質視界     0
##                            author
## 1     2017-09-17 17:17RV150遊車河
## 2         2017-09-17 17:13kienrx7
## 3         2017-09-17 17:09xpotter
## 4 2017-09-17 17:08kk1051362268888
## 5            2017-09-17 17:06熊爺
## 6           2017-09-17 17:06拿茲~

Google NEWS

# Collect the href of every node matched by '.kWyHVd .ME7ew' on the
# Google News front page.
google_new <- html_attr(
  html_nodes(
    read_html('https://news.google.com/news/?ned=tw&hl=zh-TW'),
    '.kWyHVd .ME7ew'
  ),
  'href'
)

CNN

# Fetch the CNN front page and turn the parsed document back into one string.
url <- 'http://edition.cnn.com/'
cnn_text <- as.character(read_html(url))

# Cut out the embedded article list: keep what follows the "articleList":
# marker, then keep what precedes the trailing "registryURL" marker.
cnn1 <- unlist(strsplit(cnn_text, '"articleList":'))[2]
cnn2 <- unlist(strsplit(cnn1, '}                     , registryURL'))[1]
#cnn2

# The remaining fragment is parsed as JSON.
library(jsonlite)
jd <- fromJSON(cnn2)
head(jd)
##                                                                uri
## 1   /2017/09/17/americas/atlantic-storms-jose-lee-maria/index.html
## 2     /2017/09/15/us/hurricane-jose-forecast-east-coast/index.html
## 3         /2017/09/15/health/florida-nursing-home-video/index.html
## 4 /2017/09/07/world/iyw-help-for-hurricane-irma-victims/index.html
## 5         /2017/09/16/politics/trump-paris-climate-deal/index.html
## 6         /2017/09/15/vr/an-ordinary-day-in-north-korea/index.html
##                                                                                                                    headline
## 1 <strong>Hurricane warnings issued in Caribbean<strong></strong>; devastated islands could be in new storm's path</strong>
## 2                                                  <strong>Jose heads north</strong>: Hurricane now threatens US east coast
## 3          <strong>Aftermath: </strong>Video shows woman sat naked as residents suffered in sweltering Florida nursing home
## 4                                                    <strong>Calls for aid:</strong> Hurricane Irma victims need your help 
## 5                                                         <strong>Paris accord: </strong>WH says US still plans to withdraw
## 6                    <strong>See in 360°<strong></strong>:<strong></strong> CNN's journey through the secret state</strong>
##                                                                                                  thumbnail
## 1 //i2.cdn.cnn.com/cnnnext/dam/assets/170916230307-tropical-storm-maria-satellite-091617-1700-small-11.jpg
## 2        //i2.cdn.cnn.com/cnnnext/dam/assets/170915110224-jose-hurricane-trop-storm-track-915-small-11.jpg
## 3                    //i2.cdn.cnn.com/cnnnext/dam/assets/170914001901-01-florida-nursing-home-small-11.jpg
## 4                     //i2.cdn.cnn.com/cnnnext/dam/assets/170911105649-01-irma-st-martin-0910-small-11.jpg
## 5         //i2.cdn.cnn.com/cnnnext/dam/assets/170602100218-01-donald-trump-paris-presser-0601-small-11.jpg
## 6        //i2.cdn.cnn.com/cnnnext/dam/assets/170915133308-north-korea-ripley-2-360-vr-cropped-small-11.jpg
##   duration
## 1         
## 2         
## 3         
## 4         
## 5         
## 6         
##                                                                                                                                                                                                                                                               description
## 1                                                                                              Three storms are swaggering through the Atlantic, with one already a hurricane and another forecast to strengthen and threaten areas battered by Hurricane Irma last week.
## 2                                 Tropical Storm Jose has strengthened back into a hurricane, according to a 5 p.m. ET Friday update from <a href="http://www.nhc.noaa.gov/text/refresh/MIATCPAT2+shtml/152034.shtml" target="_blank">the National Hurricane Center</a>. 
## 3   On the night before emergency workers had to evacuate people from a Florida nursing home where eight died, residents suffered in the sweltering heat as staff used fans to cool them, and one woman sat naked in a bed in a hallway, a video obtained by CNN shows.  
## 4                                                                                                                             Hurricane Irma killed at least 33 people in the US and flooded major cities including Jacksonville, Florida and Charleston, South Carolina.
## 5                                                                      President Donald Trump still plans to withdraw the United States from the Paris climate agreement unless there are major changes made to the carbon emissions pact, the White House said Saturday.
## 6 See Pyongyang's answer to the Apple Store, videogames where Americans are the bad guys, and a picturesque waterfront for fishing - and watching missiles launch. Follow CNN's Will Ripley as he gets a rare glimpse of everyday life for the privileged in North Korea.
##   layout iconType
## 1            <NA>
## 2            <NA>
## 3            <NA>
## 4            <NA>
## 5            <NA>
## 6            <NA>

Google Place API

library(httr)

# API key placeholder; it is now used by BOTH requests below.
key <- 'A'

# Text search for the place. The parameters go through `query`, so httr
# URL-encodes the non-ASCII search term and the key comes from `key` instead
# of being hard-coded a second time inside the URL.
res <- GET('https://maps.googleapis.com/maps/api/place/textsearch/json',
           query = list(query = '走馬瀨農場', key = key))

# Identifier of the first search hit.
placeid <- content(res)$results[[1]]$place_id
placeid


# Place-details request for that identifier.
apiurl <- 'https://maps.googleapis.com/maps/api/place/details/json?placeid='

detail_url <- paste0(apiurl,placeid, '&key=', key )
res <- GET(detail_url)
reviews <- content(res)$result$reviews
#reviews

# One single-row data frame per review, bound together in a single rbind()
# call instead of growing the result inside a loop. An empty review list
# still yields an empty data frame.
review_rows <- lapply(reviews, function(review) {
  data.frame(text = review$text, author = review$author_name, stringsAsFactors = FALSE)
})
total_reviews <- if (length(review_rows) > 0) {
  do.call(rbind, review_rows)
} else {
  data.frame()
}
total_reviews

#data.frame(matrix(unlist(reviews), nrow=2, byrow=T))

抓取蘋果內文

library(rvest)

# Address of a single article page (already percent-encoded).
appleurl <- 'http://www.appledaily.com.tw/realtimenews/article/sports/20170917/1205584/%E8%87%BA%E7%81%A3%E6%A1%8C%E7%90%83%E5%B0%8F%E5%B0%87%E3%80%80%E5%85%8B%E7%BE%85%E5%9F%83%E8%A5%BF%E4%BA%9E%E5%8C%85%E8%BE%A6%E5%9C%98%E9%AB%944%E9%87%91'

page <- read_html(appleurl)

# Pull each field out of the page by id or class selector.
title   <- html_text(html_nodes(page, '#h1'))
summary <- html_text(html_nodes(page, '#summary'))
time    <- html_text(html_nodes(page, '.gggs time'))
clicked <- html_text(html_nodes(page, '.clicked'))

# Show the fields side by side as a data frame.
data.frame(title, summary, time, clicked)
##                                 title
## 1 臺灣桌球小將 克羅埃西亞包辦團體4金
##                                                                                                                                                                                                                                                                                                                                                                                                                                              summary
## 1 臺灣桌球小將凌晨於ITTF(International Table Tennis Federation,國際桌球總會)克羅埃西亞青少年公開賽再遞佳績,包辦18歲、15組男、女團體全數4面金牌,加上日前已結束18歲組各項個人賽,在本站已累計5金3銀3銅。這項賽事臺灣時間明天凌晨落幕,還有15歲組4項個人賽未完賽。以下是凌晨團體摘金臺灣小將名單(依決賽出賽點序):18歲男子-馮翊新、戴茗葦、高民騏18歲女子-蔡育勤、蘇珮綾、溫睿玲15歲男子-黎昕祐、王翊帆15歲女子-蔡豐恩、陳慈瑄(王毓健/綜合報導)
##                  time   clicked
## 1 2017年09月17日16:10 人氣(628)

Function

# Scrape one article page into a data frame.
#
# Arguments:
#   appleurl  address of the article page, passed to read_html().
#   category  category label to attach to the row(s).
#
# Returns a data.frame (strings kept as character) with the columns
# title, summary, time, clicked and category.
#
# A selector that matches nothing no longer makes data.frame() fail on a
# zero-length column: title, summary, time and category fall back to NA,
# and clicked keeps its existing fallback of 0.
getArticle <- function(appleurl, category){
  page <- read_html(appleurl)

  # Text of the nodes matching `css`, or `fallback` when there are none.
  text_or <- function(css, fallback = NA_character_) {
    found <- page %>%
      html_nodes(css) %>%
      html_text()
    if (length(found) == 0) fallback else found
  }

  title   <- text_or('#h1')
  summary <- text_or('#summary')
  time    <- text_or('.gggs time')
  # Unchanged fallback: a page without a '.clicked' node reports 0.
  clicked <- text_or('.clicked', fallback = 0)

  # Callers derive `category` from a node lookup that can come back empty.
  if (length(category) == 0) {
    category <- NA_character_
  }

  data.frame(title, summary, time, clicked, category, stringsAsFactors = FALSE)
}

# Scrape a single article and tag it with its category.
df <- getArticle(
  appleurl = 'http://www.appledaily.com.tw/realtimenews/article/life/20170917/1205601/單車管理好頭痛 北市:已建議交部比照汽機車納管',
  category = '生活'
)

# Inspect the structure of the returned data frame.
str(df)
## 'data.frame':    1 obs. of  5 variables:
##  $ title   : chr "單車管理好頭痛 北市:已建議交部比照汽機車納管"
##  $ summary : chr "單車騎乘風氣夯,但民眾亂停或違規行駛卻讓政府束手無策。北市交通局今表示,已建議交通部修改《道路交通管理處罰條例"| __truncated__
##  $ time    : chr "2017年09月17日16:42"
##  $ clicked : chr "人氣(507)"
##  $ category: chr "生活"
# Site root (the listing's hrefs are site-relative) and the listing address.
domain     <- 'http://www.appledaily.com.tw'
newsurl    <- 'http://www.appledaily.com.tw/realtimenews/section/new/'

# Anchors of the listing, one per news item.
rtddt <- read_html(newsurl) %>% html_nodes('.rtddt a')

# Scrape every linked article. Each anchor yields one data frame; they are
# bound with a single rbind() call rather than growing `dfall` on every
# iteration, and the per-item variables stay local to the function instead
# of leaking into the workspace.
article_rows <- lapply(rtddt, function(ele) {
  category  <- ele %>% html_nodes('h2') %>%
            html_text()
  url       <- paste0(domain, ele %>% html_attr('href'))
  #print(url)
  getArticle(url, category)
})

# An empty listing still produces an empty data frame.
dfall <- if (length(article_rows) > 0) {
  do.call(rbind, article_rows)
} else {
  data.frame()
}
#dfall

抓取所有新聞


# Scrape every article linked from one listing page.
#
# Arguments:
#   newsurl  address of the listing page, passed to read_html().
#   domain   site root prepended to each site-relative href. It defaults to
#            the Apple Daily root, so the function no longer depends on a
#            global variable named `domain` being defined first.
#
# Returns the row-bound results of getArticle() for every anchor matched by
# '.rtddt a', or an empty data.frame when the page has no such anchors.
getURL <- function(newsurl, domain = 'http://www.appledaily.com.tw'){
  rtddt <- read_html(newsurl) %>% html_nodes('.rtddt a')

  # One data frame per anchor; bound once below instead of inside a loop.
  article_rows <- lapply(rtddt, function(ele) {
    category  <- ele %>% html_nodes('h2') %>%
              html_text()
    url       <- paste0(domain, ele %>% html_attr('href'))
    getArticle(url, category)
  })

  if (length(article_rows) == 0) {
    return(data.frame())
  }
  do.call(rbind, article_rows)
}


# Base address of the listing; the page number is appended to it.
newsurl <- 'http://www.appledaily.com.tw/realtimenews/section/new/'

# Scrape listing pages 1 and 2. The page index is local to the function, so
# it no longer overwrites the workspace variables `page`, `url` and `df`,
# and the per-page results are bound with one rbind() call.
page_rows <- lapply(1:2, function(page) {
  #print(page)
  getURL(paste0(newsurl, page))
})
newsdf <- do.call(rbind, page_rows)
#newsdf

# Persist the combined table.
write.csv(x = newsdf, file = 'applenews.csv')

Merge DataFrame

# Two single-row data frames that share the same columns.
df1 <- data.frame(a = 1, b = 2)
print(df1)
##   a b
## 1 1 2
df2 <- data.frame(a = 3, b = 4)
print(df2)
##   a b
## 1 3 4

# rbind() stacks them row-wise into one data frame.
print(rbind(df1, df2))
##   a b
## 1 1 2
## 2 3 4