library(httr)
#GET('http://news.ltn.com.tw/list/BreakingNews')
library(rvest)
## Loading required package: xml2
ltn <- read_html('http://news.ltn.com.tw/list/BreakingNews') %>% html_nodes('.lipic')
#as.character(ltn[1])
title <- ltn %>% html_nodes('a.picword') %>% html_text()
url <- ltn %>% html_nodes('a.picword') %>% html_attr('href')
datetime <- ltn %>% html_nodes('span') %>% html_text()
ltn_news <- data.frame(title = title, url = url, datetime = datetime)
View(ltn_news)
library(httr)
?POST
## starting httpd help server ...
## done
url <- 'https://shopee.tw/api/v1/items/'
payload <- '{"item_shop_ids":[{"itemid":19785078,"adsid":0,"shopid":5435363,"campaignid":0},{"itemid":5595311,"adsid":0,"shopid":118045,"campaignid":0},{"itemid":30193672,"adsid":0,"shopid":4067127,"campaignid":0},{"itemid":2330199,"adsid":0,"shopid":196610,"campaignid":0},{"itemid":5778442,"adsid":0,"shopid":1691271,"campaignid":0},{"itemid":15001686,"adsid":0,"shopid":4634210,"campaignid":0},{"itemid":21807228,"adsid":0,"shopid":5435363,"campaignid":0},{"itemid":29047156,"adsid":0,"shopid":5435363,"campaignid":0},{"itemid":17075672,"adsid":0,"shopid":3434330,"campaignid":0},{"itemid":12566028,"adsid":0,"shopid":3418898,"campaignid":0},{"itemid":7569360,"adsid":0,"shopid":3051849,"campaignid":0},{"itemid":29678731,"adsid":0,"shopid":4063412,"campaignid":0},{"itemid":4143428,"adsid":0,"shopid":1794309,"campaignid":0},{"itemid":22324648,"adsid":0,"shopid":1249908,"campaignid":0},{"itemid":7658646,"adsid":0,"shopid":1975307,"campaignid":0},{"itemid":14969456,"adsid":0,"shopid":4634210,"campaignid":0},{"itemid":14875124,"adsid":0,"shopid":13982,"campaignid":0},{"itemid":10261379,"adsid":0,"shopid":3712151,"campaignid":0},{"itemid":5398424,"adsid":0,"shopid":118223,"campaignid":0},{"itemid":4854194,"adsid":0,"shopid":1982863,"campaignid":0},{"itemid":29895576,"adsid":0,"shopid":2519614,"campaignid":0},{"itemid":12256422,"adsid":0,"shopid":169631,"campaignid":0},{"itemid":14932484,"adsid":0,"shopid":482725,"campaignid":0},{"itemid":16693567,"adsid":0,"shopid":4930205,"campaignid":0},{"itemid":11717734,"adsid":0,"shopid":943728,"campaignid":0},{"itemid":5556010,"adsid":0,"shopid":2453776,"campaignid":0},{"itemid":19373566,"adsid":0,"shopid":3979419,"campaignid":0},{"itemid":1274446,"adsid":0,"shopid":633256,"campaignid":0},{"itemid":4483565,"adsid":0,"shopid":810776,"campaignid":0},{"itemid":21312569,"adsid":0,"shopid":12278,"campaignid":0},{"itemid":8352857,"adsid":0,"shopid":633256,"campaignid":0},{"itemid":21419323,"adsid":0,"shopid":4696764,"campaignid":0},{"itemid":28965921,"adsid":0,"shopid":3377331,"campaignid":0},{"itemid":20036765,"adsid":0,"shopid":4634210,"campaignid":0},{"itemid":8035333,"adsid":0,"shopid":633256,"campaignid":0},{"itemid":21415348,"adsid":0,"shopid":4696764,"campaignid":0},{"itemid":5354695,"adsid":0,"shopid":633256,"campaignid":0},{"itemid":20597384,"adsid":0,"shopid":4696764,"campaignid":0},{"itemid":16303760,"adsid":0,"shopid":4824242,"campaignid":0},{"itemid":15422537,"adsid":0,"shopid":3626349,"campaignid":0},{"itemid":9781824,"adsid":0,"shopid":1652921,"campaignid":0},{"itemid":131449,"adsid":0,"shopid":12290,"campaignid":0},{"itemid":10465863,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":10463269,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":10452756,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":10450644,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":10453441,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":10455438,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":13540440,"adsid":0,"shopid":2698155,"campaignid":0},{"itemid":14640624,"adsid":0,"shopid":2698155,"campaignid":0}]}'
shopee <- POST(url,
add_headers(
`User-Agent` = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
Referer = 'https://shopee.tw/%E6%89%8B%E6%A9%9F%E5%B9%B3%E6%9D%BF%E8%88%87%E5%91%A8%E9%82%8A-cat.70',
`x-csrftoken` = 'nejFG2gAVqfyxvjTDcqz8WP55XrSgqdD',
Cookie = 'SPC_IA=-1; SPC_F=JM49YcKaERKGJGQZzjnXv5s6Ip1pFxZ4; REC_T_ID=d007be74-16b9-11e7-bbb8-246e961721ec; SPC_T_ID="R13wP3bTYRQgb3bQaMQeP1gLVE65uvi9LWf3FyOjA0O7qSQMhYtQ8rqstSWrLhLUI8lXNCk3hHDi7ZvI0F+MvDAJe3/26gyexmQQS7I7UF8="; SPC_IA_U=-; SPC_T_IV="98O2upWiCsUC2HxJ7+dQoA=="; csrftoken=nejFG2gAVqfyxvjTDcqz8WP55XrSgqdD; _gat=1; _ga=GA1.2.1232379326.1491037300; SPC_EC=-; SPC_SI=znchksgsgebh01cficchzu63ua5h034k'
),
body = payload,
encode = 'json')
#content(shopee)
https://chromedriver.storage.googleapis.com/index.html?path=2.29/
library(RSelenium)
remDr <- remoteDriver(remoteServerAddr = 'localhost',
port = 4444,
browserName = 'chrome')
remDr$open()
remDr$navigate('http://www.largitdata.com')
remDr$navigate('https://www.facebook.com/oh.bankalng/')
webElem <- remDr$findElement(using = 'id', value = "lst-ib")
webElem$sendKeysToElement(list("coldplay", key = "enter"))
remDr$quit()
``` show databases; use orders; show tables;
create table customer( id int AUTO_INCREMENT PRIMARY KEY, name varchar(50), gender varchar(1), address varchar(200) ) ENGINE=InnoDB;
describe customer;
ALTER TABLE customer; CHANGE COLUMN name cname VARCHAR(50) NOT NULL, ADD COLUMN phone VARCHAR(10);
describe customer;
insert into customer(cname, gender, address) values(‘John’, ‘M’, ‘Chiayi’); insert into customer(cname, gender, address) values(‘Mary’, ‘F’, ‘Tainan’); insert into customer(cname, gender, address) values(‘Brad’, ‘M’, ‘Chiayi’);
select * from customer; select cname, gender from customer; select cname, gender from customer where gender = ‘M’;
select count() from customer; select gender, count() from customer group by gender; select gender, count() from customer group by gender having count() >= 2;
update customer set cname = ‘Johnny’ where id = 1;
create index idx_customer on customer(cname);
create index idx_customer2 on customer(cname, gender);
create index idx_customer3 on customer(cname) using BTREE;
delete from customer
where id =1;
truncate customer;
drop table customer;
``` ## 使用R 連結JDBC
#install.packages("rJava")
library(rJava)
#install.packages("RJDBC")
library(RJDBC)
## Loading required package: DBI
jar.loc <- 'C:\\Program Files (x86)\\MySQL\\Connector.J 5.1\\mysql-connector-java-5.1.40-bin.jar'
drv <- JDBC("com.mysql.jdbc.Driver",
jar.loc,
identifier.quote="`")
conn <- dbConnect(drv, "jdbc:mysql://localhost/appledaily", "root", "test")
data(iris)
dbWriteTable(conn, "iris", iris)
## [1] TRUE
dbListTables(conn)
## [1] "applenews" "iris" "news_main"
df <- dbGetQuery(conn, 'select * from iris')
df
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
df2 <- dbGetQuery(conn, 'select species, avg(`Petal.Length`) from iris group by species')
df2
## Species avg(`Petal.Length`)
## 1 setosa 1.462
## 2 versicolor 4.260
## 3 virginica 5.552
dbExistsTable(conn,'iris')
## [1] TRUE
if(dbExistsTable(conn,'iris')){
dbRemoveTable(conn, 'iris')
}
## logical(0)
dbListTables(conn)
## [1] "applenews" "news_main"
dbDisconnect(conn)
## [1] TRUE
download.file('https://raw.githubusercontent.com/ywchiu/rtibame/master/data/applenews.RData', 'applenews.RData')
load('applenews.RData')
library(RJDBC)
jar.loc <- 'C:\\Program Files (x86)\\MySQL\\Connector.J 5.1\\mysql-connector-java-5.1.40-bin.jar'
drv <- JDBC("com.mysql.jdbc.Driver",
jar.loc,
identifier.quote="`")
conn <- dbConnect(drv, "jdbc:mysql://localhost/appledaily", "root", "test")
#dbWriteTable(conn, "applenews", appledaily)
#CREATE TABLE `news_main` (
# `content` text,
# `title` varchar(1000) DEFAULT NULL,
# `dt` datetime DEFAULT NULL,
# `category` varchar(100) DEFAULT NULL,
# `view_cnt` int(11) DEFAULT NULL
#) ENGINE=InnoDB DEFAULT CHARSET=utf8
applenews$view_cnt <- as.integer(applenews$view_cnt )
str(applenews)
dbWriteTable(conn, 'news_main', applenews, append=TRUE,row.names=FALSE,overwrite=FALSE)
df3 <- dbGetQuery(conn, 'select * from news_main')
res <- dbGetQuery(conn, "SELECT category, count(*) FROM news_main group by category")
res <- dbGetQuery(conn, "SELECT title FROM news_main order by view_cnt desc limit 1")
res
res <- dbGetQuery(conn, "SELECT category, sum(view_cnt) FROM news_main group by category")
res
res <- dbGetQuery(conn, "SELECT category, avg(view_cnt) FROM news_main group by category order by 2 desc")
barplot(height = res$`avg(view_cnt)`, names.arg = res$category, col = as.factor(res$category) )
dbDisconnect(conn)
data(iris)
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
mean(iris[iris$Species == 'setosa', 'Petal.Length'])
## [1] 1.462
tapply(iris$Petal.Length, iris$Species, mean )
## setosa versicolor virginica
## 1.462 4.260 5.552
download.file('https://raw.githubusercontent.com/ywchiu/rtibame/master/data/applenews.RData', 'applenews.RData')
load('applenews.RData')
# R
applenews[applenews$category == '娛樂',]
# dplyr
library(dplyr)
filter(applenews,category == '娛樂')
filter(applenews, category == "娛樂" & view_cnt > 1000)
filter(applenews, category == "娛樂" | view_cnt > 1000)
filter(applenews, category %in% c("娛樂", "社會") )
# Choose Column
applenews[,c('title', 'view_cnt')]
select(applenews, title, view_cnt)
applenews %>%
select(title,view_cnt) %>%
filter(view_cnt > 1000)
#使用Arrange 可以將資料做排序
applenews %>%
select(title, category,view_cnt) %>%
filter(category == "社會") %>%
arrange(view_cnt)
#由大到小排序 (desc)
applenews %>%
select(title, category,view_cnt) %>%
filter(category == "社會") %>%
arrange(desc(view_cnt))
#計算總和
freqsum <- applenews %>%
select(view_cnt) %>%
sum()
freqsum
# 使用mutate 新增欄位
applenews %>%
select(title, category,view_cnt) %>%
mutate(portion= view_cnt/freqsum)
#儲存新欄位
applenews <- applenews %>% mutate(portion= view_cnt/freqsum)
applenews %>%
group_by(category) %>%
summarise(view_sum = sum(view_cnt, na.rm=TRUE)) %>% arrange(desc(view_sum))
applenews %>%
group_by(category) %>%
summarise_each(funs(sum), view_cnt, portion) %>% arrange(desc(portion))
applenews %>%
group_by(category) %>%
summarise_each(funs(min(., na.rm=TRUE), max(., na.rm=TRUE)), matches("view_cnt"))
applenews %>%
group_by(category) %>%
summarise_each(funs(sum(., na.rm=TRUE), mean(., na.rm=TRUE)), matches("view_cnt"))
#一般計數
applenews %>%
select(category) %>%
summarise_each(funs(n()))
#不重複計數
applenews %>%
select(category) %>%
summarise_each(funs(n_distinct(category)))
#取得統計數
cat_stat <- applenews %>%
group_by(category) %>%
summarise(view_sum = sum(view_cnt)) %>%
arrange(desc(view_sum))
cat_stat
cat_stat$category <- as.factor(cat_stat$category)
#繪圖
barplot(cat_stat$view_sum, names.arg=cat_stat$category, col=cat_stat$category)
pie(cat_stat$view_sum, label = cat_stat$category)
applenews %>%
group_by(category) %>%
summarise(view_cnt = n()) %>%
arrange(desc(view_cnt))
applenews %>%
group_by(category) %>%
summarise(view_mean = mean(view_cnt)) %>%
arrange(desc(view_mean))
applenews %>% select(dt) %>% head() %>% format('%H')
applenews$hour <- format(applenews$dt, '%H')
cnt_by_hour <- applenews %>% filter(dt >= '2016-04-14' & dt < '2016-04-15') %>% select(hour) %>% group_by(hour) %>% summarize(cnt = n())
plot(cnt_by_hour, type = 'b')
?strptime
dates <- c('2017/04/08 16:42:00')
class(dates)
a <- strptime(dates, "%Y/%m/%d %H:%M:%S")
class(a)
format(a, '%H')
format(a, '%y-%m-%d %A')