# Load a CSV from the cdc.gov.tw open-data site into a base data.frame.
#
# Relative paths such as "Dengue.csv" are resolved against the working
# directory, so print it first to know where the download will land.
# Run the script from the project directory instead of calling setwd().
getwd()
## [1] "/Users/davidchiu/rprj"

# Remote source and the local file name it is stored under
url <- "https://od.cdc.gov.tw/eic/Age_County_Gender_061.csv"
destfile <- "Dengue.csv"

# One transfer is enough: repeating the download (with download.file() or
# with curl) only overwrites destfile with the same content.
curl::curl_download(url, destfile)

# Read the local copy through destfile so the download target and the file
# being parsed cannot drift apart.
dengue <- read.csv(destfile)

# Quick inspection: class, column types, and per-column summaries
class(dengue)
## [1] "data.frame"
str(dengue)
## 'data.frame': 18243 obs. of 9 variables:
## $ 確定病名 : chr "登革熱" "登革熱" "登革熱" "登革熱" ...
## $ 發病年份 : int 2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
## $ 發病月份 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ 縣市 : chr "台中市" "台中市" "台北市" "台北市" ...
## $ 鄉鎮 : chr "大肚區" "北屯區" "中山區" "內湖區" ...
## $ 性別 : chr "M" "M" "F" "M" ...
## $ 是否為境外移入: chr "是" "是" "是" "是" ...
## $ 年齡層 : chr "55-59" "10-14" "35-39" "35-39" ...
## $ 確定病例數 : int 1 1 1 1 1 1 1 1 1 1 ...
summary(dengue)
## 確定病名 發病年份 發病月份 縣市
## Length:18243 Min. :2003 Min. : 1.000 Length:18243
## Class :character 1st Qu.:2010 1st Qu.: 8.000 Class :character
## Mode :character Median :2014 Median :10.000 Mode :character
## Mean :2013 Mean : 9.045
## 3rd Qu.:2015 3rd Qu.:11.000
## Max. :2020 Max. :12.000
## 鄉鎮 性別 是否為境外移入 年齡層
## Length:18243 Length:18243 Length:18243 Length:18243
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## 確定病例數
## Min. : 1.000
## 1st Qu.: 1.000
## Median : 1.000
## Mean : 4.038
## 3rd Qu.: 2.000
## Max. :274.000
#View(dengue)
library(readr)
# Read the CSV directly from its URL with readr::read_csv(); no local copy
# is written. The column types readr guessed are echoed below.
Age_County_Gender_061 <- read_csv("https://od.cdc.gov.tw/eic/Age_County_Gender_061.csv")
## Parsed with column specification:
## cols(
## 確定病名 = col_character(),
## 發病年份 = col_double(),
## 發病月份 = col_double(),
## 縣市 = col_character(),
## 鄉鎮 = col_character(),
## 性別 = col_character(),
## 是否為境外移入 = col_character(),
## 年齡層 = col_character(),
## 確定病例數 = col_double()
## )

# View() opens a spreadsheet-style viewer and is only meaningful in an
# interactive session; guard it so the script still runs under Rscript or
# when knitted (the other View() calls in this script are commented out).
if (interactive()) {
  View(Age_County_Gender_061)
}
# Export the data frame as CSV and as tab-separated text.
# row.names = FALSE keeps R's automatic row numbers out of the files;
# otherwise they are written as an extra, unnamed first column (and for
# write.table() the header line then has one field fewer than the data
# rows), which other tools read back as a spurious column.
write.csv(x = dengue, file = "dengue2.csv", row.names = FALSE)
write.table(x = dengue, file = "dengue2.tsv", sep = "\t", row.names = FALSE)

# RData round trip.
# save() stores each object under its own name, so the object is passed
# unnamed: a tag such as `x =` is ignored and only suggests, wrongly, that
# the object would be restored as `x`.
save(dengue, file = "dengue.RData")
rm(dengue)
# dengue   # evaluating this here would fail: the object has been removed
# load() recreates `dengue` in the calling environment
load("dengue.RData")
library(readxl)
# Local file name for the workbook, and the URL it is fetched from
destfile <- "disease_info.xlsx"
url <- "https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/disease_info.xlsx"

# Save the workbook to disk, then hand the local path to read_excel()
curl::curl_download(url = url, destfile = destfile)
disease_info <- read_excel(path = destfile)
## New names:
## * `` -> ...1
#View(disease_info)
#install.packages('jsonlite')
library(jsonlite)
# Parse the JSON feed at this URL and preview the first rows of the result
data <- jsonlite::fromJSON(
  txt = "https://od.cdc.gov.tw/eic/Age_County_Gender_061.json"
)
head(x = data)
## 確定病名 發病年份 發病月份 縣市 鄉鎮 性別 是否為境外移入 年齡層
## 1 登革熱 2003 1 台中市 大肚區 M 是 55-59
## 2 登革熱 2003 1 台中市 北屯區 M 是 10-14
## 3 登革熱 2003 1 台北市 中山區 F 是 35-39
## 4 登革熱 2003 1 台北市 內湖區 M 是 35-39
## 5 登革熱 2003 1 台南市 安南區 F 否 55-59
## 6 登革熱 2003 1 台南市 南區 F 否 65-69
## 確定病例數
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
library(XML)
# XML feed URL; xmlToDataFrame() turns the document's records into rows
url <- "http://opendata.epa.gov.tw/ws/Data/ATM00698/?$format=xml"
weather <- XML::xmlToDataFrame(doc = url)
head(x = weather)
## SiteName WindDirection WindPower Gust Visibility Temperature Moisture
## 1 馬祖 南南東 3 29.5(-0.9) 81
## 2 金門 南南西 3 31.2(-0.9) 75
## 3 東吉島 南南西 4 6 29.8(-1.1) 79
## 4 澎湖 西南 2 29.4(-2.2) 86
## 5 蘭嶼 西南西 6 8 27.8(-1.1) 79
## 6 大武 南南東 2 34.6(+3.5) 54
## AtmosphericPressure Weather Rainfall1day Unit DataCreationDate
## 1 1003.4 0.0 中央氣象局 109/7/15 16:00:00
## 2 1004.3 0.0 中央氣象局 109/7/15 16:00:00
## 3 1006.0 1.0 中央氣象局 109/7/15 16:00:00
## 4 1005.3 0.0 中央氣象局 109/7/15 16:00:00
## 5 1004.4 10.0 中央氣象局 109/7/15 16:00:00
## 6 1002.6 11.0 中央氣象局 109/7/15 16:00:00
# Variant 1: let read.csv() read the daily report straight from the URL
covid19 <- read.csv(
  file = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv"
)
head(x = covid19)
## Province.State Country.Region Last.Update Confirmed Deaths Recovered
## 1 Anhui Mainland China 1/22/2020 17:00 1 NA NA
## 2 Beijing Mainland China 1/22/2020 17:00 14 NA NA
## 3 Chongqing Mainland China 1/22/2020 17:00 6 NA NA
## 4 Fujian Mainland China 1/22/2020 17:00 1 NA NA
## 5 Gansu Mainland China 1/22/2020 17:00 NA NA NA
## 6 Guangdong Mainland China 1/22/2020 17:00 26 NA NA

# Variant 2: keep a local copy of the same report, then read that file
download.file(
  url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv",
  destfile = "01-22-2020.csv"
)
covid19_2 <- read.csv(file = "01-22-2020.csv")
#install.packages('rvest')
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:XML':
##
## xml
## The following object is masked from 'package:readr':
##
## guess_encoding
# Download and parse two pages with read_html() (rvest is already attached
# above, so it does not need to be loaded a second time).
health_news <- read_html("https://www.globalhealthnow.org/topics/coronaviruses")
fic_news <- read_html("http://www.flu.org.cn/scn/news")
# as.character(fic_news)   # uncomment to dump the raw markup

# Same page fetched through httr, which exposes the HTTP response itself.
library(httr)
res <- httr::GET("http://www.flu.org.cn/scn/news")
# GET() returns normally even for 4xx/5xx responses; fail loudly here so an
# error page is never parsed as if it were the real content.
httr::stop_for_status(res)
httr::content(res)
## {html_document}
## <html>
## [1] <head>\n<title>全球流感资讯网-流感快讯</title>\n<meta http-equiv="Content-Type" cont ...
## [2] <body>\r\n\r\n<!--html file head-->\r\n<script type="text/javascript" src ...
# Nested-call form: take the first six rows of iris, pass them through
# tail(), and add up their Sepal.Length values (read inside-out).
data(list = "iris")
sum(tail(x = head(x = iris, n = 6))[["Sepal.Length"]])
## [1] 29.7
library(magrittr)
# Same computation as a left-to-right pipeline on a single line
iris %>% head(n = 6) %>% tail() %>% .[["Sepal.Length"]] %>% sum()
## [1] 29.7

# ...and once more with one step per line
iris %>%
  head(n = 6) %>%
  tail() %>%
  .[["Sepal.Length"]] %>%
  sum()
## [1] 29.7
# Minimal HTML document for trying out CSS selectors with rvest
sample_page <- '<html><body>
<h1 id="title">Hello World</h1>
<a href="#" class="link">This is link1</a>
<a href="# link2" class="link">This is link2</a>
</body>
</html>'

# Parse the markup once and reuse the document for every query below,
# instead of re-parsing the same string before each selector.
sample_doc <- read_html(sample_page)

# Tag selector: every <h1>
sample_doc %>%
  html_nodes("h1") %>%
  html_text()
## [1] "Hello World"

# Tag selector: every <a>
sample_doc %>%
  html_nodes("a") %>%
  html_text()
## [1] "This is link1" "This is link2"

# Id selector: the element with id="title"
sample_doc %>%
  html_nodes("#title") %>%
  html_text()
## [1] "Hello World"

# Class selector: elements with class="link"
sample_doc %>%
  html_nodes(".link") %>%
  html_text()
## [1] "This is link1" "This is link2"

# Tag + id: an <h1> whose id is "title"
sample_doc %>%
  html_nodes("h1#title") %>%
  html_text()
## [1] "Hello World"

# Descendant selector: h1#title anywhere inside <body>
sample_doc %>%
  html_nodes("body h1#title") %>%
  html_text()
## [1] "Hello World"

# Descendant selector with the id alone
sample_doc %>%
  html_nodes("body #title") %>%
  html_text()
## [1] "Hello World"

# Class selector again, as the starting point for the narrower forms below
sample_doc %>%
  html_nodes(".link") %>%
  html_text()
## [1] "This is link1" "This is link2"

# Tag + class: <a> elements with class="link"
sample_doc %>%
  html_nodes("a.link") %>%
  html_text()
## [1] "This is link1" "This is link2"

# Descendant selector: a.link anywhere inside <body>
sample_doc %>%
  html_nodes("body a.link") %>%
  html_text()
## [1] "This is link1" "This is link2"

# Same nodes, but extract the href attribute instead of the text
sample_doc %>%
  html_nodes("body a.link") %>%
  html_attr("href")
## [1] "#" "# link2"