Demo20200715

CSV

# Show the current working directory; relative paths such as 'Dengue.csv'
# used below are resolved against it.
getwd()

## [1] "/Users/davidchiu/rprj"

# Remote location of the CSV and the local file name it is stored under.
url <- 'https://od.cdc.gov.tw/eic/Age_County_Gender_061.csv'
destfile <- 'Dengue.csv'

# Option 1: download with base R.
download.file(url = url, destfile = destfile)

# Option 2: download the same file with the curl package; this overwrites
# the copy written by option 1.
curl::curl_download(url = url, destfile = destfile)

# setwd('rprj')  # kept disabled; getwd() below reports the directory in use
getwd()

## [1] "/Users/davidchiu/rprj"

# Download again into the working directory reported above.
curl::curl_download(url = url, destfile = destfile)

# Load the downloaded CSV into a data frame.
dengue <- read.csv(file = 'Dengue.csv')

# Confirm the type of the loaded object.
class(dengue)

## [1] "data.frame"

# Compact overview: column names, column types and the first few values.
str(object = dengue)

## 'data.frame':    18243 obs. of  9 variables:
##  $ 確定病名      : chr  "登革熱" "登革熱" "登革熱" "登革熱" ...
##  $ 發病年份      : int  2003 2003 2003 2003 2003 2003 2003 2003 2003 2003 ...
##  $ 發病月份      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ 縣市          : chr  "台中市" "台中市" "台北市" "台北市" ...
##  $ 鄉鎮          : chr  "大肚區" "北屯區" "中山區" "內湖區" ...
##  $ 性別          : chr  "M" "M" "F" "M" ...
##  $ 是否為境外移入: chr  "是" "是" "是" "是" ...
##  $ 年齡層        : chr  "55-59" "10-14" "35-39" "35-39" ...
##  $ 確定病例數    : int  1 1 1 1 1 1 1 1 1 1 ...

# Per-column summary: quantiles and mean for the integer columns,
# length/class/mode for the character columns.
summary(object = dengue)

##    確定病名            發病年份       發病月份          縣市          
##  Length:18243       Min.   :2003   Min.   : 1.000   Length:18243      
##  Class :character   1st Qu.:2010   1st Qu.: 8.000   Class :character  
##  Mode  :character   Median :2014   Median :10.000   Mode  :character  
##                     Mean   :2013   Mean   : 9.045                     
##                     3rd Qu.:2015   3rd Qu.:11.000                     
##                     Max.   :2020   Max.   :12.000                     
##      鄉鎮               性別           是否為境外移入        年齡層         
##  Length:18243       Length:18243       Length:18243       Length:18243      
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    確定病例數     
##  Min.   :  1.000  
##  1st Qu.:  1.000  
##  Median :  1.000  
##  Mean   :  4.038  
##  3rd Qu.:  2.000  
##  Max.   :274.000

#View(dengue)

# readr alternative: read the CSV directly from its URL. read_csv() reports
# the column specification it guessed while parsing.
library(readr)
Age_County_Gender_061 <- read_csv(
  file = "https://od.cdc.gov.tw/eic/Age_County_Gender_061.csv"
)

## Parsed with column specification:
## cols(
##   確定病名 = col_character(),
##   發病年份 = col_double(),
##   發病月份 = col_double(),
##   縣市 = col_character(),
##   鄉鎮 = col_character(),
##   性別 = col_character(),
##   是否為境外移入 = col_character(),
##   年齡層 = col_character(),
##   確定病例數 = col_double()
## )

# Open the spreadsheet-style viewer only in an interactive session: View()
# needs a GUI/display and can fail when the script is run in batch mode
# (e.g. Rscript or while knitting).
if (interactive()) {
  View(Age_County_Gender_061)
}


# Export the data frame as comma-separated and as tab-separated text.
# row.names is left at its default, so the row names are written as well.
write.csv(dengue, file = 'dengue2.csv')
write.table(dengue, file = 'dengue2.tsv', sep = '\t')

# RData: store an object on disk and restore it under its original name.

save(dengue, file = 'dengue.RData')  # write `dengue` to an .RData file
rm(list = 'dengue')                  # drop it from the workspace
# dengue                             # at this point the object no longer exists
load(file = 'dengue.RData')          # restore `dengue` from the file

Excel

library(readxl)

# Save the workbook to a local file, then hand that path to read_excel().
# Note: `url` and `destfile` are reused and now point at the Excel file.
url <- "https://raw.githubusercontent.com/ywchiu/cdc_course/master/data/disease_info.xlsx"
destfile <- "disease_info.xlsx"
curl::curl_download(url = url, destfile = destfile)
disease_info <- read_excel(path = destfile)

## New names:
## * `` -> ...1

#View(disease_info)

JSON

# install.packages('jsonlite')
library(jsonlite)

# Fetch the JSON document from the URL and parse it; head() shows the
# first six records.
# NOTE: `data` shares its name with the base function data(); calls such as
# data(iris) still resolve to the function.
data <- fromJSON(txt = 'https://od.cdc.gov.tw/eic/Age_County_Gender_061.json')
head(x = data)

##   確定病名 發病年份 發病月份   縣市   鄉鎮 性別 是否為境外移入 年齡層
## 1   登革熱     2003        1 台中市 大肚區    M             是  55-59
## 2   登革熱     2003        1 台中市 北屯區    M             是  10-14
## 3   登革熱     2003        1 台北市 中山區    F             是  35-39
## 4   登革熱     2003        1 台北市 內湖區    M             是  35-39
## 5   登革熱     2003        1 台南市 安南區    F             否  55-59
## 6   登革熱     2003        1 台南市   南區    F             否  65-69
##   確定病例數
## 1          1
## 2          1
## 3          1
## 4          1
## 5          1
## 6          1

XML

library(XML)

# Convert the XML feed at this URL into a data frame.
# Note: `url` is overwritten again here.
url <- 'http://opendata.epa.gov.tw/ws/Data/ATM00698/?$format=xml'
weather <- XML::xmlToDataFrame(doc = url)

# Preview the first six rows.
head(x = weather)

##   SiteName WindDirection WindPower Gust Visibility Temperature Moisture
## 1     馬祖        南南東         3                  29.5(-0.9)       81
## 2     金門        南南西         3                  31.2(-0.9)       75
## 3   東吉島        南南西         4    6             29.8(-1.1)       79
## 4     澎湖          西南         2                  29.4(-2.2)       86
## 5     蘭嶼        西南西         6    8             27.8(-1.1)       79
## 6     大武        南南東         2                  34.6(+3.5)       54
##   AtmosphericPressure Weather Rainfall1day       Unit  DataCreationDate
## 1              1003.4                  0.0 中央氣象局 109/7/15 16:00:00
## 2              1004.3                  0.0 中央氣象局 109/7/15 16:00:00
## 3              1006.0                  1.0 中央氣象局 109/7/15 16:00:00
## 4              1005.3                  0.0 中央氣象局 109/7/15 16:00:00
## 5              1004.4                 10.0 中央氣象局 109/7/15 16:00:00
## 6              1002.6                 11.0 中央氣象局 109/7/15 16:00:00

# read.csv() accepts a URL, so the daily report can be read without saving
# it to disk first.
covid19 <- read.csv(
  file = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv'
)

# Preview the first six rows.
head(x = covid19)

##   Province.State Country.Region     Last.Update Confirmed Deaths Recovered
## 1          Anhui Mainland China 1/22/2020 17:00         1     NA        NA
## 2        Beijing Mainland China 1/22/2020 17:00        14     NA        NA
## 3      Chongqing Mainland China 1/22/2020 17:00         6     NA        NA
## 4         Fujian Mainland China 1/22/2020 17:00         1     NA        NA
## 5          Gansu Mainland China 1/22/2020 17:00        NA     NA        NA
## 6      Guangdong Mainland China 1/22/2020 17:00        26     NA        NA

# Alternative: keep a local copy of the report, then read that copy.
download.file(
  url = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv',
  destfile = '01-22-2020.csv'
)

covid19_2 <- read.csv(file = '01-22-2020.csv')

rvest

#install.packages('rvest')
library(rvest)

## Loading required package: xml2

## 
## Attaching package: 'rvest'

## The following object is masked from 'package:XML':
## 
##     xml

## The following object is masked from 'package:readr':
## 
##     guess_encoding

# Download the page and parse it into an HTML document object.
health_news <- read_html(
  x = 'https://www.globalhealthnow.org/topics/coronaviruses'
)

FIC

library(rvest)

# Download and parse the news page in a single step.
fic_news <- read_html(x = 'http://www.flu.org.cn/scn/news')
# as.character(fic_news)  # uncomment to dump the parsed page as text

library(httr)

# Issue the GET request explicitly so the HTTP response can be inspected.
res <- httr::GET('http://www.flu.org.cn/scn/news')

# Fail fast on a 4xx/5xx response instead of silently parsing an error page.
httr::stop_for_status(res)

# Parse the response body according to its content type.
httr::content(res)

## {html_document}
## <html>
## [1] <head>\n<title>全球流感资讯网-流感快讯</title>\n<meta http-equiv="Content-Type" cont ...
## [2] <body>\r\n\r\n<!--html file head-->\r\n<script type="text/javascript" src ...

Magrittr

# Load the built-in iris data set.
data("iris")

# Nested-call form: take the first six rows, keep (up to) the last six of
# those, and add up their Sepal.Length values. Read from the inside out.
sum(tail(head(iris, n = 6))[["Sepal.Length"]])

## [1] 29.7

library(magrittr)

# Same computation as a left-to-right pipeline; extract2() is magrittr's
# alias for `[[` and pulls out the Sepal.Length column.
iris %>% head(n = 6) %>% tail() %>% extract2('Sepal.Length') %>% sum()

## [1] 29.7

# One step per line, with the piped value written out as the `.` placeholder.
iris %>%
  head(., 6) %>%
  tail(.) %>%
  .[['Sepal.Length']] %>%
  sum(.)

## [1] 29.7

# Small HTML document used by the CSS-selector examples: one <h1> with
# id "title" and two <a> elements with class "link".
sample_page <- paste(
  c(
    '<html><body>',
    '  <h1 id="title">Hello World</h1>',
    '  <a href="#" class="link">This is link1</a>',
    '  <a href="# link2" class="link">This is link2</a>',
    '  </body>',
    '</html>'
  ),
  collapse = '\n'
)

# Tag selector: text of every <h1> element.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'h1') %>%
  html_text()

## [1] "Hello World"

# Tag selector: text of every <a> element.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'a') %>%
  html_text()

## [1] "This is link1" "This is link2"

# Id selector: '#title' matches the element whose id is "title".
sample_page %>%
  read_html() %>%
  html_nodes(css = '#title') %>%
  html_text()

## [1] "Hello World"

# Class selector: '.link' matches every element with class "link".
sample_page %>%
  read_html() %>%
  html_nodes(css = '.link') %>%
  html_text()

## [1] "This is link1" "This is link2"

# Tag + id: an <h1> whose id is "title".
sample_page %>%
  read_html() %>%
  html_nodes(css = 'h1#title') %>%
  html_text()

## [1] "Hello World"

# Descendant + tag + id: an <h1 id="title"> somewhere inside <body>.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'body h1#title') %>%
  html_text()

## [1] "Hello World"

# Descendant + id: any element with id "title" inside <body>.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'body #title') %>%
  html_text()

## [1] "Hello World"

# Class only: every element with class "link".
sample_page %>%
  read_html() %>%
  html_nodes(css = '.link') %>%
  html_text()

## [1] "This is link1" "This is link2"

# Tag + class: <a> elements with class "link".
sample_page %>%
  read_html() %>%
  html_nodes(css = 'a.link') %>%
  html_text()

## [1] "This is link1" "This is link2"

# Descendant + tag + class: <a class="link"> elements inside <body>.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'body a.link') %>%
  html_text()

## [1] "This is link1" "This is link2"

# Read an attribute instead of the text: the href of each matched link.
sample_page %>%
  read_html() %>%
  html_nodes(css = 'body a.link') %>%
  html_attr(name = 'href')

## [1] "#"       "# link2"

Demo20200715

David Chiu

7/15/2020

CSV

Excel

JSON

XML

rvest

FIC

Magrittr