Download COVID19

# Downloads below are written relative to this working directory.
setwd("~/covid19_repo")
# setwd("~/")

# Local file name and raw URL of a single daily report CSV.
fname <- "01-22-2020.csv"
url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv"

# Fetch the report and save it under `fname`.
download.file(url, destfile = fname)

Lubridate

#install.packages('lubridate')
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.2
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Today's date, evaluated in the Asia/Taipei time zone.
current_date <- today(tzone = "Asia/Taipei")
current_date
## [1] "2020-07-16"
# Dates can be shifted by adding or subtracting lubridate periods.
current_date - days(1)
## [1] "2020-07-15"
current_date + days(1)
## [1] "2020-07-17"
current_date - weeks(1)
## [1] "2020-07-09"
# seq() works on plain numbers ...
seq(1, 3)
## [1] 1 2 3
# ... and on dates: one element per day from three days ago through today.
seq(current_date - days(3), current_date, by = "days")
## [1] "2020-07-13" "2020-07-14" "2020-07-15" "2020-07-16"

Format Date

format: convert a date to a string

  • %m: month
  • %d: day
  • %Y: year
  • %H: hour
  • %M: minute
  • %S: second
# Print each of the ten days before current_date, most recent first.
for (i in seq_len(10)) {
  dt <- current_date - days(i)
  print(dt)
}
## [1] "2020-07-15"
## [1] "2020-07-14"
## [1] "2020-07-13"
## [1] "2020-07-12"
## [1] "2020-07-11"
## [1] "2020-07-10"
## [1] "2020-07-09"
## [1] "2020-07-08"
## [1] "2020-07-07"
## [1] "2020-07-06"
# A Date carries no time of day, so the time fields render as 00:00:00.
format(current_date, "%Y-%m-%d %H:%M:%S")
## [1] "2020-07-16 00:00:00"
# Month-first layout.
format(current_date, "%m-%d-%Y")
## [1] "07-16-2020"
# Day-first layout.
format(current_date, "%d-%m-%Y")
## [1] "16-07-2020"
# Same ten-day walk as before, printing day-first strings instead of Dates.
for (i in seq_len(10)) {
  dt <- current_date - days(i)
  dt_str <- format(dt, "%d-%m-%Y")
  print(dt_str)
}
## [1] "15-07-2020"
## [1] "14-07-2020"
## [1] "13-07-2020"
## [1] "12-07-2020"
## [1] "11-07-2020"
## [1] "10-07-2020"
## [1] "09-07-2020"
## [1] "08-07-2020"
## [1] "07-07-2020"
## [1] "06-07-2020"

Format URL

# Common prefix of every daily report URL.
base_url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/"

# paste() separates its arguments with a single space by default ...
paste(base_url, "01-22-2020", ".csv")
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/ 01-22-2020 .csv"
# ... unless sep is set to the empty string ...
paste(base_url, "01-22-2020", ".csv", sep = "")
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv"
# ... which is exactly what paste0() does.
paste0(base_url, "01-22-2020", ".csv")
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/01-22-2020.csv"
# sprintf(): each %s placeholder is replaced by the next argument, in order.
hello <- "Hello,%s"
sprintf(fmt = hello, "David")
## [1] "Hello,David"
# A template may contain several placeholders.
hello <- "Hi, %s. Have you eaten %s"
sprintf(fmt = hello, "David", "breakfast")
## [1] "Hi, David. Have you eaten breakfast"
# URL template for a daily report; %s is filled with the report's date string.
url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/%s.csv'

# Print the report URL for each of the ten days before current_date.
for (i in seq_len(10)) {
  dt      <- current_date - days(i)
  # Report files are named month-first (e.g. 01-22-2020.csv), so the date must
  # be rendered as %m-%d-%Y; a day-first string yields a URL that does not exist.
  dt_str  <- format(dt, format = "%m-%d-%Y")
  url_str <- sprintf(url, dt_str)
  print(url_str)
}
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-14-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-13-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-12-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-11-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-10-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-09-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-08-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-07-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-06-2020.csv"
# Build the date string, file name and URL for every day from start_date
# through today.
# The argument is spelled out as `tzone`; `tz` only worked through partial
# argument matching.
current_date <- today(tzone = 'Asia/Taipei')
# ymd() parses a year-month-day string into a Date ...
start_date   <- ymd('2020-01-22')
class(start_date)
## [1] "Date"
# ... and dmy() parses a day-month-year string. This value replaces the one above.
start_date   <- dmy('14-07-2020')
start_date
## [1] "2020-07-14"
date_seq <- seq(start_date, current_date, by = 'days')
#date_seq
url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/%s.csv'

for(dt in date_seq){
  # for() hands over the bare underlying number rather than a Date, so it is
  # converted back with as_date() before formatting.
  #print(class(dt))
  #print(as_date(dt))
  dt_str      <- format(as_date(dt), '%m-%d-%Y')
  fname_str   <- sprintf('%s.csv', dt_str)
  url_str     <- sprintf(url, dt_str)
  #print(url_str)
  #print(dt_str)
  # The download itself is left disabled here.
  #download.file(url_str, fname_str)
}

tryCatch

R 的錯誤處理機制

# Ordinary division.
3 / 5
## [1] 0.6
# Dividing by zero is not an error in R; it yields Inf.
3 / 0
## [1] Inf
# Dividing by a string raises an error, so the line is left commented out.
# 3 / "qoo"

# Divide x by y, demonstrating tryCatch().
#
# x, y: operands passed to `/`.
#
# Returns the quotient when the division succeeds. When a warning or an error
# is signalled, the condition is printed and the condition object itself is
# returned (print() hands back its argument). The finally clause prints a
# message on every call, whether or not a condition was raised.
divide <- function(x, y) {
  on_warning <- function(war) {
    print(war)
  }
  on_error <- function(err) {
    print("ERROR")
    print(err)
  }

  result <- tryCatch(
    {
      x / y
    },
    warning = on_warning,
    error = on_error,
    finally = {
      print("executing finally clause")
    }
  )
  result
}

# Normal case: only the finally message is printed before the result.
divide(3, 5)
## [1] "executing finally clause"
## [1] 0.6
# Division by zero raises no condition, so no handler runs.
divide(3, 0)
## [1] "executing finally clause"
## [1] Inf
# A non-numeric operand triggers the error handler; the returned value is the
# error object, which is why it is printed a second time at the end.
divide(3, "qoo")
## [1] "ERROR"
## <simpleError in x/y: non-numeric argument to binary operator>
## [1] "executing finally clause"
## <simpleError in x/y: non-numeric argument to binary operator>

Adding tryCatch

# URL template for a daily report; %s is filled with the month-first date string.
url <- "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/%s.csv"

# Files are saved relative to this working directory.
setwd("~/covid19_repo")

# Try to download the reports for the three days before current_date. A failed
# download is reported and the loop moves on to the next day.
for (i in seq_len(3)) {
  dt       <- current_date - days(i)
  dt_str   <- format(dt, "%m-%d-%Y")
  url_str  <- sprintf(url, dt_str)
  filename <- paste0(dt_str, ".csv")
  print(url_str)
  tryCatch(
    download.file(url_str, filename),
    error = function(err) {
      print(paste("ERROR", url_str))
    }
  )
}
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv"
## Warning in download.file(url_str, filename): cannot open URL 'https://
## raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/
## csse_covid_19_daily_reports/07-15-2020.csv': HTTP status was '404 Not Found'
## [1] "ERROR https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-14-2020.csv"
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-13-2020.csv"

Handling Missing Files

# file.exists() reports whether a file is already on disk; negating it gives
# the "still needs downloading" condition used below.
! file.exists('01-22-2020.csv')
## [1] FALSE
# Download the reports for the three days before current_date, skipping any
# file that is already present locally.
for (i in seq_len(3)) {
  dt <- current_date - days(i)
  dt_str <- format(dt, format = "%m-%d-%Y")
  url_str <- sprintf(url, dt_str)
  filename <- sprintf('%s.csv', dt_str)
  if (! file.exists(filename)) {
    tryCatch({
      download.file(url_str, filename)
    }, error = function(err) {
      # A failed transfer may leave an empty or truncated file behind. Remove
      # it so the file.exists() guard does not mistake it for a finished
      # download on the next run. unlink() is a no-op if nothing was written.
      unlink(filename)
      print(url_str)
    })
  }
}
## Warning in download.file(url_str, filename): cannot open URL 'https://
## raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/
## csse_covid_19_daily_reports/07-15-2020.csv': HTTP status was '404 Not Found'
## [1] "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/07-15-2020.csv"

RSelenium

#install.packages('RSelenium')
library(RSelenium)

# Client for a Selenium server listening on localhost:4444, driving Chrome.
remDr <- remoteDriver(remoteServerAddr = "localhost", port = 4444, browserName = "chrome")

# Start the browser session; the session details are printed below.
remDr$open()
## [1] "Connecting to remote server"
## $acceptInsecureCerts
## [1] FALSE
## 
## $acceptSslCerts
## [1] FALSE
## 
## $applicationCacheEnabled
## [1] FALSE
## 
## $browserConnectionEnabled
## [1] FALSE
## 
## $browserName
## [1] "chrome"
## 
## $chrome
## $chrome$chromedriverVersion
## [1] "2.42.591059 (a3d9684d10d61aa0c45f6723b327283be1ebaad8)"
## 
## $chrome$userDataDir
## [1] "/var/folders/46/b7dzk4mn6g54qzptv608w7d00000gn/T/.org.chromium.Chromium.ClExz9"
## 
## 
## $cssSelectorsEnabled
## [1] TRUE
## 
## $databaseEnabled
## [1] FALSE
## 
## $`goog:chromeOptions`
## $`goog:chromeOptions`$debuggerAddress
## [1] "localhost:57132"
## 
## 
## $handlesAlerts
## [1] TRUE
## 
## $hasTouchScreen
## [1] FALSE
## 
## $javascriptEnabled
## [1] TRUE
## 
## $locationContextEnabled
## [1] TRUE
## 
## $mobileEmulationEnabled
## [1] FALSE
## 
## $nativeEvents
## [1] TRUE
## 
## $networkConnectionEnabled
## [1] FALSE
## 
## $pageLoadStrategy
## [1] "normal"
## 
## $platform
## [1] "Mac OS X"
## 
## $rotatable
## [1] FALSE
## 
## $setWindowRect
## [1] TRUE
## 
## $takesHeapSnapshot
## [1] TRUE
## 
## $takesScreenshot
## [1] TRUE
## 
## $unexpectedAlertBehaviour
## [1] ""
## 
## $version
## [1] "84.0.4147.89"
## 
## $webStorageEnabled
## [1] TRUE
## 
## $webdriver.remote.sessionid
## [1] "2a570e5a22bc0484ad763bf9e6d14799"
## 
## $id
## [1] "2a570e5a22bc0484ad763bf9e6d14799"
# Log in to statementdog.com through the open browser session and take a
# screenshot.
#
# NOTE(review): credentials should not live in source control. They can be
# supplied through the STATEMENTDOG_EMAIL / STATEMENTDOG_PASSWORD environment
# variables; the original values remain only as a fallback so the example
# still runs unchanged.
login_email    <- Sys.getenv("STATEMENTDOG_EMAIL", unset = "fefiw88976@votavk.com")
login_password <- Sys.getenv("STATEMENTDOG_PASSWORD", unset = "1qaz2wsx")

tryCatch({
  remDr$navigate("https://statementdog.com/users/sign_in")

  # Fill in the sign-in form fields, located by their element ids.
  useremail <- remDr$findElement(using = "id", value = "user_email")
  useremail$sendKeysToElement(list(login_email))
  userepwd <- remDr$findElement(using = "id", value = "user_password")
  userepwd$sendKeysToElement(list(login_password))

  # Submit the form.
  submit_btn <- remDr$findElement(using = "class", value = "submit-btn")
  submit_btn$clickElement()

  remDr$screenshot(file = "test2.png")
}, finally = {
  # Always close the browser session, even if a step above failed, so it is
  # not left open.
  remDr$close()
})