Demo20220318

For Loop

# Travel history: one character vector of visited countries per trip.
# The second trip used to read 'JAPNA', a misspelling of 'JAPAN' that made any
# count of visits to Japan come up one short.
tour_history <- list(
  t1 = c('USA', 'JAPAN', 'HK'),
  t2 = c('CHINA', 'JAPAN'),
  t3 = c('CHINA', 'HK')
)

# Looping over a list hands the body one element (here one trip's vector) at a
# time.
for (tour in tour_history) {
  print(tour)
}

## [1] "USA"   "JAPAN" "HK"   
## [1] "CHINA" "JAPAN"
## [1] "CHINA" "HK"

# `==` compares every element with the scalar; summing the resulting logical
# vector counts how many elements matched.
t <- c("USA", "JAPAN", "HK")
sum("USA" == t)

## [1] 1

# Method 1
# Visit the trips one by one and keep a running total of the entries that are
# equal to 'CHINA'.
s <- 0
for (tour in tour_history) {
  s <- sum('CHINA' == tour) + s
}
s

## [1] 2

# Method 2
# Count the 'CHINA' entries of every trip, then add the per-trip counts.
# vapply() states up front that each trip yields exactly one integer, so the
# result type cannot change with the input the way sapply()'s can (an empty
# list makes sapply() return list()).
sum(
  vapply(
    tour_history,
    function(tour) sum(tour == 'CHINA'),
    integer(1)
  )
)

## [1] 2

Function

# Add two values. The value of the last evaluated expression is what the
# function returns.
f <- function(a, b) a + b

f(3, 5)

## [1] 8

# Add two numbers. Either argument may be left out, in which case its default
# (a = 2, b = 3) is used.
addNum <- function(a = 2, b = 3) {
  a + b
}


# Both arguments supplied.
addNum(5, 6)

## [1] 11

# Only `a` supplied; `b` falls back to 3.
addNum(5)

## [1] 8

# Nothing supplied; both defaults are used.
addNum()

## [1] 5

# Double `a`. The body never touches `b`, and R only evaluates an argument
# when it is actually used, so the call below works without supplying `b`.
f <- function(a, b) 2 * a

f(3)

## [1] 6

# Add two values. Here the body does use `b`, and `b` has no default.
f <- function(a, b) a + b

# Left commented out because it stops with an error: `b` is missing.
#f(3)

students <- c(40, 50, 38, 72, 90)

# Rescale scores by taking the square root and multiplying by ten.
# sqrt() and `*` are vectorised, so a whole vector of scores is handled in one
# call and the result has the same length as `s`.
getPass <- function(s) {
  10 * sqrt(s)
}

getPass(students)

## [1] 63.24555 70.71068 61.64414 84.85281 94.86833

# Amounts stored as text, with a currency prefix and thousands separators.
money <- c('NTD10,000', 'NTD20,000', 'NTD5,000')
class(money)

## [1] "character"

# Exchange rate used by the conversions below (NTD per USD).
exchange_rate <- 28
# Dividing the text directly is an error, so it stays commented out:
#money / exchange_rate

s <- 'NTD10,000'
?strsplit

## starting httpd help server ... done

# Splitting on the currency prefix leaves an empty first piece and the amount
# as the second piece.
t1 <- unlist(strsplit(s, 'NTD'))[2]
t1

## [1] "10,000"

?gsub
# Drop the thousands separators, then parse the remaining digits as a number.
t2 <- gsub(',', '', t1, fixed = TRUE)
t3 <- as.numeric(t2)
t3 / exchange_rate

## [1] 357.1429

#' Convert New Taiwan dollar amounts written as text into US dollars.
#'
#' The previous version looked only at the first element of `twd`, so passing
#' a vector silently returned a single value. This version converts every
#' element.
#'
#' @param twd Character vector of amounts such as 'NTD10,000'. A leading
#'   'NTD' prefix and all comma separators are removed before parsing.
#' @param rate Number of NTD per one USD. Defaults to the global
#'   `exchange_rate`, which is the value the function always used before.
#' @return Numeric vector with one USD amount per element of `twd`. Text that
#'   cannot be parsed as a number yields NA (as.numeric() warns about it).
twd_to_usd <- function(twd, rate = exchange_rate) {
  amount <- sub('^NTD', '', twd)
  amount <- gsub(',', '', amount, fixed = TRUE)
  as.numeric(amount) / rate
}

# Convert each amount on its own. vapply() declares that every call returns
# exactly one number, so the result is always a numeric vector (sapply() would
# return list() for empty input). The input strings are kept as names.
vapply(money, twd_to_usd, numeric(1))

## NTD10,000 NTD20,000  NTD5,000 
##  357.1429  714.2857  178.5714

# The same conversions with an explicit loop, printing one result per amount.
for (m in money) {
  print(twd_to_usd(m))
}

## [1] 357.1429
## [1] 714.2857
## [1] 178.5714

## Practice
# Travel history: one character vector of visited countries per trip.
# The second trip used to read 'JAPNA', a misspelling of 'JAPAN' that made the
# count for Japan one short.
tour_history <- list(
  t1 = c('USA', 'JAPAN', 'HK'),
  t2 = c('CHINA', 'JAPAN'),
  t3 = c('CHINA', 'HK')
)


#' Count how often a country appears across all trips.
#'
#' @param country Country name to look for (exact, case-sensitive match).
#' @param history List of character vectors, one per trip. Defaults to the
#'   global `tour_history`, which is what the function always read before.
#' @return Number of entries in `history` equal to `country`; 0 when there is
#'   no match or `history` is empty.
stat_tour <- function(country, history = tour_history) {
  # Flatten the trips into one vector so a single vectorised comparison does
  # the counting.
  sum(unlist(history, use.names = FALSE) == country)
}

stat_tour('JAPAN')

## [1] 2

Download COVID-19 Data

# Daily report for 2022-03-17 from the CSSEGISandData/COVID-19 repository.
covid19_url <- 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-17-2022.csv'
covid19_file <- '03-17-2022.csv'

# The read below needs a local copy. The download used to be commented out, so
# a fresh run failed with a missing file; fetch it only when it is absent.
if (!file.exists(covid19_file)) {
  download.file(covid19_url, covid19_file)
}

# Alternative: read straight from the URL and browse the table interactively.
#covid19 <- read_csv(covid19_url)
#View(covid19)
library(readr)

## Warning: package 'readr' was built under R version 4.1.3

covid19 <- read_csv(covid19_file)

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

head(covid19)

library(dplyr)

## Warning: 套件 'dplyr' 是用 R 版本 4.1.3 來建造的

## 
## 載入套件：'dplyr'
## 
## 下列物件被遮斷自 'package:stats':
## 
##     filter, lag
## 
## 下列物件被遮斷自 'package:base':
## 
##     intersect, setdiff, setequal, union

# filter data
# Base R: the logical row index keeps the rows where the comparison is TRUE;
# the empty column index keeps every column.
covid19[covid19$Country_Region == 'US'  ,  ]

# dplyr: filter() evaluates the condition against the data frame's columns, so
# the column name is written without the `covid19$` prefix.
head(filter(covid19, Country_Region == 'US' ))

# `&` keeps only the rows that satisfy both conditions.
head(filter(covid19, Country_Region == 'US' & Confirmed > 1000000 ))

# `|` keeps the rows that satisfy at least one of the conditions.
head(filter(covid19, Country_Region == 'US' | Confirmed > 1000000 ))

# `%in%` keeps the rows whose state equals any of the listed values. The match
# is exact, so the name must be spelled as in the data: 'New York' with a
# space ('NewYork' matched no row at all).
head(filter(covid19, Province_State %in% c('New York', 'California')))

## Select
# Base R: the empty row index keeps all rows; columns are picked by quoted name.
head(covid19[ , c('Country_Region', 'Confirmed')])

# dplyr: select() takes the column names unquoted.
head(select(covid19, Country_Region, Confirmed))

## Select & Filter
# The same query (US rows with more than one million confirmed cases, state
# and count columns only) written four ways.

# 1. Base R: row condition and column names inside a single `[ , ]`.
covid19[  covid19$Country_Region == 'US' & covid19$Confirmed > 1000000 ,  c('Province_State', 'Confirmed') ]

# 2. dplyr, nested calls: filter() runs first, its result is passed to select().
select(
filter(covid19, Country_Region == 'US' & Confirmed > 1000000),
Province_State,Confirmed
)

# 3. dplyr with the pipe: `%>%` passes the left-hand result as the first
#    argument of the next call, so the steps read top to bottom.
covid19 %>% 
  filter(Country_Region == 'US' & Confirmed > 1000000) %>%
  select(Province_State,Confirmed)

# 4. As above, renaming while selecting (`new_name = old_name`). The new names
#    are Chinese for "province" and "confirmed".
covid19 %>% 
  filter(Country_Region == 'US' & Confirmed > 1000000) %>%
  select(省份 = Province_State,確診 = Confirmed)

# Rows for Taiwan; the comparison uses the literal value 'Taiwan*', asterisk
# included.
covid19[covid19$Country_Region == 'Taiwan*',]

# Base R: the same rows, restricted to the three count columns.
covid19[covid19$Country_Region == 'Taiwan*', c('Confirmed', 'Deaths', 'Recovered')]

# dplyr equivalent of the line above.
covid19 %>%
  filter(Country_Region == 'Taiwan*') %>%
  select(Confirmed, Deaths, Recovered)

Arrange

# US rows sorted by confirmed cases; arrange() sorts ascending by default.
covid19 %>%
  filter(Country_Region == 'US') %>%
  select(Province_State, Confirmed) %>%
  arrange(Confirmed)

# Wrapping the column in desc() sorts from largest to smallest.
covid19 %>%
  filter(Country_Region == 'US') %>%
  select(Province_State, Confirmed) %>%
  arrange(desc(Confirmed))

# All rows of the report, largest confirmed count first.
covid19 %>%
  select(Country_Region, Confirmed) %>%
  arrange(desc(Confirmed))

# Store the sorted US table so it can be reused.
us_confirmed <- covid19 %>%
  filter(Country_Region == 'US') %>%
  select(Province_State, Confirmed) %>%
  arrange(desc(Confirmed))

# First ten rows of the stored table.
head(us_confirmed, 10)

# The same top ten without an intermediate variable: head() is simply the last
# step of the pipeline.
covid19 %>%
  filter(Country_Region == 'US') %>%
  select(Province_State, Confirmed) %>%
  arrange(desc(Confirmed)) %>%
  head(10)

# Top ten US rows by confirmed cases, selected by position.
# Row positions start at 1; the 0 in the former `slice(0:10)` selected nothing
# and only obscured how many rows come back. slice_head() states the count
# directly.
covid19 %>%
  filter(Country_Region == 'US') %>%
  select(Province_State, Confirmed) %>%
  arrange(desc(Confirmed)) %>%
  slice_head(n = 10)

Mutate

# Add a deaths-per-confirmed-case column with mutate() and show the 20 rows
# with the highest ratio.
# Row positions start at 1; the 0 in the former `slice(0:20)` selected nothing,
# so slice_head() is used to state the row count directly.
# NOTE(review): rows with Confirmed == 0 give Inf or NaN here and are not
# filtered out - confirm whether they should be.
covid19 %>%
  select(Country_Region, Confirmed, Deaths) %>%
  mutate(death_portion = Deaths / Confirmed) %>%
  arrange(desc(death_portion)) %>%
  slice_head(n = 20)

# Ten rows with the highest deaths-to-confirmed ratio (`cfr`), shown with
# Chinese column headings.
# select() runs last, so the sorting step can still use the original names.
# Headings, in order: country name, province name, confirmed count, death
# count, recovered count, death rate among confirmed cases.
covid19 %>%
  mutate(cfr = Deaths/Confirmed) %>%
  arrange(desc(cfr)) %>%
  head(10) %>%
  select(
    國家名稱 = Country_Region, 
    省份名稱=Province_State, 
    確診數 = Confirmed,
    死亡數=Deaths,
    康復數=Recovered,
    確診死亡率=cfr
    )

Group By and Summarize

# Total confirmed cases per country, largest first.
# group_by() makes summarise() produce one output row per Country_Region, with
# sum() applied to that country's rows.
covid19 %>%
  select(Country_Region, Confirmed) %>%
  group_by(Country_Region) %>%
  summarise(Confirmed_Total = sum(Confirmed)) %>%
  arrange(desc(Confirmed_Total))

# Per-country totals of confirmed, deaths and recovered.
# across() applies sum() to each listed column and keeps the column names. It
# replaces summarise_each() and funs(), both deprecated by dplyr (since 0.7.0
# and 0.8.0), which is why this step no longer prints deprecation warnings.
covid19 %>%
  select(Country_Region, Confirmed, Deaths, Recovered) %>%
  group_by(Country_Region) %>%
  summarise(across(c(Confirmed, Deaths, Recovered), sum))

# Per-country minimum, maximum and total of each count column.
# A named list of functions makes across() create one output column per
# column/function pair, named `<column>_<function>` (e.g. Confirmed_min).
# This replaces the deprecated summarise_each()/funs() pair; the column names
# are the same, grouped by source column rather than by function.
covid19 %>%
  select(Country_Region, Confirmed, Deaths, Recovered) %>%
  group_by(Country_Region) %>%
  summarise(across(
    c(Confirmed, Deaths, Recovered),
    list(min = min, max = max, sum = sum)
  ))

# Number of rows in the report.
# Plain summarise() replaces the deprecated summarise_each()/funs() pair; the
# result column is now called `n_rows` instead of reusing `Country_Region`.
covid19 %>%
  summarise(n_rows = n())

# Number of distinct countries/regions in the report, in a column named
# `n_countries`.
covid19 %>%
  summarise(n_countries = n_distinct(Country_Region))

JOIN

library(readr)
# Read the 03-17-2022 daily report straight from the URL; read_csv() accepts a
# URL in place of a local path.
covid19_0317 <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-17-2022.csv")

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Read the previous day's report (03-16-2022) the same way, for the
# day-over-day comparison.
covid19_0316 <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-16-2022.csv")

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Collapse a daily report to one row per country holding that country's total
# confirmed count in `Confirmed_Total`.
sum_confirmed_by_country <- function(daily_report) {
  daily_report %>%
    group_by(Country_Region) %>%
    summarise(Confirmed_Total = sum(Confirmed))
}

# Per-country totals for each of the two days.
covid19_0317_Confirmed <- sum_confirmed_by_country(covid19_0317)
covid19_0316_Confirmed <- sum_confirmed_by_country(covid19_0316)

Homework

請找出20220316 ~ 20220317 新增確診數最多的10個國家

# Task: find the 10 countries with the most newly confirmed cases between
# 2022-03-16 and 2022-03-17.
# inner_join() pairs each country's two daily totals; both inputs have a
# Confirmed_Total column, so the join adds the suffixes .x (03-17) and .y
# (03-16). The day-over-day difference gets an explicit name so it can be
# sorted on, and only the ten largest increases are kept. The earlier version
# left the difference unnamed and never sorted or truncated the result.
covid19_0317_Confirmed %>%
  inner_join(covid19_0316_Confirmed, by = 'Country_Region') %>%
  select(
    Country_Region,
    confirmed_20220317 = Confirmed_Total.x,
    confirmed_20220316 = Confirmed_Total.y
  ) %>%
  mutate(new_confirmed = confirmed_20220317 - confirmed_20220316) %>%
  arrange(desc(new_confirmed)) %>%
  slice_head(n = 10)