Demo20220323

使用readr 讀取資料

library(readr)

## Warning: 套件 'readr' 是用 R 版本 4.1.3 來建造的

# Read the JHU CSSE COVID-19 daily report for 2022-03-17 directly from GitHub.
# No column types are supplied, so readr guesses them and prints the
# column specification shown below.
X03_17_2022 <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-17-2022.csv")

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

View(X03_17_2022)

使用dplyr 分析資料

library(dplyr)

## Warning: 套件 'dplyr' 是用 R 版本 4.1.3 來建造的

## 
## 載入套件：'dplyr'

## 下列物件被遮斷自 'package:stats':
## 
##     filter, lag

## 下列物件被遮斷自 'package:base':
## 
##     intersect, setdiff, setequal, union

# Ten largest individual rows (no grouping, so these are not country totals)
# that report more than one million confirmed cases. SQL equivalent:
#   SELECT Country_Region, Confirmed FROM X03-17-2022
#   WHERE Confirmed > 1000000 ORDER BY Confirmed DESC LIMIT 10

X03_17_2022 %>%
  filter(Confirmed > 1e6) %>%
  arrange(desc(Confirmed)) %>%
  select(Country_Region, Confirmed) %>%
  head(n = 10)

# Ten countries/regions with the highest summed confirmed count.
# SQL equivalent:
#   SELECT Country_Region, sum(Confirmed) FROM x03_17_2022
#   GROUP BY Country_Region ORDER BY sum(Confirmed) DESC LIMIT 10

X03_17_2022 %>%
  group_by(Country_Region) %>%
  # summarise() keeps only the grouping key and the new total, so no
  # explicit select() of the two columns is needed beforehand.
  summarise(Confirmed_Total = sum(Confirmed)) %>%
  arrange(desc(Confirmed_Total)) %>%
  head(n = 10)

homework

請找出20220316 ~ 20220317 新增確診數最多的10個國家

library(readr)
# 2022-03-17 daily report (the same URL that is read at the top of this
# document); it is the "current day" side of the day-over-day comparison.
X03_17_2022 <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-17-2022.csv")

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# 2022-03-16 daily report: the previous day's snapshot, subtracted from the
# 2022-03-17 totals further down to get the one-day increase.
X03_16_2022 <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/03-16-2022.csv")

## Rows: 4010 Columns: 14
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr  (4): Admin2, Province_State, Country_Region, Combined_Key
## dbl  (7): FIPS, Lat, Long_, Confirmed, Deaths, Incident_Rate, Case_Fatality_...
## lgl  (2): Recovered, Active
## dttm (1): Last_Update
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Sum the confirmed counts of one daily report per country/region.
#
# daily_report: a data frame with `Country_Region` and `Confirmed` columns.
# Returns an ungrouped tibble with one row per `Country_Region` and a
#   `Confirmed_Total` column holding the sum of `Confirmed` for that key.
sum_confirmed_by_country <- function(daily_report) {
  daily_report %>%
    group_by(Country_Region) %>%
    summarise(Confirmed_Total = sum(Confirmed), .groups = "drop")
}

# Apply the same aggregation to both days so the two tables cannot drift apart.
covid19_0317_Confirmed <- sum_confirmed_by_country(X03_17_2022)
covid19_0316_Confirmed <- sum_confirmed_by_country(X03_16_2022)

# SELECT * FROM covid19_0317_confirmed INNER JOIN covid19_0316_confirmed
#   ON covid19_0317_confirmed.Country_Region = covid19_0316_confirmed.Country_Region

# Join the two per-country totals once and reuse the result. Explicit
# suffixes replace the default `.x` / `.y`, so each total names its own day.
covid19_joined <- covid19_0317_Confirmed %>%
  inner_join(
    covid19_0316_Confirmed,
    by = "Country_Region",
    suffix = c("_0317", "_0316")
  )

covid19_joined

# Ten countries/regions with the largest increase from 03-16 to 03-17.
covid19_joined %>%
  mutate(Confirmed_Diff = Confirmed_Total_0317 - Confirmed_Total_0316) %>%
  arrange(desc(Confirmed_Diff)) %>%
  head(10)

# To show only the key and the difference, append:
#   %>% select(Country_Region, Confirmed_Diff)

資料視覺化

library(readr)
# Global confirmed-case time series in wide format: besides the
# Province/State, Country/Region, Lat and Long columns there is one numeric
# column per date ("1/22/20", "1/23/20", ...).
time_series_covid19_confirmed_global <- read_csv("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")

## Rows: 284 Columns: 795
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (2): Province/State, Country/Region
## dbl (793): Lat, Long, 1/22/20, 1/23/20, 1/24/20, 1/25/20, 1/26/20, 1/27/20, ...
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

View(time_series_covid19_confirmed_global)

df <- time_series_covid19_confirmed_global
col_names <- colnames(df)

# The first four columns are the non-date identifier columns
# (Province/State, Country/Region, Lat, Long); everything after them is a
# date column. Dropping by negative index avoids `5:length(df)`, which counts
# backwards (5, 4, ...) if the table ever has fewer than five columns.
date_cols <- col_names[-seq_len(4)]

date_cols

##   [1] "1/22/20"  "1/23/20"  "1/24/20"  "1/25/20"  "1/26/20"  "1/27/20" 
##   [7] "1/28/20"  "1/29/20"  "1/30/20"  "1/31/20"  "2/1/20"   "2/2/20"  
##  [13] "2/3/20"   "2/4/20"   "2/5/20"   "2/6/20"   "2/7/20"   "2/8/20"  
##  [19] "2/9/20"   "2/10/20"  "2/11/20"  "2/12/20"  "2/13/20"  "2/14/20" 
##  [25] "2/15/20"  "2/16/20"  "2/17/20"  "2/18/20"  "2/19/20"  "2/20/20" 
##  [31] "2/21/20"  "2/22/20"  "2/23/20"  "2/24/20"  "2/25/20"  "2/26/20" 
##  [37] "2/27/20"  "2/28/20"  "2/29/20"  "3/1/20"   "3/2/20"   "3/3/20"  
##  [43] "3/4/20"   "3/5/20"   "3/6/20"   "3/7/20"   "3/8/20"   "3/9/20"  
##  [49] "3/10/20"  "3/11/20"  "3/12/20"  "3/13/20"  "3/14/20"  "3/15/20" 
##  [55] "3/16/20"  "3/17/20"  "3/18/20"  "3/19/20"  "3/20/20"  "3/21/20" 
##  [61] "3/22/20"  "3/23/20"  "3/24/20"  "3/25/20"  "3/26/20"  "3/27/20" 
##  [67] "3/28/20"  "3/29/20"  "3/30/20"  "3/31/20"  "4/1/20"   "4/2/20"  
##  [73] "4/3/20"   "4/4/20"   "4/5/20"   "4/6/20"   "4/7/20"   "4/8/20"  
##  [79] "4/9/20"   "4/10/20"  "4/11/20"  "4/12/20"  "4/13/20"  "4/14/20" 
##  [85] "4/15/20"  "4/16/20"  "4/17/20"  "4/18/20"  "4/19/20"  "4/20/20" 
##  [91] "4/21/20"  "4/22/20"  "4/23/20"  "4/24/20"  "4/25/20"  "4/26/20" 
##  [97] "4/27/20"  "4/28/20"  "4/29/20"  "4/30/20"  "5/1/20"   "5/2/20"  
## [103] "5/3/20"   "5/4/20"   "5/5/20"   "5/6/20"   "5/7/20"   "5/8/20"  
## [109] "5/9/20"   "5/10/20"  "5/11/20"  "5/12/20"  "5/13/20"  "5/14/20" 
## [115] "5/15/20"  "5/16/20"  "5/17/20"  "5/18/20"  "5/19/20"  "5/20/20" 
## [121] "5/21/20"  "5/22/20"  "5/23/20"  "5/24/20"  "5/25/20"  "5/26/20" 
## [127] "5/27/20"  "5/28/20"  "5/29/20"  "5/30/20"  "5/31/20"  "6/1/20"  
## [133] "6/2/20"   "6/3/20"   "6/4/20"   "6/5/20"   "6/6/20"   "6/7/20"  
## [139] "6/8/20"   "6/9/20"   "6/10/20"  "6/11/20"  "6/12/20"  "6/13/20" 
## [145] "6/14/20"  "6/15/20"  "6/16/20"  "6/17/20"  "6/18/20"  "6/19/20" 
## [151] "6/20/20"  "6/21/20"  "6/22/20"  "6/23/20"  "6/24/20"  "6/25/20" 
## [157] "6/26/20"  "6/27/20"  "6/28/20"  "6/29/20"  "6/30/20"  "7/1/20"  
## [163] "7/2/20"   "7/3/20"   "7/4/20"   "7/5/20"   "7/6/20"   "7/7/20"  
## [169] "7/8/20"   "7/9/20"   "7/10/20"  "7/11/20"  "7/12/20"  "7/13/20" 
## [175] "7/14/20"  "7/15/20"  "7/16/20"  "7/17/20"  "7/18/20"  "7/19/20" 
## [181] "7/20/20"  "7/21/20"  "7/22/20"  "7/23/20"  "7/24/20"  "7/25/20" 
## [187] "7/26/20"  "7/27/20"  "7/28/20"  "7/29/20"  "7/30/20"  "7/31/20" 
## [193] "8/1/20"   "8/2/20"   "8/3/20"   "8/4/20"   "8/5/20"   "8/6/20"  
## [199] "8/7/20"   "8/8/20"   "8/9/20"   "8/10/20"  "8/11/20"  "8/12/20" 
## [205] "8/13/20"  "8/14/20"  "8/15/20"  "8/16/20"  "8/17/20"  "8/18/20" 
## [211] "8/19/20"  "8/20/20"  "8/21/20"  "8/22/20"  "8/23/20"  "8/24/20" 
## [217] "8/25/20"  "8/26/20"  "8/27/20"  "8/28/20"  "8/29/20"  "8/30/20" 
## [223] "8/31/20"  "9/1/20"   "9/2/20"   "9/3/20"   "9/4/20"   "9/5/20"  
## [229] "9/6/20"   "9/7/20"   "9/8/20"   "9/9/20"   "9/10/20"  "9/11/20" 
## [235] "9/12/20"  "9/13/20"  "9/14/20"  "9/15/20"  "9/16/20"  "9/17/20" 
## [241] "9/18/20"  "9/19/20"  "9/20/20"  "9/21/20"  "9/22/20"  "9/23/20" 
## [247] "9/24/20"  "9/25/20"  "9/26/20"  "9/27/20"  "9/28/20"  "9/29/20" 
## [253] "9/30/20"  "10/1/20"  "10/2/20"  "10/3/20"  "10/4/20"  "10/5/20" 
## [259] "10/6/20"  "10/7/20"  "10/8/20"  "10/9/20"  "10/10/20" "10/11/20"
## [265] "10/12/20" "10/13/20" "10/14/20" "10/15/20" "10/16/20" "10/17/20"
## [271] "10/18/20" "10/19/20" "10/20/20" "10/21/20" "10/22/20" "10/23/20"
## [277] "10/24/20" "10/25/20" "10/26/20" "10/27/20" "10/28/20" "10/29/20"
## [283] "10/30/20" "10/31/20" "11/1/20"  "11/2/20"  "11/3/20"  "11/4/20" 
## [289] "11/5/20"  "11/6/20"  "11/7/20"  "11/8/20"  "11/9/20"  "11/10/20"
## [295] "11/11/20" "11/12/20" "11/13/20" "11/14/20" "11/15/20" "11/16/20"
## [301] "11/17/20" "11/18/20" "11/19/20" "11/20/20" "11/21/20" "11/22/20"
## [307] "11/23/20" "11/24/20" "11/25/20" "11/26/20" "11/27/20" "11/28/20"
## [313] "11/29/20" "11/30/20" "12/1/20"  "12/2/20"  "12/3/20"  "12/4/20" 
## [319] "12/5/20"  "12/6/20"  "12/7/20"  "12/8/20"  "12/9/20"  "12/10/20"
## [325] "12/11/20" "12/12/20" "12/13/20" "12/14/20" "12/15/20" "12/16/20"
## [331] "12/17/20" "12/18/20" "12/19/20" "12/20/20" "12/21/20" "12/22/20"
## [337] "12/23/20" "12/24/20" "12/25/20" "12/26/20" "12/27/20" "12/28/20"
## [343] "12/29/20" "12/30/20" "12/31/20" "1/1/21"   "1/2/21"   "1/3/21"  
## [349] "1/4/21"   "1/5/21"   "1/6/21"   "1/7/21"   "1/8/21"   "1/9/21"  
## [355] "1/10/21"  "1/11/21"  "1/12/21"  "1/13/21"  "1/14/21"  "1/15/21" 
## [361] "1/16/21"  "1/17/21"  "1/18/21"  "1/19/21"  "1/20/21"  "1/21/21" 
## [367] "1/22/21"  "1/23/21"  "1/24/21"  "1/25/21"  "1/26/21"  "1/27/21" 
## [373] "1/28/21"  "1/29/21"  "1/30/21"  "1/31/21"  "2/1/21"   "2/2/21"  
## [379] "2/3/21"   "2/4/21"   "2/5/21"   "2/6/21"   "2/7/21"   "2/8/21"  
## [385] "2/9/21"   "2/10/21"  "2/11/21"  "2/12/21"  "2/13/21"  "2/14/21" 
## [391] "2/15/21"  "2/16/21"  "2/17/21"  "2/18/21"  "2/19/21"  "2/20/21" 
## [397] "2/21/21"  "2/22/21"  "2/23/21"  "2/24/21"  "2/25/21"  "2/26/21" 
## [403] "2/27/21"  "2/28/21"  "3/1/21"   "3/2/21"   "3/3/21"   "3/4/21"  
## [409] "3/5/21"   "3/6/21"   "3/7/21"   "3/8/21"   "3/9/21"   "3/10/21" 
## [415] "3/11/21"  "3/12/21"  "3/13/21"  "3/14/21"  "3/15/21"  "3/16/21" 
## [421] "3/17/21"  "3/18/21"  "3/19/21"  "3/20/21"  "3/21/21"  "3/22/21" 
## [427] "3/23/21"  "3/24/21"  "3/25/21"  "3/26/21"  "3/27/21"  "3/28/21" 
## [433] "3/29/21"  "3/30/21"  "3/31/21"  "4/1/21"   "4/2/21"   "4/3/21"  
## [439] "4/4/21"   "4/5/21"   "4/6/21"   "4/7/21"   "4/8/21"   "4/9/21"  
## [445] "4/10/21"  "4/11/21"  "4/12/21"  "4/13/21"  "4/14/21"  "4/15/21" 
## [451] "4/16/21"  "4/17/21"  "4/18/21"  "4/19/21"  "4/20/21"  "4/21/21" 
## [457] "4/22/21"  "4/23/21"  "4/24/21"  "4/25/21"  "4/26/21"  "4/27/21" 
## [463] "4/28/21"  "4/29/21"  "4/30/21"  "5/1/21"   "5/2/21"   "5/3/21"  
## [469] "5/4/21"   "5/5/21"   "5/6/21"   "5/7/21"   "5/8/21"   "5/9/21"  
## [475] "5/10/21"  "5/11/21"  "5/12/21"  "5/13/21"  "5/14/21"  "5/15/21" 
## [481] "5/16/21"  "5/17/21"  "5/18/21"  "5/19/21"  "5/20/21"  "5/21/21" 
## [487] "5/22/21"  "5/23/21"  "5/24/21"  "5/25/21"  "5/26/21"  "5/27/21" 
## [493] "5/28/21"  "5/29/21"  "5/30/21"  "5/31/21"  "6/1/21"   "6/2/21"  
## [499] "6/3/21"   "6/4/21"   "6/5/21"   "6/6/21"   "6/7/21"   "6/8/21"  
## [505] "6/9/21"   "6/10/21"  "6/11/21"  "6/12/21"  "6/13/21"  "6/14/21" 
## [511] "6/15/21"  "6/16/21"  "6/17/21"  "6/18/21"  "6/19/21"  "6/20/21" 
## [517] "6/21/21"  "6/22/21"  "6/23/21"  "6/24/21"  "6/25/21"  "6/26/21" 
## [523] "6/27/21"  "6/28/21"  "6/29/21"  "6/30/21"  "7/1/21"   "7/2/21"  
## [529] "7/3/21"   "7/4/21"   "7/5/21"   "7/6/21"   "7/7/21"   "7/8/21"  
## [535] "7/9/21"   "7/10/21"  "7/11/21"  "7/12/21"  "7/13/21"  "7/14/21" 
## [541] "7/15/21"  "7/16/21"  "7/17/21"  "7/18/21"  "7/19/21"  "7/20/21" 
## [547] "7/21/21"  "7/22/21"  "7/23/21"  "7/24/21"  "7/25/21"  "7/26/21" 
## [553] "7/27/21"  "7/28/21"  "7/29/21"  "7/30/21"  "7/31/21"  "8/1/21"  
## [559] "8/2/21"   "8/3/21"   "8/4/21"   "8/5/21"   "8/6/21"   "8/7/21"  
## [565] "8/8/21"   "8/9/21"   "8/10/21"  "8/11/21"  "8/12/21"  "8/13/21" 
## [571] "8/14/21"  "8/15/21"  "8/16/21"  "8/17/21"  "8/18/21"  "8/19/21" 
## [577] "8/20/21"  "8/21/21"  "8/22/21"  "8/23/21"  "8/24/21"  "8/25/21" 
## [583] "8/26/21"  "8/27/21"  "8/28/21"  "8/29/21"  "8/30/21"  "8/31/21" 
## [589] "9/1/21"   "9/2/21"   "9/3/21"   "9/4/21"   "9/5/21"   "9/6/21"  
## [595] "9/7/21"   "9/8/21"   "9/9/21"   "9/10/21"  "9/11/21"  "9/12/21" 
## [601] "9/13/21"  "9/14/21"  "9/15/21"  "9/16/21"  "9/17/21"  "9/18/21" 
## [607] "9/19/21"  "9/20/21"  "9/21/21"  "9/22/21"  "9/23/21"  "9/24/21" 
## [613] "9/25/21"  "9/26/21"  "9/27/21"  "9/28/21"  "9/29/21"  "9/30/21" 
## [619] "10/1/21"  "10/2/21"  "10/3/21"  "10/4/21"  "10/5/21"  "10/6/21" 
## [625] "10/7/21"  "10/8/21"  "10/9/21"  "10/10/21" "10/11/21" "10/12/21"
## [631] "10/13/21" "10/14/21" "10/15/21" "10/16/21" "10/17/21" "10/18/21"
## [637] "10/19/21" "10/20/21" "10/21/21" "10/22/21" "10/23/21" "10/24/21"
## [643] "10/25/21" "10/26/21" "10/27/21" "10/28/21" "10/29/21" "10/30/21"
## [649] "10/31/21" "11/1/21"  "11/2/21"  "11/3/21"  "11/4/21"  "11/5/21" 
## [655] "11/6/21"  "11/7/21"  "11/8/21"  "11/9/21"  "11/10/21" "11/11/21"
## [661] "11/12/21" "11/13/21" "11/14/21" "11/15/21" "11/16/21" "11/17/21"
## [667] "11/18/21" "11/19/21" "11/20/21" "11/21/21" "11/22/21" "11/23/21"
## [673] "11/24/21" "11/25/21" "11/26/21" "11/27/21" "11/28/21" "11/29/21"
## [679] "11/30/21" "12/1/21"  "12/2/21"  "12/3/21"  "12/4/21"  "12/5/21" 
## [685] "12/6/21"  "12/7/21"  "12/8/21"  "12/9/21"  "12/10/21" "12/11/21"
## [691] "12/12/21" "12/13/21" "12/14/21" "12/15/21" "12/16/21" "12/17/21"
## [697] "12/18/21" "12/19/21" "12/20/21" "12/21/21" "12/22/21" "12/23/21"
## [703] "12/24/21" "12/25/21" "12/26/21" "12/27/21" "12/28/21" "12/29/21"
## [709] "12/30/21" "12/31/21" "1/1/22"   "1/2/22"   "1/3/22"   "1/4/22"  
## [715] "1/5/22"   "1/6/22"   "1/7/22"   "1/8/22"   "1/9/22"   "1/10/22" 
## [721] "1/11/22"  "1/12/22"  "1/13/22"  "1/14/22"  "1/15/22"  "1/16/22" 
## [727] "1/17/22"  "1/18/22"  "1/19/22"  "1/20/22"  "1/21/22"  "1/22/22" 
## [733] "1/23/22"  "1/24/22"  "1/25/22"  "1/26/22"  "1/27/22"  "1/28/22" 
## [739] "1/29/22"  "1/30/22"  "1/31/22"  "2/1/22"   "2/2/22"   "2/3/22"  
## [745] "2/4/22"   "2/5/22"   "2/6/22"   "2/7/22"   "2/8/22"   "2/9/22"  
## [751] "2/10/22"  "2/11/22"  "2/12/22"  "2/13/22"  "2/14/22"  "2/15/22" 
## [757] "2/16/22"  "2/17/22"  "2/18/22"  "2/19/22"  "2/20/22"  "2/21/22" 
## [763] "2/22/22"  "2/23/22"  "2/24/22"  "2/25/22"  "2/26/22"  "2/27/22" 
## [769] "2/28/22"  "3/1/22"   "3/2/22"   "3/3/22"   "3/4/22"   "3/5/22"  
## [775] "3/6/22"   "3/7/22"   "3/8/22"   "3/9/22"   "3/10/22"  "3/11/22" 
## [781] "3/12/22"  "3/13/22"  "3/14/22"  "3/15/22"  "3/16/22"  "3/17/22" 
## [787] "3/18/22"  "3/19/22"  "3/20/22"  "3/21/22"  "3/22/22"

library(tidyr)

## Warning: 套件 'tidyr' 是用 R 版本 4.1.3 來建造的

# Reshape from wide (one column per date) to long (one row per location and
# date). `all_of()` states explicitly that `date_cols` is an external
# character vector of column names, and `pivot_longer()` replaces the
# superseded `gather()`. Row order differs from `gather()`, which is harmless
# here because the result is only grouped and summed afterwards.
df_long <- df %>%
  pivot_longer(
    cols = all_of(date_cols),
    names_to = "Date",
    values_to = "Confirmed"
  )

## Note: Using an external vector in selections is ambiguous.
## i Use `all_of(date_cols)` instead of `date_cols` to silence this message.
## i See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This message is displayed once per session.

# Total confirmed cases per country/region and date (provinces/states of the
# same country are added together). `.groups = "drop"` returns an ungrouped
# tibble instead of silently leaving it grouped by `Country/Region`.
df_stat <- df_long %>%
  group_by(`Country/Region`, Date) %>%
  summarise(Confirmed_Total = sum(Confirmed), .groups = "drop")

## `summarise()` has grouped output by 'Country/Region'. You can override using
## the `.groups` argument.

# The date labels look like "3/17/22": month/day/two-digit year.
df_stat$Date <- as.Date(df_stat$Date, format = "%m/%d/%y")

# Taiwan's series (the source labels it "Taiwan*"), newest date first.
df_taiwan <- arrange(
  filter(df_stat, `Country/Region` == "Taiwan*"),
  desc(Date)
)

# Cumulative confirmed count over time; type "o" overplots points and lines.
plot(df_taiwan$Date, df_taiwan$Confirmed_Total, type = "o")

描述數據

# Fetch the course's `cdc.Rdata` file into the working directory.
curl::curl_download(
  url = "https://raw.githubusercontent.com/ywchiu/fda_course/main/cdc.Rdata",
  destfile = "cdc.Rdata"
)

# load() restores the objects stored in the file; the code below uses `cdc`.
load(file = "cdc.Rdata")

# Quick look at the first rows and at the column names.
head(cdc)

names(cdc)

## [1] "genhlth"  "exerany"  "hlthplan" "smoke100" "height"   "weight"   "wtdesire"
## [8] "age"      "gender"

# Height distribution with a coarse binning (a single number for `breaks`
# is a suggested bin count, not an exact one).
hist(cdc$height, breaks = 10)

# Same variable with a much finer binning, to compare the level of detail.
hist(cdc$height, breaks = 50)

# Weight distribution with the coarse binning.
hist(cdc$weight, breaks = 10)

# Text-based stem-and-leaf display of the same weight values.
stem(cdc$weight)

## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##    6 | 80889
##    8 | 00223444555568888000000000000222222233333344455555555555555555555556+26
##   10 | 00000000000000000000000000000000000000000000000000000000000000000000+1166
##   12 | 00000000000000000000000000000000000000000000000000000000000000000000+3111
##   14 | 00000000000000000000000000000000000000000000000000000000000000000000+3751
##   16 | 00000000000000000000000000000000000000000000000000000000000000000000+4023
##   18 | 00000000000000000000000000000000000000000000000000000000000000000000+3155
##   20 | 00000000000000000000000000000000000000000000000000000000000000000000+1924
##   22 | 00000000000000000000000000000000000000000000000000000000000000000000+1027
##   24 | 00000000000000000000000000000000000000000000000000000000000000000000+501
##   26 | 00000000000000000000000000000000000000000000000000000000000000000000+192
##   28 | 00000000000000000000000000000000000000000000000000000000022335555555+66
##   30 | 00000000000000000000000000000000000000000000000000000000000000000000+18
##   32 | 000000000000455578000000
##   34 | 00000048000000000000000000000
##   36 | 0022401
##   38 | 0000550
##   40 | 000005
##   42 | 
##   44 | 
##   46 | 
##   48 | 5
##   50 | 0

summary(cdc)

##       genhlth        exerany          hlthplan         smoke100     
##  excellent:4657   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  very good:6972   1st Qu.:0.0000   1st Qu.:1.0000   1st Qu.:0.0000  
##  good     :5675   Median :1.0000   Median :1.0000   Median :0.0000  
##  fair     :2019   Mean   :0.7457   Mean   :0.8738   Mean   :0.4721  
##  poor     : 677   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:1.0000  
##                   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##      height          weight         wtdesire          age        gender   
##  Min.   :48.00   Min.   : 68.0   Min.   : 68.0   Min.   :18.00   m: 9569  
##  1st Qu.:64.00   1st Qu.:140.0   1st Qu.:130.0   1st Qu.:31.00   f:10431  
##  Median :67.00   Median :165.0   Median :150.0   Median :43.00            
##  Mean   :67.18   Mean   :169.7   Mean   :155.1   Mean   :45.07            
##  3rd Qu.:70.00   3rd Qu.:190.0   3rd Qu.:175.0   3rd Qu.:57.00            
##  Max.   :93.00   Max.   :500.0   Max.   :680.0   Max.   :99.00

數據中心（集中趨勢）

mean(cdc$weight)

## [1] 169.683

median(cdc$weight)

## [1] 165

table(cdc$genhlth)

## 
## excellent very good      good      fair      poor 
##      4657      6972      5675      2019       677

table(cdc$smoke100)

## 
##     0     1 
## 10559  9441

pie(table(cdc$smoke100))

barplot(table(cdc$smoke100))

table(cdc$gender, cdc$smoke100)

##    
##        0    1
##   m 4547 5022
##   f 6012 4419

mosaicplot(table(cdc$gender, cdc$smoke100))

離散程度計算

# Eleven sample values used to illustrate median, quartiles and IQR.
a <- c(
  150, 155, 160, 162, 168, 171,
  173, 175, 178, 182, 185
)

# Stem-and-leaf display of the sample.
stem(x = a)

## 
##   The decimal point is 1 digit(s) to the right of the |
## 
##   15 | 05
##   16 | 028
##   17 | 1358
##   18 | 25

median(a)

## [1] 171

quantile(a, 0.25)

## 25% 
## 161

quantile(a, 0.75)

##   75% 
## 176.5

# Interquartile range by hand: third quartile minus first quartile.
# Computed from `a` rather than from numbers copied out of the printed
# output, so it stays correct if `a` changes; unname() drops the "75%" label.
unname(quantile(a, 0.75) - quantile(a, 0.25))

## [1] 15.5

IQR(a)

## [1] 15.5

boxplot(cdc$weight)

# Nine readings without extreme values.
temp <- c(20, 30, 10, 15, 20, 32, 18, 19, 22)
boxplot(temp)

# The same readings plus two extreme values (999), to show how a boxplot
# displays outliers.
temp2 <- c(temp, 999, 999)
boxplot(temp2)

# Weight of all respondents in a single box.
boxplot(cdc$weight)

# Formula interface (`values ~ grouping`): one box of weight per gender level.
boxplot(cdc$weight ~ cdc$gender)

# Same comparison for height.
boxplot(cdc$height ~ cdc$gender)

# Body-mass index per respondent. 703 is the usual conversion factor for the
# pounds/inches form of the BMI formula, so this presumably assumes weight in
# pounds and height in inches -- confirm against the dataset's codebook.
bmi <- (cdc$weight / cdc$height^2) * 703

# One box of BMI per self-reported general-health level.
boxplot(bmi ~ cdc$genhlth)

Standard deviation

sd(cdc$weight) ^ 2

## [1] 1606.484

var(cdc$weight)

## [1] 1606.484

# Two samples of 24 values each; the statements that follow compare their
# centre (summary) and spread (IQR, sd, boxplot).
contender1 <- c(
  8.4, 8.6, 8.8, 9, 9, 9.2, 9.7, 10.1, 10.4, 10.3, 10.5, 10.6,
  11.0, 11.1, 11.4, 11.7, 11.9, 12.3, 12.8, 13, 13, 14.2, 14.4, 14.6
)
contender2 <- c(
  9.8, 9.8, 9.9, 10.1, 10.1, 10.2, 10.2, 10.3, 10.3, 10.7, 10.8, 10.8,
  11, 11.1, 11.2, 11.2, 11.3, 11.6, 11.7, 11.7, 11.8, 11.8, 11.9, 11.9
)
summary(contender1)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   8.400   9.575  10.800  11.083  12.425  14.600

summary(contender2)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.80   10.20   10.90   10.88   11.62   11.90

IQR(contender1)

## [1] 2.85

IQR(contender2)

## [1] 1.425

# cbind() puts the two samples side by side as the columns of a matrix, so
# boxplot() draws one box per contender on a shared scale.
combined <- cbind(contender1,contender2)
boxplot(combined)

sd(contender1)

## [1] 1.880718

sd(contender2)

## [1] 0.7293038

Cov, Cor

# Keep only the four numeric measurement columns of `cdc`
# (assumes `cdc` is a data frame, as its summary() output suggests).
numeric_dataset <- cdc[c("height", "weight", "wtdesire", "age")]

# Pairwise covariance matrix of those columns.
cov(x = numeric_dataset)

##              height      weight   wtdesire        age
## height    17.023499   91.834880  100.13654  -8.879927
## weight    91.834880 1606.484154 1026.56638   1.108694
## wtdesire 100.136542 1026.566383 1024.85178 -13.769994
## age       -8.879927    1.108694  -13.76999 295.588571

cor(numeric_dataset)

##              height      weight    wtdesire          age
## height    1.0000000 0.555322192  0.75811946 -0.125181791
## weight    0.5553222 1.000000000  0.80005213  0.001608902
## wtdesire  0.7581195 0.800052128  1.00000000 -0.025018392
## age      -0.1251818 0.001608902 -0.02501839  1.000000000

plot(numeric_dataset$weight, numeric_dataset$wtdesire)

heatmap(cor(numeric_dataset))

## Probability

# Simulate 2000 tosses of a fair coin (sampling with replacement from the two
# outcomes) and plot how often each side came up. No seed is set, so the
# counts differ from run to run.
outcomes <- c("heads", "tails")
experiment <- sample(x = outcomes, size = 2000, replace = TRUE)
barplot(height = table(experiment))