#Homework 3 #1. (3 pts) Import the health care spending data (“HealthCareSpending.csv”) from Blackboard. #For each unique combination of country (LOCATION) and measure (MEASURE) find the #mean amount of annual spending since 2014. To simplify things, let’s say we only care #about total spending (i.e., SUBJECT == “TOT”).
library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ dplyr 1.0.8
## ✓ tibble 3.1.6 ✓ stringr 1.4.0
## ✓ tidyr 1.2.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
HealthCareSpending <- read_csv("/Users/annabelle/Desktop/R class/HealthCareSpending.csv")
## Rows: 16610 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): LOCATION, INDICATOR, SUBJECT, MEASURE, FREQUENCY, Flag Codes
## dbl (2): TIME, Value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(HealthCareSpending)
head(HealthCareSpending)
## # A tibble: 6 × 8
## LOCATION INDICATOR SUBJECT MEASURE FREQUENCY TIME Value `Flag Codes`
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 AUS HEALTHEXP TOT PC_GDP A 1971 4.55 <NA>
## 2 AUS HEALTHEXP TOT PC_GDP A 1972 4.55 <NA>
## 3 AUS HEALTHEXP TOT PC_GDP A 1973 4.51 <NA>
## 4 AUS HEALTHEXP TOT PC_GDP A 1974 5.11 <NA>
## 5 AUS HEALTHEXP TOT PC_GDP A 1975 5.76 <NA>
## 6 AUS HEALTHEXP TOT PC_GDP A 1976 5.78 <NA>
?subset
tot_HCS<-subset(HealthCareSpending, SUBJECT=="TOT",select=c("LOCATION", "MEASURE", "Value"))
tot_HCS
## # A tibble: 3,298 × 3
## LOCATION MEASURE Value
## <chr> <chr> <dbl>
## 1 AUS PC_GDP 4.55
## 2 AUS PC_GDP 4.55
## 3 AUS PC_GDP 4.51
## 4 AUS PC_GDP 5.11
## 5 AUS PC_GDP 5.76
## 6 AUS PC_GDP 5.78
## 7 AUS PC_GDP 6.08
## 8 AUS PC_GDP 5.94
## 9 AUS PC_GDP 5.83
## 10 AUS PC_GDP 5.83
## # … with 3,288 more rows
loc_meas_val<-aggregate(Value~LOCATION+MEASURE, tot_HCS, FUN=mean, na.action = na.omit)
view(loc_meas_val)
head(loc_meas_val)
## LOCATION MEASURE Value
## 1 AUS PC_GDP 7.054312
## 2 AUT PC_GDP 8.063878
## 3 BEL PC_GDP 7.681612
## 4 BGR PC_GDP 7.190556
## 5 BRA PC_GDP 8.276765
## 6 CAN PC_GDP 8.409408
#2. (2 pts) Import the country codes (“CountryCodes_a.csv”) from Blackboard. #Merge the country code data frame with the aggregated health care data frame you just produced.
library(readr)
CountryCodes_a <- read_csv("/Users/annabelle/Desktop/R class/CountryCodes_a.csv")
## Rows: 249 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Country, Alpha.2.code, Alpha.3.code
## dbl (1): Numeric
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(CountryCodes_a)
head(CountryCodes_a)
## # A tibble: 6 × 4
## Country Alpha.2.code Alpha.3.code Numeric
## <chr> <chr> <chr> <dbl>
## 1 Afghanistan AF AFG 4
## 2 Albania AL ALB 8
## 3 Algeria DZ DZA 12
## 4 American Samoa AS ASM 16
## 5 Andorra AD AND 20
## 6 Angola AO AGO 24
merged_cc_lmv<-merge(CountryCodes_a, loc_meas_val, by.x = "Alpha.3.code", by.y="LOCATION")
?merge
loc_meas_val$LOCATION
## [1] "AUS" "AUT" "BEL" "BGR" "BRA" "CAN" "CHE" "CHL" "CHN" "COL"
## [11] "CRI" "CYP" "CZE" "DEU" "DNK" "ESP" "EST" "FIN" "FRA" "GBR"
## [21] "GRC" "HRV" "HUN" "IDN" "IND" "IRL" "ISL" "ISR" "ITA" "JPN"
## [31] "KOR" "LTU" "LUX" "LVA" "MEX" "MLT" "NLD" "NOR" "NZL" "OAVG"
## [41] "POL" "PRT" "ROU" "RUS" "SVK" "SVN" "SWE" "TUR" "USA" "ZAF"
## [51] "AUS" "AUT" "BEL" "BGR" "BRA" "CAN" "CHE" "CHL" "CHN" "COL"
## [61] "CRI" "CYP" "CZE" "DEU" "DNK" "ESP" "EST" "FIN" "FRA" "GBR"
## [71] "GRC" "HRV" "HUN" "IDN" "IND" "IRL" "ISL" "ISR" "ITA" "JPN"
## [81] "KOR" "LTU" "LUX" "LVA" "MEX" "MLT" "NLD" "NOR" "NZL" "OAVG"
## [91] "POL" "PRT" "ROU" "RUS" "SVK" "SVN" "SWE" "TUR" "USA" "ZAF"
CountryCodes_a$Alpha.3.code
## [1] "AFG" "ALB" "DZA" "ASM" "AND" "AGO" "AIA" "ATA" "ATG" "ARG" "ARM" "ABW"
## [13] "AUS" "AUT" "AZE" "BHS" "BHR" "BGD" "BRB" "BLR" "BEL" "BLZ" "BEN" "BMU"
## [25] "BTN" "BOL" "BES" "BIH" "BWA" "BVT" "BRA" "IOT" "BRN" "BGR" "BFA" "BDI"
## [37] "CPV" "KHM" "CMR" "CAN" "CYM" "CAF" "TCD" "CHL" "CHN" "CXR" "CCK" "COL"
## [49] "COM" "COD" "COG" "COK" "CRI" "HRV" "CUB" "CUW" "CYP" "CZE" "CIV" "DNK"
## [61] "DJI" "DMA" "DOM" "ECU" "EGY" "SLV" "GNQ" "ERI" "EST" "SWZ" "ETH" "FLK"
## [73] "FRO" "FJI" "FIN" "FRA" "GUF" "PYF" "ATF" "GAB" "GMB" "GEO" "DEU" "GHA"
## [85] "GIB" "GRC" "GRL" "GRD" "GLP" "GUM" "GTM" "GGY" "GIN" "GNB" "GUY" "HTI"
## [97] "HMD" "VAT" "HND" "HKG" "HUN" "ISL" "IND" "IDN" "IRN" "IRQ" "IRL" "IMN"
## [109] "ISR" "ITA" "JAM" "JPN" "JEY" "JOR" "KAZ" "KEN" "KIR" "PRK" "KOR" "KWT"
## [121] "KGZ" "LAO" "LVA" "LBN" "LSO" "LBR" "LBY" "LIE" "LTU" "LUX" "MAC" "MDG"
## [133] "MWI" "MYS" "MDV" "MLI" "MLT" "MHL" "MTQ" "MRT" "MUS" "MYT" "MEX" "FSM"
## [145] "MDA" "MCO" "MNG" "MNE" "MSR" "MAR" "MOZ" "MMR" "NAM" "NRU" "NPL" "NLD"
## [157] "NCL" "NZL" "NIC" "NER" "NGA" "NIU" "NFK" "MNP" "NOR" "OMN" "PAK" "PLW"
## [169] "PSE" "PAN" "PNG" "PRY" "PER" "PHL" "PCN" "POL" "PRT" "PRI" "QAT" "MKD"
## [181] "ROU" "RUS" "RWA" "REU" "BLM" "SHN" "KNA" "LCA" "MAF" "SPM" "VCT" "WSM"
## [193] "SMR" "STP" "SAU" "SEN" "SRB" "SYC" "SLE" "SGP" "SXM" "SVK" "SVN" "SLB"
## [205] "SOM" "ZAF" "SGS" "SSD" "ESP" "LKA" "SDN" "SUR" "SJM" "SWE" "CHE" "SYR"
## [217] "TWN" "TJK" "TZA" "THA" "TLS" "TGO" "TKL" "TON" "TTO" "TUN" "TUR" "TKM"
## [229] "TCA" "TUV" "UGA" "UKR" "ARE" "GBR" "UMI" "USA" "URY" "UZB" "VUT" "VEN"
## [241] "VNM" "VGB" "VIR" "WLF" "ESH" "YEM" "ZMB" "ZWE" "ALA"
view(merged_cc_lmv)
view(merged_cc_lmv)
head(merged_cc_lmv)
## Alpha.3.code Country Alpha.2.code Numeric MEASURE Value
## 1 AUS Australia AU 36 PC_GDP 7.054312
## 2 AUS Australia AU 36 USD_CAP 1943.500979
## 3 AUT Austria AT 40 PC_GDP 8.063878
## 4 AUT Austria AT 40 USD_CAP 2247.828286
## 5 BEL Belgium BE 56 PC_GDP 7.681612
## 6 BEL Belgium BE 56 USD_CAP 2003.684469
#3. (2 pts) Import the situation report from WHO (“COVID-19-SitRep_2020_02_25a.csv”) #from Blackboard. Aggregate the data frame to provide the total number of COVID-19 cases #in each country on Feburary 24th, 2020. The data from other dates can be omitted in the process.
library(readr)
COVID_19_SitRep_2020_02_25a <- read_csv("/Users/annabelle/Desktop/R class/COVID-19-SitRep_2020_02_25a.csv")
## Rows: 64 Columns: 38
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Province.States, Country.Region, WHO.region
## dbl (35): X1.21.20, X1.22.20, X1.23.20, X1.24.20, X1.25.20, X1.26.20, X1.27....
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(COVID_19_SitRep_2020_02_25a)
covid<-COVID_19_SitRep_2020_02_25a
covid
## # A tibble: 64 × 38
## Province.States Country.Region WHO.region X1.21.20 X1.22.20 X1.23.20 X1.24.20
## <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Hubei China Western P… 258 270 375 375
## 2 Guangdong China Western P… 14 17 26 32
## 3 Beijing China Western P… 5 5 10 10
## 4 Shanghai China Western P… 1 2 9 9
## 5 Chongqing China Western P… NA 1 5 5
## 6 Zhejiang China Western P… NA 5 5 5
## 7 Jiangxi China Western P… NA 1 2 2
## 8 Sichuan China Western P… NA 1 2 2
## 9 Tianjin China Western P… NA 2 2 2
## 10 Henan China Western P… NA 1 1 1
## # … with 54 more rows, and 31 more variables: X1.25.20 <dbl>, X1.26.20 <dbl>,
## # X1.27.20 <dbl>, X1.28.20 <dbl>, X1.29.20 <dbl>, X1.30.20 <dbl>,
## # X1.31.20 <dbl>, X2.1.20 <dbl>, X2.2.20 <dbl>, X2.3.20 <dbl>, X2.4.20 <dbl>,
## # X2.5.20 <dbl>, X2.6.20 <dbl>, X2.7.20 <dbl>, X2.8.20 <dbl>, X2.9.20 <dbl>,
## # X2.10.20 <dbl>, X2.11.20 <dbl>, X2.12.20 <dbl>, X2.13.20 <dbl>,
## # X2.14.20 <dbl>, X2.15.20 <dbl>, X2.16.20 <dbl>, X2.17.20 <dbl>,
## # X2.18.20 <dbl>, X2.19.20 <dbl>, X2.20.20 <dbl>, X2.21.20 <dbl>, …
feb24_20_cases<-aggregate(X2.24.20~Country.Region, covid, FUN=sum,na.action = na.omit)
feb24_20_cases
## Country.Region X2.24.20
## 1 Australia 21
## 2 Belgium 1
## 3 Cambodia 1
## 4 Canada 9
## 5 China 77262
## 6 Egypt 1
## 7 Finland 1
## 8 France 12
## 9 Germany 16
## 10 India 3
## 11 Iran 43
## 12 Israel 1
## 13 Italy 124
## 14 Japan 144
## 15 Lebanon 1
## 16 Malaysia 23
## 17 Nepal 1
## 18 Other 695
## 19 Philippines 3
## 20 Republic of Korea 763
## 21 Russia 2
## 22 Singapore 89
## 23 Spain 2
## 24 Sri Lanka 1
## 25 Sweden 1
## 26 Thailand 35
## 27 United Arab Emirates 13
## 28 United Kingdom 9
## 29 United States of America 15
## 30 Vietnam 16
head(feb24_20_cases)
## Country.Region X2.24.20
## 1 Australia 21
## 2 Belgium 1
## 3 Cambodia 1
## 4 Canada 9
## 5 China 77262
## 6 Egypt 1
#4. (2 pt) Merge the data frames from question 2 and 3. Note: some countries do not #have health care spending data, so NAs will appear.
covid_cc_lmv<-merge(merged_cc_lmv, feb24_20_cases, by.x="Country", by.y="Country.Region")
covid_cc_lmv
## Country Alpha.3.code Alpha.2.code Numeric MEASURE
## 1 Australia AUS AU 36 PC_GDP
## 2 Australia AUS AU 36 USD_CAP
## 3 Belgium BEL BE 56 USD_CAP
## 4 Belgium BEL BE 56 PC_GDP
## 5 Canada CAN CA 124 USD_CAP
## 6 Canada CAN CA 124 PC_GDP
## 7 China CHN CN 156 PC_GDP
## 8 China CHN CN 156 USD_CAP
## 9 Finland FIN FI 246 PC_GDP
## 10 Finland FIN FI 246 USD_CAP
## 11 France FRA FR 250 USD_CAP
## 12 France FRA FR 250 PC_GDP
## 13 Germany DEU DE 276 PC_GDP
## 14 Germany DEU DE 276 USD_CAP
## 15 India IND IN 356 USD_CAP
## 16 India IND IN 356 PC_GDP
## 17 Israel ISR IL 376 USD_CAP
## 18 Israel ISR IL 376 PC_GDP
## 19 Italy ITA IT 380 PC_GDP
## 20 Italy ITA IT 380 USD_CAP
## 21 Japan JPN JP 392 PC_GDP
## 22 Japan JPN JP 392 USD_CAP
## 23 Republic of Korea KOR KR 410 PC_GDP
## 24 Republic of Korea KOR KR 410 USD_CAP
## 25 Russia RUS RU 643 USD_CAP
## 26 Russia RUS RU 643 PC_GDP
## 27 Spain ESP ES 724 USD_CAP
## 28 Spain ESP ES 724 PC_GDP
## 29 Sweden SWE SE 752 USD_CAP
## 30 Sweden SWE SE 752 PC_GDP
## 31 United States of America USA US 840 USD_CAP
## 32 United States of America USA US 840 PC_GDP
## 33 Vietnam GBR GB 826 PC_GDP
## 34 Vietnam GBR GB 826 USD_CAP
## Value X2.24.20
## 1 7.054312 21
## 2 1943.500979 21
## 3 2003.684469 1
## 4 7.681612 1
## 5 2224.677286 9
## 6 8.409408 9
## 7 4.379588 77262
## 8 320.282412 77262
## 9 7.266980 1
## 10 1704.589531 1
## 11 2900.222485 12
## 12 9.742818 12
## 13 9.178187 16
## 14 2501.734792 16
## 15 140.883412 3
## 16 3.773647 3
## 17 1872.785292 1
## 18 6.822250 1
## 19 7.949452 124
## 20 2285.314548 124
## 21 7.155245 144
## 22 1758.602571 144
## 23 4.169367 763
## 24 821.433184 763
## 25 1056.636056 2
## 26 5.050471 2
## 27 1337.465857 2
## 28 6.512184 2
## 29 2127.542449 1
## 30 8.035286 1
## 31 4124.240000 15
## 32 11.881265 15
## 33 6.274204 16
## 34 1474.662714 16
view(covid_cc_lmv)
head(covid_cc_lmv)
## Country Alpha.3.code Alpha.2.code Numeric MEASURE Value X2.24.20
## 1 Australia AUS AU 36 PC_GDP 7.054312 21
## 2 Australia AUS AU 36 USD_CAP 1943.500979 21
## 3 Belgium BEL BE 56 USD_CAP 2003.684469 1
## 4 Belgium BEL BE 56 PC_GDP 7.681612 1
## 5 Canada CAN CA 124 USD_CAP 2224.677286 9
## 6 Canada CAN CA 124 PC_GDP 8.409408 9
#5. (1 pts) Plot COVID-19 cases on Feb 24th as a function of the total number of #dollars spent per capita (USD_CAP) on health care
plot<-subset(covid_cc_lmv, MEASURE=="USD_CAP")
plot
## Country Alpha.3.code Alpha.2.code Numeric MEASURE Value
## 2 Australia AUS AU 36 USD_CAP 1943.5010
## 3 Belgium BEL BE 56 USD_CAP 2003.6845
## 5 Canada CAN CA 124 USD_CAP 2224.6773
## 8 China CHN CN 156 USD_CAP 320.2824
## 10 Finland FIN FI 246 USD_CAP 1704.5895
## 11 France FRA FR 250 USD_CAP 2900.2225
## 14 Germany DEU DE 276 USD_CAP 2501.7348
## 15 India IND IN 356 USD_CAP 140.8834
## 17 Israel ISR IL 376 USD_CAP 1872.7853
## 20 Italy ITA IT 380 USD_CAP 2285.3145
## 22 Japan JPN JP 392 USD_CAP 1758.6026
## 24 Republic of Korea KOR KR 410 USD_CAP 821.4332
## 25 Russia RUS RU 643 USD_CAP 1056.6361
## 27 Spain ESP ES 724 USD_CAP 1337.4659
## 29 Sweden SWE SE 752 USD_CAP 2127.5424
## 31 United States of America USA US 840 USD_CAP 4124.2400
## 34 Vietnam GBR GB 826 USD_CAP 1474.6627
## X2.24.20
## 2 21
## 3 1
## 5 9
## 8 77262
## 10 1
## 11 12
## 14 16
## 15 3
## 17 1
## 20 124
## 22 144
## 24 763
## 25 2
## 27 2
## 29 1
## 31 15
## 34 16
view(plot)
plot(Value~X2.24.20, plot, xlab="Total dollars spend per capita", ylab= "COVID-19 cases on Feb 24th")