Covid_19 Edx Project
Loaded necessary libraries
library(httr)
library(rvest)
TASK 1: Get a COVID-19 pandemic Wiki page using HTTP request
get_wiki_covid19_page <- function() {
  # Request the Wikipedia template page "COVID-19 testing by country" and
  # hand back the httr response object unchanged.
  #
  # The page is addressed through the index.php endpoint; its title is sent
  # as the `title` query parameter.
  page_query <- list(title = "Template:COVID-19_testing_by_country")
  GET(
    url = "https://en.wikipedia.org/w/index.php",
    query = page_query
  )
}
# Call get_wiki_covid19_page() and keep the HTTP response in `result`
result <- get_wiki_covid19_page()
# Auto-print the response; the recorded console output follows below
result
## Response [https://en.wikipedia.org/w/index.php?title=Template%3ACOVID-19_testing_by_country]
## Date: 2025-11-20 11:04
## Status: 200
## Content-Type: text/html; charset=UTF-8
## Size: 456 kB
## <!DOCTYPE html>
## <html class="client-nojs vector-feature-language-in-header-enabled vector-fea...
## <head>
## <meta charset="UTF-8">
## <title>Template:COVID-19 testing by country - Wikipedia</title>
## <script>(function(){var className="client-js vector-feature-language-in-heade...
## RLSTATE={"ext.globalCssJs.user.styles":"ready","site.styles":"ready","user.st...
## <script>(RLQ=window.RLQ||[]).push(function(){mw.loader.impl(function(){return...
## }];});});</script>
## <link rel="stylesheet" href="/w/load.php?lang=en&modules=ext.cite.styles%...
## ...
TASK 3: Pre-process the extracted data frame from the previous step,
and export it as a csv file
#(3a)
preprocess_covid_data_frame <- function(Covid19) {
  # Clean the raw COVID-19 testing table.
  #
  # Args:
  #   Covid19: data frame holding the scraped table. After the "Ref." and
  #     "Units.b." columns are removed, exactly seven columns must remain
  #     (they are renamed positionally below).
  #
  # Returns:
  #   A data frame without the input's last row, with columns country and
  #   date as factors and tested, confirmed, confirmed.tested.ratio,
  #   tested.population.ratio and confirmed.population.ratio as numerics.
  stopifnot(is.data.frame(Covid19))

  # Remove the last row, whatever the table's current length is. A fixed
  # row range (1:172) would pad short tables with NA rows and keep the
  # trailing row of longer ones.
  covid_frame <- head(Covid19, -1L)

  # The Units and Ref columns are not needed
  unwanted_columns <- c("Ref.", "Units.b.")
  covid_frame <- covid_frame[!names(covid_frame) %in% unwanted_columns]

  # Rename the remaining columns (positional)
  names(covid_frame) <- c(
    "country", "date", "tested", "confirmed",
    "confirmed.tested.ratio", "tested.population.ratio",
    "confirmed.population.ratio"
  )

  # Convert column data types
  covid_frame$country <- as.factor(covid_frame$country)
  covid_frame$date <- as.factor(covid_frame$date)

  # Numbers arrive as text with thousands separators ("1,234"); strip the
  # commas before converting
  numeric_columns <- c(
    "tested", "confirmed", "confirmed.tested.ratio",
    "tested.population.ratio", "confirmed.population.ratio"
  )
  covid_frame[numeric_columns] <- lapply(
    covid_frame[numeric_columns],
    function(values) as.numeric(gsub(",", "", values, fixed = TRUE))
  )

  covid_frame
}
# Pre-process the raw `Covid19` table (created outside the visible part of this script)
Covid19_frame <- preprocess_covid_data_frame(Covid19)
# Auto-print the cleaned data frame for inspection
Covid19_frame
#(3b)
# Export the data frame to "covid_data.csv" in the current working directory.
# Note: with its default arguments write.table() separates fields with a
# space, not a comma, despite the ".csv" extension.
write.table(Covid19_frame ,"covid_data.csv")
# Working directory the file was just written to
vid <- getwd()
vid
## [1] "C:/Users/USER/Desktop/R folder"
# No setwd() here: the file is already in the current working directory, and a
# hard-coded absolute path would make the script fail on any other machine.
# Build the full path of the exported file
file_path <- file.path(vid, "covid_data.csv")
# file path
file_path
## [1] "C:/Users/USER/Desktop/R folder/covid_data.csv"
# Confirm the export actually produced the file
file.exists(file_path)
## [1] TRUE
TASK 5: Calculate worldwide COVID testing positive ratio
# Task 5 goal: total up the confirmed and tested cases worldwide, then estimate
# the overall positive ratio as confirmed cases / tested cases
total_confirmed_cases <- sum(Covid19_data_frame_csv[["confirmed"]])
total_confirmed_cases
## [1] 431434555
total_tested_cases <- sum(Covid19_data_frame_csv[["tested"]])
total_tested_cases
## [1] 5396881644
# Share of all reported tests that came back confirmed
positive_ratio <- total_confirmed_cases / total_tested_cases
positive_ratio
## [1] 0.07994145
TASK 6: Get a country list which reported their testing data
# Task 6 goal: produce a catalog (sorted list) of the countries that have
# reported their COVID-19 testing data
# Start by extracting and printing the 'country' column
Covid19_data_frame_csv[["country"]]
## [1] "Afghanistan" "Albania" "Algeria"
## [4] "Andorra" "Angola" "Antigua and Barbuda"
## [7] "Argentina" "Armenia" "Australia"
## [10] "Austria" "Azerbaijan" "Bahamas"
## [13] "Bahrain" "Bangladesh" "Barbados"
## [16] "Belarus" "Belgium" "Belize"
## [19] "Benin" "Bhutan" "Bolivia"
## [22] "Bosnia and Herzegovina" "Botswana" "Brazil"
## [25] "Brunei" "Bulgaria" "Burkina Faso"
## [28] "Burundi" "Cambodia" "Cameroon"
## [31] "Canada" "Chad" "Chile"
## [34] "China[c]" "Colombia" "Costa Rica"
## [37] "Croatia" "Cuba" "Cyprus[d]"
## [40] "Czechia" "Denmark[e]" "Djibouti"
## [43] "Dominica" "Dominican Republic" "DR Congo"
## [46] "Ecuador" "Egypt" "El Salvador"
## [49] "Equatorial Guinea" "Estonia" "Eswatini"
## [52] "Ethiopia" "Faroe Islands" "Fiji"
## [55] "Finland" "France[f][g]" "Gabon"
## [58] "Gambia" "Georgia[h]" "Germany"
## [61] "Ghana" "Greece" "Greenland"
## [64] "Grenada" "Guatemala" "Guinea"
## [67] "Guinea-Bissau" "Guyana" "Haiti"
## [70] "Honduras" "Hungary" "Iceland"
## [73] "India" "Indonesia" "Iran"
## [76] "Iraq" "Ireland" "Israel"
## [79] "Italy" "Ivory Coast" "Jamaica"
## [82] "Japan" "Jordan" "Kazakhstan"
## [85] "Kenya" "Kosovo" "Kuwait"
## [88] "Kyrgyzstan" "Laos" "Latvia"
## [91] "Lebanon" "Lesotho" "Liberia"
## [94] "Libya" "Lithuania" "Luxembourg[i]"
## [97] "Madagascar" "Malawi" "Malaysia"
## [100] "Maldives" "Mali" "Malta"
## [103] "Mauritania" "Mauritius" "Mexico"
## [106] "Moldova[j]" "Mongolia" "Montenegro"
## [109] "Morocco" "Mozambique" "Myanmar"
## [112] "Namibia" "Nepal" "Netherlands"
## [115] "New Caledonia" "New Zealand" "Niger"
## [118] "Nigeria" "North Korea" "North Macedonia"
## [121] "Northern Cyprus[k]" "Norway" "Oman"
## [124] "Pakistan" "Palestine" "Panama"
## [127] "Papua New Guinea" "Paraguay" "Peru"
## [130] "Philippines" "Poland" "Portugal"
## [133] "Qatar" "Romania" "Russia"
## [136] "Rwanda" "Saint Kitts and Nevis" "Saint Lucia"
## [139] "Saint Vincent" "San Marino" "Saudi Arabia"
## [142] "Senegal" "Serbia" "Singapore"
## [145] "Slovakia" "Slovenia" "South Africa"
## [148] "South Korea" "South Sudan" "Spain"
## [151] "Sri Lanka" "Sudan" "Sweden"
## [154] "Switzerland[l]" "Taiwan[m]" "Tanzania"
## [157] "Thailand" "Togo" "Trinidad and Tobago"
## [160] "Tunisia" "Turkey" "Uganda"
## [163] "Ukraine" "United Arab Emirates" "United Kingdom"
## [166] "United States" "Uruguay" "Uzbekistan"
## [169] "Venezuela" "Vietnam" "Zambia"
## [172] "Zimbabwe"
# The country column is expected to be of class character
class(Covid19_data_frame_csv[["country"]])
## [1] "character"
# Countries in ascending (A to Z) order; ascending is sort()'s default
A_to_Z <- sort(Covid19_data_frame_csv[["country"]])
A_to_Z
## [1] "Afghanistan" "Albania" "Algeria"
## [4] "Andorra" "Angola" "Antigua and Barbuda"
## [7] "Argentina" "Armenia" "Australia"
## [10] "Austria" "Azerbaijan" "Bahamas"
## [13] "Bahrain" "Bangladesh" "Barbados"
## [16] "Belarus" "Belgium" "Belize"
## [19] "Benin" "Bhutan" "Bolivia"
## [22] "Bosnia and Herzegovina" "Botswana" "Brazil"
## [25] "Brunei" "Bulgaria" "Burkina Faso"
## [28] "Burundi" "Cambodia" "Cameroon"
## [31] "Canada" "Chad" "Chile"
## [34] "China[c]" "Colombia" "Costa Rica"
## [37] "Croatia" "Cuba" "Cyprus[d]"
## [40] "Czechia" "Denmark[e]" "Djibouti"
## [43] "Dominica" "Dominican Republic" "DR Congo"
## [46] "Ecuador" "Egypt" "El Salvador"
## [49] "Equatorial Guinea" "Estonia" "Eswatini"
## [52] "Ethiopia" "Faroe Islands" "Fiji"
## [55] "Finland" "France[f][g]" "Gabon"
## [58] "Gambia" "Georgia[h]" "Germany"
## [61] "Ghana" "Greece" "Greenland"
## [64] "Grenada" "Guatemala" "Guinea"
## [67] "Guinea-Bissau" "Guyana" "Haiti"
## [70] "Honduras" "Hungary" "Iceland"
## [73] "India" "Indonesia" "Iran"
## [76] "Iraq" "Ireland" "Israel"
## [79] "Italy" "Ivory Coast" "Jamaica"
## [82] "Japan" "Jordan" "Kazakhstan"
## [85] "Kenya" "Kosovo" "Kuwait"
## [88] "Kyrgyzstan" "Laos" "Latvia"
## [91] "Lebanon" "Lesotho" "Liberia"
## [94] "Libya" "Lithuania" "Luxembourg[i]"
## [97] "Madagascar" "Malawi" "Malaysia"
## [100] "Maldives" "Mali" "Malta"
## [103] "Mauritania" "Mauritius" "Mexico"
## [106] "Moldova[j]" "Mongolia" "Montenegro"
## [109] "Morocco" "Mozambique" "Myanmar"
## [112] "Namibia" "Nepal" "Netherlands"
## [115] "New Caledonia" "New Zealand" "Niger"
## [118] "Nigeria" "North Korea" "North Macedonia"
## [121] "Northern Cyprus[k]" "Norway" "Oman"
## [124] "Pakistan" "Palestine" "Panama"
## [127] "Papua New Guinea" "Paraguay" "Peru"
## [130] "Philippines" "Poland" "Portugal"
## [133] "Qatar" "Romania" "Russia"
## [136] "Rwanda" "Saint Kitts and Nevis" "Saint Lucia"
## [139] "Saint Vincent" "San Marino" "Saudi Arabia"
## [142] "Senegal" "Serbia" "Singapore"
## [145] "Slovakia" "Slovenia" "South Africa"
## [148] "South Korea" "South Sudan" "Spain"
## [151] "Sri Lanka" "Sudan" "Sweden"
## [154] "Switzerland[l]" "Taiwan[m]" "Tanzania"
## [157] "Thailand" "Togo" "Trinidad and Tobago"
## [160] "Tunisia" "Turkey" "Uganda"
## [163] "Ukraine" "United Arab Emirates" "United Kingdom"
## [166] "United States" "Uruguay" "Uzbekistan"
## [169] "Venezuela" "Vietnam" "Zambia"
## [172] "Zimbabwe"
# Countries in descending (Z to A) order
Z_to_A <- sort(Covid19_data_frame_csv[["country"]], decreasing = TRUE)
# Show the Z to A listing
Z_to_A
## [1] "Zimbabwe" "Zambia" "Vietnam"
## [4] "Venezuela" "Uzbekistan" "Uruguay"
## [7] "United States" "United Kingdom" "United Arab Emirates"
## [10] "Ukraine" "Uganda" "Turkey"
## [13] "Tunisia" "Trinidad and Tobago" "Togo"
## [16] "Thailand" "Tanzania" "Taiwan[m]"
## [19] "Switzerland[l]" "Sweden" "Sudan"
## [22] "Sri Lanka" "Spain" "South Sudan"
## [25] "South Korea" "South Africa" "Slovenia"
## [28] "Slovakia" "Singapore" "Serbia"
## [31] "Senegal" "Saudi Arabia" "San Marino"
## [34] "Saint Vincent" "Saint Lucia" "Saint Kitts and Nevis"
## [37] "Rwanda" "Russia" "Romania"
## [40] "Qatar" "Portugal" "Poland"
## [43] "Philippines" "Peru" "Paraguay"
## [46] "Papua New Guinea" "Panama" "Palestine"
## [49] "Pakistan" "Oman" "Norway"
## [52] "Northern Cyprus[k]" "North Macedonia" "North Korea"
## [55] "Nigeria" "Niger" "New Zealand"
## [58] "New Caledonia" "Netherlands" "Nepal"
## [61] "Namibia" "Myanmar" "Mozambique"
## [64] "Morocco" "Montenegro" "Mongolia"
## [67] "Moldova[j]" "Mexico" "Mauritius"
## [70] "Mauritania" "Malta" "Mali"
## [73] "Maldives" "Malaysia" "Malawi"
## [76] "Madagascar" "Luxembourg[i]" "Lithuania"
## [79] "Libya" "Liberia" "Lesotho"
## [82] "Lebanon" "Latvia" "Laos"
## [85] "Kyrgyzstan" "Kuwait" "Kosovo"
## [88] "Kenya" "Kazakhstan" "Jordan"
## [91] "Japan" "Jamaica" "Ivory Coast"
## [94] "Italy" "Israel" "Ireland"
## [97] "Iraq" "Iran" "Indonesia"
## [100] "India" "Iceland" "Hungary"
## [103] "Honduras" "Haiti" "Guyana"
## [106] "Guinea-Bissau" "Guinea" "Guatemala"
## [109] "Grenada" "Greenland" "Greece"
## [112] "Ghana" "Germany" "Georgia[h]"
## [115] "Gambia" "Gabon" "France[f][g]"
## [118] "Finland" "Fiji" "Faroe Islands"
## [121] "Ethiopia" "Eswatini" "Estonia"
## [124] "Equatorial Guinea" "El Salvador" "Egypt"
## [127] "Ecuador" "DR Congo" "Dominican Republic"
## [130] "Dominica" "Djibouti" "Denmark[e]"
## [133] "Czechia" "Cyprus[d]" "Cuba"
## [136] "Croatia" "Costa Rica" "Colombia"
## [139] "China[c]" "Chile" "Chad"
## [142] "Canada" "Cameroon" "Cambodia"
## [145] "Burundi" "Burkina Faso" "Bulgaria"
## [148] "Brunei" "Brazil" "Botswana"
## [151] "Bosnia and Herzegovina" "Bolivia" "Bhutan"
## [154] "Benin" "Belize" "Belgium"
## [157] "Belarus" "Barbados" "Bangladesh"
## [160] "Bahrain" "Bahamas" "Azerbaijan"
## [163] "Austria" "Australia" "Armenia"
## [166] "Argentina" "Antigua and Barbuda" "Angola"
## [169] "Andorra" "Algeria" "Albania"
## [172] "Afghanistan"
TASK 7: Identify countries names with a specific pattern
# The goal of task 7 is to use a regular expression to find the countries whose
# names start with "United"
# "^" anchors the pattern to the beginning of the name, so a name that merely
# contains "United" further along is not picked up
matches <- regexpr("^United.+", Covid19_data_frame_csv$country)
# Extract the matched text for every name that matched
regmatches(Covid19_data_frame_csv$country, matches)
## [1] "United Arab Emirates" "United Kingdom" "United States"
TASK 8: Pick two countries you are interested in, and then review their
testing data
# The goal of task 8 is to compare the COVID-19 test data between two countries:
# select one row per country, keeping the country, confirmed and
# confirmed-population-ratio columns.
# Rows are looked up by country name rather than by fixed row number, so the
# selection stays correct if rows are added to or removed from the table.
# NOTE(review): column positions 1, 4 and 7 are assumed to be country,
# confirmed and confirmed.population.ratio - confirm against the loaded data.
country_a <- Covid19_data_frame_csv[which(Covid19_data_frame_csv$country == "Nigeria"), c(1, 4, 7)]
country_a
country_b <- Covid19_data_frame_csv[which(Covid19_data_frame_csv$country == "Ivory Coast"), c(1, 4, 7)]
country_b
TASK 9: Compare which one of the selected countries has a larger
ratio of confirmed cases to population
# The goal of task 9 is to find out which of the two selected countries has the
# larger ratio of confirmed cases to population, which may indicate a higher
# COVID-19 infection risk in that country.
# Compare the confirmed.population.ratio column. The second selected column
# holds the raw confirmed count, which does not account for population size.
if (country_a[["confirmed.population.ratio"]] >
      country_b[["confirmed.population.ratio"]]) {
  print("Nigeria has more populations afected with covid19 hence is not safer than Ivory Coast")
} else {
  print("Ivory Coast has more populations afected with covid19 hence is not safer than Nigeria")
}
# NOTE: the previously recorded output ("Nigeria has more populations afected
# with covid19 hence is not safer than Ivory Coast") came from comparing raw
# confirmed counts; re-run this chunk to refresh it.
TASK 10: Find countries with confirmed to population ratio rate less
than a threshold
# Task 10 goal: list the countries whose confirmed-to-population ratio is below
# 1%, which may indicate a relatively low risk in those countries
threshold <- 1
threshold
## [1] 1
# Keep the country names of the rows whose ratio is under the threshold
with(Covid19_data_frame_csv, country[confirmed.population.ratio < threshold])
## [1] "Afghanistan" "Algeria" "Angola"
## [4] "Antigua and Barbuda" "Bangladesh" "Benin"
## [7] "Brunei" "Burkina Faso" "Burundi"
## [10] "Cambodia" "Cameroon" "Chad"
## [13] "China[c]" "DR Congo" "Egypt"
## [16] "Ethiopia" "Gabon" "Gambia"
## [19] "Ghana" "Grenada" "Guinea"
## [22] "Guinea-Bissau" "Haiti" "Ivory Coast"
## [25] "Japan" "Kenya" "Laos"
## [28] "Liberia" "Madagascar" "Malawi"
## [31] "Mali" "Mauritania" "Mauritius"
## [34] "Mozambique" "Myanmar" "New Caledonia"
## [37] "Niger" "Nigeria" "North Korea"
## [40] "Pakistan" "Papua New Guinea" "Rwanda"
## [43] "Senegal" "South Korea" "South Sudan"
## [46] "Sri Lanka" "Sudan" "Tanzania"
## [49] "Thailand" "Togo" "Uganda"
## [52] "Uzbekistan" "Venezuela"
Visualizing the countries with the highest confirmed cases
library(ggplot2)
# Bar chart of confirmed cases per country: one filled bar per country, with
# the countries ordered by their confirmed count and the axes flipped so the
# bars run horizontally.
# NOTE(review): `subset` is not defined in the visible part of this script.
# Unless a data frame with that name is created elsewhere, it refers to base
# R's subset() function and ggplot() cannot use it as data - confirm, and
# consider a name that does not shadow the base function.
ggplot(subset,aes(x=reorder(country,confirmed),y=confirmed,fill = country))+
geom_col()+
# Swap the axes so country names are read along the vertical axis
coord_flip()+
labs(title = "highest country with confirmed cases",
x="country",
y="confirmed"
)+
theme_minimal()
