library(dplyr)
library(tidyr)
library(stringr)
library(rvest)
library(data.table)
library(tidyverse)
library(kableExtra)
library(knitr)
library(scales)
# Import the dataset CSV file into R. In order to remove the metadata from the top of the file, and to exclude the Total Census row, we add the "skip" argument to "read.csv()" and skip the first 15 lines.
steel_csv <- "https://raw.githubusercontent.com/stephen-haslett/data-607/master/project2-steel-dataset.csv"
steel_dataframe <- read.csv(steel_csv, header = FALSE, stringsAsFactors = FALSE, skip = 15)
# Select only rows and columns that have data. The CSV file contains 47 rows of solid data spread across 13 columns. The rest of the file's content is either empty space, or information that is irrelevant to our analysis. We are only interested in the first 9 columns.
steel_dataframe <- steel_dataframe[1:47, 1:9]
# Replace dashes and empty spaces in the data with NA values.
steel_dataframe[steel_dataframe == ""] <- NA
steel_dataframe[steel_dataframe == "(-)"] <- NA
# Add column names to the table.
names(steel_dataframe) <- c('Country', 'mar18_Quantity', 'mar18_Cost', 'feb18_Quantity', 'feb18_Cost', 'mar17_Quantity', 'mar17_Cost', 'feb17_Quantity', 'feb17_Cost')
# Convert the table to long format.
steel_long = gather(steel_dataframe, type, value, 2:9, factor_key = TRUE) %>% separate(type, c('period','type')) %>% spread(period, value, convert = TRUE)
# Strip commas out of the numeric values in the dataframe so that we can sum them.
steel_long$feb17 <- as.numeric(gsub(",", "", steel_long$feb17))
steel_long$mar17 <- as.numeric(gsub(",", "", steel_long$mar17))
steel_long$feb18 <- as.numeric(gsub(",", "", steel_long$feb18))
steel_long$mar18 <- as.numeric(gsub(",", "", steel_long$mar18))
# Sum values and add the Total column to the table.
summed_steel_data <- steel_long %>% rowwise() %>% mutate(Total = sum(`feb17`, `mar17`, `feb18`, `mar18`))
# Order results by total cost.
ordered_by_cost <- filter(summed_steel_data, type == 'Cost', !is.na(Total))
# Order results by total quantity.
ordered_by_quantity <- filter(summed_steel_data, type == "Quantity", !is.na(Total))
highest_cost <- ordered_by_cost %>% select(-type) %>% arrange(desc(Total))
# Convert numeric column values to dollar amounts.
dollar_format()(c(1:10 * 10))
## [1] "$10" "$20" "$30" "$40" "$50" "$60" "$70" "$80" "$90" "$100"
highest_cost$Total <- dollar(highest_cost$Total)
highest_cost$feb17 <- dollar(highest_cost$feb17)
highest_cost$mar17 <- dollar(highest_cost$mar17)
highest_cost$feb18 <- dollar(highest_cost$feb18)
highest_cost$mar18 <- dollar(highest_cost$mar18)
highest_cost %>% head(10) %>% kable() %>% kable_styling()
| Country | feb17 | feb18 | mar17 | mar18 | Total |
|---|---|---|---|---|---|
| Europe | $124,861 | $156,209 | $164,523 | $183,656 | $629,249 |
| European Union | $117,190 | $150,295 | $156,743 | $177,054 | $601,282 |
| Euro Area | $88,961 | $120,555 | $117,668 | $137,157 | $464,341 |
| Pacific Rim Countries | $81,213 | $137,556 | $95,804 | $108,106 | $422,679 |
| Germany | $36,296 | $39,553 | $45,631 | $45,670 | $167,150 |
| Italy | $21,727 | $32,736 | $34,270 | $43,720 | $132,453 |
| Taiwan | $31,684 | $32,163 | $31,585 | $30,068 | $125,500 |
| North America | $28,129 | $29,088 | $29,928 | $35,667 | $122,812 |
| Other Countries | $24,710 | $24,798 | $41,161 | $29,555 | $120,224 |
| Japan | $16,328 | $29,004 | $25,298 | $31,386 | $102,016 |
highest_quantity <- ordered_by_quantity %>% select(-type) %>% arrange(desc(Total))
# Add the thousands seperator back to quantity values.
highest_quantity$Total <- format(highest_quantity$Total, big.mark = ",", trim = TRUE)
highest_quantity$feb17 <- format(highest_quantity$feb17, big.mark = ",", trim = TRUE)
highest_quantity$feb18 <- format(highest_quantity$mar17, big.mark = ",", trim = TRUE)
highest_quantity$mar17 <- format(highest_quantity$feb18, big.mark = ",", trim = TRUE)
highest_quantity$mar18 <- format(highest_quantity$mar18, big.mark = ",", trim = TRUE)
highest_quantity %>% head(10) %>% kable() %>% kable_styling()
| Country | feb17 | feb18 | mar17 | mar18 | Total |
|---|---|---|---|---|---|
| Europe | 33,255 | 41,862 | 41,862 | 47,170 | 162,533 |
| European Union | 30,980 | 39,659 | 39,659 | 45,085 | 154,531 |
| Pacific Rim Countries | 22,381 | 25,457 | 25,457 | 28,455 | 121,694 |
| Euro Area | 22,224 | 29,769 | 29,769 | 34,848 | 117,970 |
| Other Countries | 10,822 | 12,567 | 12,567 | 11,108 | 43,594 |
| Taiwan | 11,086 | 10,350 | 10,350 | 10,134 | 42,323 |
| North America | 10,007 | 9,599 | 9,599 | 11,284 | 39,542 |
| Germany | 7,975 | 10,293 | 10,293 | 9,790 | 36,485 |
| Italy | 4,929 | 8,946 | 8,946 | 11,699 | 34,012 |
| Indonesia | 1,135 | 1,859 | 1,859 | 951 | 25,207 |
lowest_quantity <- ordered_by_quantity %>% select(-type) %>% arrange(Total)
# Add the thousands seperator back to quantity values.
lowest_quantity$Total <- format(lowest_quantity$Total, big.mark = ",", trim = TRUE)
lowest_quantity$feb17 <- format(lowest_quantity$feb17, big.mark = ",", trim = TRUE)
lowest_quantity$feb18 <- format(lowest_quantity$mar17, big.mark = ",", trim = TRUE)
lowest_quantity$mar17 <- format(lowest_quantity$feb18, big.mark = ",", trim = TRUE)
lowest_quantity$mar18 <- format(lowest_quantity$mar18, big.mark = ",", trim = TRUE)
lowest_quantity %>% head(10) %>% kable() %>% kable_styling()
| Country | feb17 | feb18 | mar17 | mar18 | Total |
|---|---|---|---|---|---|
| Singapore | 19 | 19 | 19 | 19 | 67 |
| Netherlands | 2 | 33 | 33 | 21 | 76 |
| Switzerland | 195 | 161 | 161 | 148 | 615 |
| Czech Republic | 37 | 488 | 488 | 116 | 682 |
| Ukraine | 271 | 165 | 165 | 343 | 1,351 |
| Other Pacific Rim | 475 | 366 | 366 | 241 | 1,387 |
| Finland | 407 | 345 | 345 | 447 | 1,721 |
| Brazil | 1,031 | 718 | 718 | 1,074 | 3,275 |
| South/Central America | 1,039 | 729 | 729 | 1,083 | 3,306 |
| Thailand | 917 | 2,188 | 2,188 | 123 | 3,524 |
library(dplyr)
library(tidyr)
library(rvest)
library(data.table)
library(tidyverse)
library(inegiR)
library(knitr)
library(kableExtra)
un_world_web_page <- read_html("http://data.un.org/en/reg/g1.html")
table <- html_nodes(un_world_web_page, "table") %>% .[3] %>% html_table(header = TRUE, fill = TRUE)
table_dataframe <- as.data.frame(table)
setnames(table_dataframe, old=c("Var.1","X2005", "X2010", "X2018"), new=c("KPI", "2005", "2010", "2018"))
table_long <- table_dataframe %>% gather(Year, Result, -"KPI", na.rm = FALSE, convert = TRUE)
table_long %>% head(100) %>% kable(format='markdown') %>% kable_styling(bootstrap_options = "basic")
| KPI | Year | Result |
|---|---|---|
| GDP: Gross domestic product (million current US$) | 2005 | 47 601 561 |
| GDP growth rate (annual %, const. 2010 prices) | 2005 | 3.8 |
| GDP per capita (current US$) | 2005 | 7 278.0 |
| Employment in agricultured (% of employed) | 2005 | 35.2 |
| Employment in industryd (% of employed) | 2005 | 22.6 |
| Employment in servicesd (% employed) | 2005 | 42.1 |
| Unemployment rated (% of labour force) | 2005 | 6 |
| Labour force participation rated (female/male pop. %) | 2005 | 51.1 / 77.4 |
| Agricultural production index (2004-2006=100) | 2005 | 100 |
| International trade: exports (million current US$) | 2005 | 10 373 445 |
| International trade: imports (million current US$) | 2005 | 10 577 013 |
| International trade: balance (million current US$) | 2005 | - 203 568 |
| GDP: Gross domestic product (million current US$) | 2010 | 66 010 167 |
| GDP growth rate (annual %, const. 2010 prices) | 2010 | 4.3 |
| GDP per capita (current US$) | 2010 | 9 489.0 |
| Employment in agricultured (% of employed) | 2010 | 30.8 |
| Employment in industryd (% of employed) | 2010 | 23 |
| Employment in servicesd (% employed) | 2010 | 46.2 |
| Unemployment rated (% of labour force) | 2010 | 5.8 |
| Labour force participation rated (female/male pop. %) | 2010 | 49.4 / 76.2 |
| Agricultural production index (2004-2006=100) | 2010 | 113 |
| International trade: exports (million current US$) | 2010 | 15 100 194 |
| International trade: imports (million current US$) | 2010 | 15 261 844 |
| International trade: balance (million current US$) | 2010 | - 161 649 |
| GDP: Gross domestic product (million current US$) | 2018 | 75 648 868c |
| GDP growth rate (annual %, const. 2010 prices) | 2018 | 2.4c |
| GDP per capita (current US$) | 2018 | 10 134.0c |
| Employment in agricultured (% of employed) | 2018 | 26 |
| Employment in industryd (% of employed) | 2018 | 22.4 |
| Employment in servicesd (% employed) | 2018 | 51.7 |
| Unemployment rated (% of labour force) | 2018 | 5.5 |
| Labour force participation rated (female/male pop. %) | 2018 | 48.5 / 75.0 |
| Agricultural production index (2004-2006=100) | 2018 | 127c |
| International trade: exports (million current US$) | 2018 | 17 177 323e |
| International trade: imports (million current US$) | 2018 | 17 507 492e |
| International trade: balance (million current US$) | 2018 | - 330 169e |