Dataset 1 - US Steel Imports Data (Ajay Arora)

Some analysis could be done on foreign trade e.g. steel imports, etc.

library(dplyr)
library(tidyr)
library(stringr)
library(rvest)
library(data.table)
library(tidyverse)
library(kableExtra)
library(knitr)
library(scales)
# Import the dataset CSV file into R. In order to remove the metadata from the top of the file, and to exclude the Total Census row, we add the "skip" argument to "read.csv()" and skip the first 15 lines.
steel_csv <- "https://raw.githubusercontent.com/stephen-haslett/data-607/master/project2-steel-dataset.csv"
steel_dataframe <- read.csv(steel_csv, header = FALSE, stringsAsFactors = FALSE, skip = 15)

# Select only rows and columns that have data. The CSV file contains 47 rows of solid data spread across 13 columns. The rest of the file's content is either empty space, or information that is irrelevant to our analysis. We are only interested in the first 9 columns.
steel_dataframe <- steel_dataframe[1:47, 1:9]

# Replace dashes and empty spaces in the data with NA values.
steel_dataframe[steel_dataframe == ""] <- NA
steel_dataframe[steel_dataframe == "(-)"] <- NA

# Add column names to the table.
names(steel_dataframe) <- c('Country', 'mar18_Quantity', 'mar18_Cost', 'feb18_Quantity', 'feb18_Cost', 'mar17_Quantity', 'mar17_Cost', 'feb17_Quantity', 'feb17_Cost')

# Convert the table to long format.
steel_long = gather(steel_dataframe, type, value, 2:9, factor_key = TRUE) %>% separate(type, c('period','type')) %>% spread(period, value, convert = TRUE)

# Strip commas out of the numeric values in the dataframe so that we can sum them.
steel_long$feb17 <- as.numeric(gsub(",", "", steel_long$feb17))
steel_long$mar17 <- as.numeric(gsub(",", "", steel_long$mar17))
steel_long$feb18 <- as.numeric(gsub(",", "", steel_long$feb18))
steel_long$mar18 <- as.numeric(gsub(",", "", steel_long$mar18))

# Sum values and add the Total column to the table.
summed_steel_data <- steel_long %>% rowwise() %>% mutate(Total = sum(`feb17`, `mar17`, `feb18`, `mar18`))

# Order results by total cost.
ordered_by_cost <- filter(summed_steel_data, type == 'Cost', !is.na(Total))

# Order results by total quantity.
ordered_by_quantity <- filter(summed_steel_data, type == "Quantity", !is.na(Total))

highest_cost <- ordered_by_cost %>% select(-type) %>% arrange(desc(Total))
# Convert numeric column values to dollar amounts.
dollar_format()(c(1:10 * 10))
##  [1] "$10"  "$20"  "$30"  "$40"  "$50"  "$60"  "$70"  "$80"  "$90"  "$100"
highest_cost$Total <- dollar(highest_cost$Total)
highest_cost$feb17 <- dollar(highest_cost$feb17)
highest_cost$mar17 <- dollar(highest_cost$mar17)
highest_cost$feb18 <- dollar(highest_cost$feb18)
highest_cost$mar18 <- dollar(highest_cost$mar18)

Top 10 Highest US Steel Import Costs per Country of Origin

highest_cost %>% head(10) %>% kable() %>% kable_styling()
Country feb17 feb18 mar17 mar18 Total
Europe $124,861 $156,209 $164,523 $183,656 $629,249
European Union $117,190 $150,295 $156,743 $177,054 $601,282
Euro Area $88,961 $120,555 $117,668 $137,157 $464,341
Pacific Rim Countries $81,213 $137,556 $95,804 $108,106 $422,679
Germany $36,296 $39,553 $45,631 $45,670 $167,150
Italy $21,727 $32,736 $34,270 $43,720 $132,453
Taiwan $31,684 $32,163 $31,585 $30,068 $125,500
North America $28,129 $29,088 $29,928 $35,667 $122,812
Other Countries $24,710 $24,798 $41,161 $29,555 $120,224
Japan $16,328 $29,004 $25,298 $31,386 $102,016
highest_quantity <- ordered_by_quantity %>% select(-type) %>% arrange(desc(Total))

# Add the thousands seperator back to quantity values. 
highest_quantity$Total <- format(highest_quantity$Total, big.mark = ",", trim = TRUE)
highest_quantity$feb17 <- format(highest_quantity$feb17, big.mark = ",", trim = TRUE)
highest_quantity$feb18 <- format(highest_quantity$mar17, big.mark = ",", trim = TRUE)
highest_quantity$mar17 <- format(highest_quantity$feb18, big.mark = ",", trim = TRUE)
highest_quantity$mar18 <- format(highest_quantity$mar18, big.mark = ",", trim = TRUE)

Top 10 Highest US Steel Import Quantities per Country of Origin

highest_quantity %>% head(10) %>% kable() %>% kable_styling()
Country feb17 feb18 mar17 mar18 Total
Europe 33,255 41,862 41,862 47,170 162,533
European Union 30,980 39,659 39,659 45,085 154,531
Pacific Rim Countries 22,381 25,457 25,457 28,455 121,694
Euro Area 22,224 29,769 29,769 34,848 117,970
Other Countries 10,822 12,567 12,567 11,108 43,594
Taiwan 11,086 10,350 10,350 10,134 42,323
North America 10,007 9,599 9,599 11,284 39,542
Germany 7,975 10,293 10,293 9,790 36,485
Italy 4,929 8,946 8,946 11,699 34,012
Indonesia 1,135 1,859 1,859 951 25,207
lowest_quantity <- ordered_by_quantity %>% select(-type) %>% arrange(Total)

# Add the thousands seperator back to quantity values. 
lowest_quantity$Total <- format(lowest_quantity$Total, big.mark = ",", trim = TRUE)
lowest_quantity$feb17 <- format(lowest_quantity$feb17, big.mark = ",", trim = TRUE)
lowest_quantity$feb18 <- format(lowest_quantity$mar17, big.mark = ",", trim = TRUE)
lowest_quantity$mar17 <- format(lowest_quantity$feb18, big.mark = ",", trim = TRUE)
lowest_quantity$mar18 <- format(lowest_quantity$mar18, big.mark = ",", trim = TRUE)

Top 10 Lowest US Steel Import Quantities per Country of Origin

lowest_quantity %>% head(10) %>% kable() %>% kable_styling()
Country feb17 feb18 mar17 mar18 Total
Singapore 19 19 19 19 67
Netherlands 2 33 33 21 76
Switzerland 195 161 161 148 615
Czech Republic 37 488 488 116 682
Ukraine 271 165 165 343 1,351
Other Pacific Rim 475 366 366 241 1,387
Finland 407 345 345 447 1,721
Brazil 1,031 718 718 1,074 3,275
South/Central America 1,039 729 729 1,083 3,306
Thailand 917 2,188 2,188 123 3,524

Dataset 2 - United Nations World Data - Economic Indicators (Donny Lofland)

Convert the year columns to a year variable.

library(dplyr)
library(tidyr)
library(rvest)
library(data.table)
library(tidyverse)
library(inegiR)
library(knitr)
library(kableExtra)

un_world_web_page <- read_html("http://data.un.org/en/reg/g1.html")
 
table <- html_nodes(un_world_web_page, "table") %>% .[3] %>% html_table(header = TRUE, fill = TRUE)
table_dataframe <- as.data.frame(table)
setnames(table_dataframe, old=c("Var.1","X2005", "X2010", "X2018"), new=c("KPI", "2005", "2010", "2018"))

table_long <- table_dataframe %>% gather(Year, Result, -"KPI", na.rm = FALSE, convert = TRUE)
table_long %>% head(100) %>% kable(format='markdown') %>% kable_styling(bootstrap_options = "basic")
KPI Year Result
GDP: Gross domestic product (million current US$) 2005 47 601 561
GDP growth rate (annual %, const. 2010 prices) 2005 3.8
GDP per capita (current US$) 2005 7 278.0
Employment in agricultured (% of employed) 2005 35.2
Employment in industryd (% of employed) 2005 22.6
Employment in servicesd (% employed) 2005 42.1
Unemployment rated (% of labour force) 2005 6
Labour force participation rated (female/male pop. %) 2005 51.1 / 77.4
Agricultural production index (2004-2006=100) 2005 100
International trade: exports (million current US$) 2005 10 373 445
International trade: imports (million current US$) 2005 10 577 013
International trade: balance (million current US$) 2005 - 203 568
GDP: Gross domestic product (million current US$) 2010 66 010 167
GDP growth rate (annual %, const. 2010 prices) 2010 4.3
GDP per capita (current US$) 2010 9 489.0
Employment in agricultured (% of employed) 2010 30.8
Employment in industryd (% of employed) 2010 23
Employment in servicesd (% employed) 2010 46.2
Unemployment rated (% of labour force) 2010 5.8
Labour force participation rated (female/male pop. %) 2010 49.4 / 76.2
Agricultural production index (2004-2006=100) 2010 113
International trade: exports (million current US$) 2010 15 100 194
International trade: imports (million current US$) 2010 15 261 844
International trade: balance (million current US$) 2010 - 161 649
GDP: Gross domestic product (million current US$) 2018 75 648 868c
GDP growth rate (annual %, const. 2010 prices) 2018 2.4c
GDP per capita (current US$) 2018 10 134.0c
Employment in agricultured (% of employed) 2018 26
Employment in industryd (% of employed) 2018 22.4
Employment in servicesd (% employed) 2018 51.7
Unemployment rated (% of labour force) 2018 5.5
Labour force participation rated (female/male pop. %) 2018 48.5 / 75.0
Agricultural production index (2004-2006=100) 2018 127c
International trade: exports (million current US$) 2018 17 177 323e
International trade: imports (million current US$) 2018 17 507 492e
International trade: balance (million current US$) 2018 - 330 169e