library(readxl)
## Warning: package 'readxl' was built under R version 4.5.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
df <- read_excel("C:/Users/mcquilt1/Downloads/Book1.xlsx")
existing_reporters <- unique(df$Reporter)
# Filter imports where Trade Partner is NOT already a reporter
imports <- df %>%
filter(`Trade Direction` == "Import",
!`Trade Partner` %in% existing_reporters)
# Create mirrored export rows
mirror <- imports %>%
mutate(
original_reporter = Reporter,
Reporter = `Trade Partner`,
`Trade Partner` = original_reporter,
`Trade Direction` = "Export"
) %>%
select(-original_reporter)
df_extended <- bind_rows(df, mirror)
df_extended <- df_extended %>%
filter(`Trade Direction` != "Import")
df_extended <- df_extended %>%
group_by(`Trade Partner`, `HS4 Code`) %>%
mutate(`Total export USD` = sum(USD, na.rm = TRUE)) %>%
ungroup()
df_extended <- df_extended %>%
mutate(`Total share of exports` = USD / `Total export USD`)
write.csv(df_extended, "C:/Users/mcquilt1/Downloads/MAINDATA.csv", row.names = FALSE)
gulf_countries <- c(
"Saudi Arabia", "United Arab Emirates", "Kuwait",
"Qatar", "Bahrain", "Oman"
)
df_extended <- df_extended %>%
mutate(A = ifelse(Reporter %in% gulf_countries, 1, 0))
df_gulf <- df_extended %>%
mutate(
Reporter = ifelse(
`HS4 Code` == 2709 & Reporter %in% gulf_countries,
"Gulf Countries",
Reporter
)
) %>%
filter(
(`HS4 Code` == 2709 & Reporter == "Gulf Countries") |
(`HS4 Code` == 2710)
)
df_gulf <- df_gulf %>%
group_by(
Year,
Reporter,
`Trade Direction`,
`Trade Partner`,
`HS4 Code`,
Income
) %>%
summarise(
USD = sum(USD, na.rm = TRUE),
`Total export USD` = sum(`Total export USD`, na.rm = TRUE),
`Total share of exports` = sum(`Total share of exports`, na.rm = TRUE),
.groups = "drop"
)
top10_2709 <- df_gulf %>%
filter(`HS4 Code` == 2709) %>%
group_by(`Trade Partner`) %>%
summarise(total_usd = sum(USD, na.rm = TRUE), .groups = "drop") %>%
slice_max(order_by = total_usd, n = 10, with_ties = FALSE)
top10_2709 <- df_gulf %>%
filter(`HS4 Code` == 2709) %>%
group_by(`Trade Partner`) %>%
summarise(total_usd = sum(USD, na.rm = TRUE), .groups = "drop") %>%
arrange(desc(total_usd)) %>%
{
bind_rows(
slice_head(., n = 10),
filter(., `Trade Partner` == "Canada")
)
} %>%
distinct(`Trade Partner`, .keep_all = TRUE)
#IF YOU WANT ONLY LOW INCOME!!!!!
# df_gulf <- df_gulf %>%
# filter(
# `HS4 Code` != 2710 |
# Income == "Low income"
# )
df_gulf_filtered <- df_gulf %>%
# keep all 2709 for top 10 partners
filter(
(`HS4 Code` == 2709 & `Trade Partner` %in% top10_2709$`Trade Partner`) |
(`HS4 Code` == 2710 & Reporter %in% top10_2709$`Trade Partner`)
) %>%
group_by(Reporter) %>%
filter(
`HS4 Code` != 2710 | rank(-USD) <= 5
) %>%
ungroup()
library(dplyr)
library(networkD3)
## Warning: package 'networkD3' was built under R version 4.5.3
crude <- df_gulf_filtered %>%
filter(`HS4 Code` == 2709) %>%
select(source = Reporter,
target = `Trade Partner`,
value = USD)
refined <- df_gulf_filtered %>%
filter(`HS4 Code` == 2710) %>%
select(source = Reporter,
target = `Trade Partner`,
value = USD)
nodes <- data.frame(
name = unique(c(crude$source, crude$target,
refined$source, refined$target))
)
links_crude <- crude %>%
transmute(
source = match(source, nodes$name) - 1,
target = match(target, nodes$name) - 1,
value = value
)
links_refined <- refined %>%
transmute(
source = match(source, nodes$name) - 1,
target = match(target, nodes$name) - 1,
value = value
)
links <- bind_rows(links_crude, links_refined)
sankeyNetwork(
Links = links,
Nodes = nodes,
Source = "source",
Target = "target",
Value = "value",
NodeID = "name",
fontSize = 12,
nodeWidth = 10
)
## Links is a tbl_df. Converting to a plain data frame.
df_gulf_filtered <- df_gulf_filtered %>%
mutate(`Total share of exports` = `Total share of exports` * 100)
crude <- df_gulf_filtered %>%
filter(`HS4 Code` == 2709) %>%
select(source = Reporter,
target = `Trade Partner`,
value = `Total share of exports`)
refined <- df_gulf_filtered %>%
filter(`HS4 Code` == 2710) %>%
select(source = Reporter,
target = `Trade Partner`,
value = `Total share of exports`)
nodes <- data.frame(
name = unique(c(crude$source, crude$target,
refined$source, refined$target))
)
links_crude <- crude %>%
transmute(
source = match(source, nodes$name) - 1,
target = match(target, nodes$name) - 1,
value = value
)
links_refined <- refined %>%
transmute(
source = match(source, nodes$name) - 1,
target = match(target, nodes$name) - 1,
value = value
)
links <- bind_rows(links_crude, links_refined)
sankeyNetwork(
Links = links,
Nodes = nodes,
Source = "source",
Target = "target",
Value = "value",
NodeID = "name",
fontSize = 12,
nodeWidth = 10
)
## Links is a tbl_df. Converting to a plain data frame.