library(readxl)
## Warning: package 'readxl' was built under R version 4.5.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
df <- read_excel("C:/Users/mcquilt1/Downloads/Book1.xlsx")


existing_reporters <- unique(df$Reporter)

# Filter imports where Trade Partner is NOT already a reporter
imports <- df %>%
  filter(`Trade Direction` == "Import",
         !`Trade Partner` %in% existing_reporters)

# Create mirrored export rows
mirror <- imports %>%
  mutate(
    original_reporter = Reporter,
    Reporter = `Trade Partner`,
    `Trade Partner` = original_reporter,
    `Trade Direction` = "Export"
  ) %>%
  select(-original_reporter)

df_extended <- bind_rows(df, mirror)

df_extended <- df_extended %>%
  filter(`Trade Direction` != "Import")

df_extended <- df_extended %>%
  group_by(`Trade Partner`, `HS4 Code`) %>%
  mutate(`Total export USD` = sum(USD, na.rm = TRUE)) %>%
  ungroup()

df_extended <- df_extended %>%
  mutate(`Total share of exports` = USD / `Total export USD`)

write.csv(df_extended, "C:/Users/mcquilt1/Downloads/MAINDATA.csv", row.names = FALSE)


gulf_countries <- c(
  "Saudi Arabia", "United Arab Emirates", "Kuwait",
  "Qatar", "Bahrain", "Oman"
)

df_extended <- df_extended %>%
  mutate(A = ifelse(Reporter %in% gulf_countries, 1, 0))

df_gulf <- df_extended %>%
  mutate(
    Reporter = ifelse(
      `HS4 Code` == 2709 & Reporter %in% gulf_countries,
      "Gulf Countries",
      Reporter
    )
  ) %>%
  filter(
    (`HS4 Code` == 2709 & Reporter == "Gulf Countries") |
      (`HS4 Code` == 2710)
  )

df_gulf <- df_gulf %>%
  group_by(
    Year,
    Reporter,
    `Trade Direction`,
    `Trade Partner`,
    `HS4 Code`,
    Income
  ) %>%
  summarise(
    USD = sum(USD, na.rm = TRUE),
    `Total export USD` = sum(`Total export USD`, na.rm = TRUE),
    `Total share of exports` = sum(`Total share of exports`, na.rm = TRUE),
    .groups = "drop"
  ) 



top10_2709 <- df_gulf %>%
  filter(`HS4 Code` == 2709) %>%
  group_by(`Trade Partner`) %>%
  summarise(total_usd = sum(USD, na.rm = TRUE), .groups = "drop") %>%
  slice_max(order_by = total_usd, n = 10, with_ties = FALSE)


top10_2709 <- df_gulf %>%
  filter(`HS4 Code` == 2709) %>%
  group_by(`Trade Partner`) %>%
  summarise(total_usd = sum(USD, na.rm = TRUE), .groups = "drop") %>%
  arrange(desc(total_usd)) %>%
  {
    bind_rows(
      slice_head(., n = 10),                   
      filter(., `Trade Partner` == "Canada")       
    )
  } %>%
  distinct(`Trade Partner`, .keep_all = TRUE)       


#IF YOU WANT ONLY LOW INCOME!!!!!
# df_gulf <- df_gulf %>%
#   filter(
#     `HS4 Code` != 2710 |
#       Income == "Low income"
#   )



df_gulf_filtered <- df_gulf %>%
  # keep all 2709 for top 10 partners
  filter(
    (`HS4 Code` == 2709 & `Trade Partner` %in% top10_2709$`Trade Partner`) |
      (`HS4 Code` == 2710 & Reporter %in% top10_2709$`Trade Partner`)
  ) %>%
  group_by(Reporter) %>%
  filter(
    `HS4 Code` != 2710 | rank(-USD) <= 5
  ) %>%
  ungroup()

library(dplyr)
library(networkD3)
## Warning: package 'networkD3' was built under R version 4.5.3
crude <- df_gulf_filtered %>%
  filter(`HS4 Code` == 2709) %>%
  select(source = Reporter,
         target = `Trade Partner`,
         value = USD)

refined <- df_gulf_filtered %>%
  filter(`HS4 Code` == 2710) %>%
  select(source = Reporter,
         target = `Trade Partner`,
         value = USD)


nodes <- data.frame(
  name = unique(c(crude$source, crude$target,
                  refined$source, refined$target))
)


links_crude <- crude %>%
  transmute(
    source = match(source, nodes$name) - 1,
    target = match(target, nodes$name) - 1,
    value = value
  )


links_refined <- refined %>%
  transmute(
    source = match(source, nodes$name) - 1,
    target = match(target, nodes$name) - 1,
    value = value
  )


links <- bind_rows(links_crude, links_refined)


sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 10
)
## Links is a tbl_df. Converting to a plain data frame.
df_gulf_filtered <- df_gulf_filtered %>%
  mutate(`Total share of exports` = `Total share of exports` * 100)


crude <- df_gulf_filtered %>%
  filter(`HS4 Code` == 2709) %>%
  select(source = Reporter,
         target = `Trade Partner`,
         value = `Total share of exports`)

refined <- df_gulf_filtered %>%
  filter(`HS4 Code` == 2710) %>%
  select(source = Reporter,
         target = `Trade Partner`,
         value = `Total share of exports`)


nodes <- data.frame(
  name = unique(c(crude$source, crude$target,
                  refined$source, refined$target))
)


links_crude <- crude %>%
  transmute(
    source = match(source, nodes$name) - 1,
    target = match(target, nodes$name) - 1,
    value = value
  )


links_refined <- refined %>%
  transmute(
    source = match(source, nodes$name) - 1,
    target = match(target, nodes$name) - 1,
    value = value
  )


links <- bind_rows(links_crude, links_refined)


sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "value",
  NodeID = "name",
  fontSize = 12,
  nodeWidth = 10
)
## Links is a tbl_df. Converting to a plain data frame.