install.packages(“readxl”) install.packages(“ggpubr”)

install.packages(“flextable”)

install.packages(“janitor”)

#obviously to make this part work you may have to alter the path to the file to work for you. library(readxl) Firm_level_dataset_Chile <- read_excel(“C:/Users/baile/Downloads/Firm Level Dataset Chile.xlsx”)

Trade_Data_UK <- read_excel(“C:/Users/baile/Downloads/Trade Data for UK.xlsx”)

library(dplyr) library(tidyr) library(flextable) library(janitor) library(ggplot2) library(stringr)

#5a
#Create dummy variable
Firm_level_dataset_Chile$exporter<-ifelse(Firm_level_dataset_Chile$exports>0,1,0) 

#Creating a simple table

#Note, I left out owners and comissioned workers from total employment as they are likely not employees 
summary_table <- Firm_level_dataset_Chile%>%group_by(exporter)%>%summarise(avg_total_employment = mean(mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv,na.rm = TRUE), avg_sale = mean(sales,na.rm = TRUE), avg_va = mean(va, na.rm = TRUE), avg_ceo_pay = mean(wageceo,na.rm = TRUE), avg_skilled_pay = mean(wageskilled,na.rm = TRUE), avg_unskilled_pay = mean(wageunskilled,na.rm = TRUE), avg_Admin_wage = mean(wageadm,na.rm = TRUE), avg_Services_wage = mean(wageserv,na.rm = TRUE), avg_Technology_Transfer_Spending = mean(asiiva+aseval,na.rm = TRUE), avg_Patent_expenses = mean(patimp,na.rm = TRUE), avg_Labour_Productivity = mean(ifelse((mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv) == 0,NA,va/(mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv)),na.rm = TRUE))

#Reformatting to better layout
summary_table_long <- summary_table %>% pivot_longer(cols = -exporter,names_to = "variable",values_to = "value") %>%pivot_wider(names_from = exporter,values_from = value,names_prefix = "exporter_")%>%mutate(Difference = exporter_1 - exporter_0)%>%mutate(Percentage_difference = (exporter_1 - exporter_0)/exporter_0 * 100)

#making the names clearer
summary_table_long <- summary_table_long %>%rename(Non_exporter = exporter_0,Exporter = exporter_1)

print(summary_table_long)
#5b
#Creating the table
Summary_table1 <- Firm_level_dataset_Chile %>% mutate(domestic_sales = sales - exports) %>% group_by(isic3_2d, description_industry) %>% summarise(total_domestic_sales = sum(domestic_sales, na.rm = TRUE), total_exports = sum(exports, na.rm = TRUE), total_sales = sum(sales, na.rm = TRUE),.groups = "drop")%>% mutate(export_share = round(100*total_exports/total_sales, 1)) %>% select(description_industry, total_domestic_sales, total_exports, total_sales, export_share) %>% arrange(desc(total_sales))

#Changing into a flex table as this made it much neater
ft <- flextable(Summary_table1)
ft <- autofit(ft)
ft

#Simple regression
model_industry <- lm(total_exports ~ total_domestic_sales, data = Summary_table1)
summary(model_industry)
#6a
#Cleaning the names so they are lower case
Trade_Data_UK <- Trade_Data_UK %>% clean_names()

#creating the summary table
Trade_Data_UK_avg <- Trade_Data_UK %>% group_by(year) %>% summarise(East_Asia_Pacific = mean(east_asia_pacific_exports_million_usd, na.rm = TRUE),EU = mean(eu_28_exports_million_usd, na.rm = TRUE),MENA= mean(middle_east_north_africa_exports_million_usd, na.rm = TRUE), North_America = mean(north_america_exports_million_usd, na.rm = TRUE), South_Asia = mean(south_asia_exports_million_usd, na.rm = TRUE), Sub_Saharan_Africa = mean(sub_saharan_africa_exports_million_usd, na.rm = TRUE))

#Converting to a long summarey table
Trade_Data_UK_long <- Trade_Data_UK_avg %>% pivot_longer(cols = -year,names_to = "region",values_to = "avg_exports")

#Changing the way the year data is formatted 
Trade_Data_UK_long$year <- as.integer(Trade_Data_UK_long$year)

#Creating the graph
ggplot(Trade_Data_UK_long, aes(x = year, y = avg_exports, colour = region)) + geom_line(size = 1.2) + geom_point(size = 2) + scale_x_continuous(breaks = 2013:2024, labels = as.character(2013:2024)) + labs(title = "Average UK Exports by Region (2013–2024)",x = "Year",y = "Average Exports (Million USD)",colour = "Region") + theme_minimal(base_size = 14) + theme(legend.position = "bottom",plot.title = element_text(face = "bold"))

#Find the total avg exports for each industry
industry_avg_exports <- Trade_Data_UK %>% group_by(product_code) %>% summarise(avg_total_exports = mean(total_exports_million_usd, na.rm = TRUE)) %>% arrange(desc(avg_total_exports))

#Filter out the top 5
top5_industries <- industry_avg_exports %>% slice_max(avg_total_exports, n = 5) %>% pull(product_code)

#Filter the data set for just the top 5 industries
UK_top5 <- Trade_Data_UK %>% filter(product_code %in% top5_industries)

#Convert the data set to long form
UK_top5_long <- UK_top5 %>% pivot_longer(cols = c(east_asia_pacific_exports_million_usd,eu_28_exports_million_usd,middle_east_north_africa_exports_million_usd,north_america_exports_million_usd,south_asia_exports_million_usd,sub_saharan_africa_exports_million_usd),names_to = "region",values_to = "exports")

#Cleaning up the names
UK_top5_long <- UK_top5_long %>% mutate(region = case_when(str_detect(region, "eu_28") ~ "EU",str_detect(region, "north_america") ~ "North America",str_detect(region, "south_asia") ~ "South Asia",str_detect(region, "east_asia_pacific") ~ "East Asia-Pacific",str_detect(region, "middle_east_north_africa") ~ "MENA",str_detect(region, "sub_saharan_africa") ~ "Sub-Saharan Africa",TRUE ~ region))

#Plotting the exports of the top 5 industries 
ggplot(UK_top5_long, aes(x = year, y = exports, colour = region)) + geom_line(size = 1.2) + facet_wrap(~ product_name, scales = "free_y") + scale_x_continuous(breaks = 2013:2024) + labs(title = "Exports by Region for Top 5 UK Industries (2013–2024)",x = "Year",y = "Exports (Million USD)",colour = "Region") + theme_minimal(base_size = 14) + theme(legend.position = "bottom",plot.title = element_text(face = "bold"),axis.text.x = element_text(angle = 45, hjust = 1))
---
title: "R Notebook"
output: html_notebook
---

install.packages("readxl")
install.packages("ggpubr")

install.packages("flextable")

install.packages("janitor")

#obviously to make this part work you may have to alter the path to the file to work for you. 
library(readxl)
Firm_level_dataset_Chile <- read_excel("C:/Users/baile/Downloads/Firm Level Dataset Chile.xlsx")

Trade_Data_UK <- read_excel("C:/Users/baile/Downloads/Trade Data for UK.xlsx")

library(dplyr)
library(tidyr)
library(flextable)
library(janitor)
library(ggplot2)
library(stringr)

```{r}
#5a
#Create dummy variable
Firm_level_dataset_Chile$exporter<-ifelse(Firm_level_dataset_Chile$exports>0,1,0) 

#Creating a simple table

#Note, I left out owners and comissioned workers from total employment as they are likely not employees 
summary_table <- Firm_level_dataset_Chile%>%group_by(exporter)%>%summarise(avg_total_employment = mean(mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv,na.rm = TRUE), avg_sale = mean(sales,na.rm = TRUE), avg_va = mean(va, na.rm = TRUE), avg_ceo_pay = mean(wageceo,na.rm = TRUE), avg_skilled_pay = mean(wageskilled,na.rm = TRUE), avg_unskilled_pay = mean(wageunskilled,na.rm = TRUE), avg_Admin_wage = mean(wageadm,na.rm = TRUE), avg_Services_wage = mean(wageserv,na.rm = TRUE), avg_Technology_Transfer_Spending = mean(asiiva+aseval,na.rm = TRUE), avg_Patent_expenses = mean(patimp,na.rm = TRUE), avg_Labour_Productivity = mean(ifelse((mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv) == 0,NA,va/(mceo+fceo+mskilled+fskilled+madm+fadm+munskilled+funskilled+mserv+fserv)),na.rm = TRUE))

#Reformatting to better layout
summary_table_long <- summary_table %>% pivot_longer(cols = -exporter,names_to = "variable",values_to = "value") %>%pivot_wider(names_from = exporter,values_from = value,names_prefix = "exporter_")%>%mutate(Difference = exporter_1 - exporter_0)%>%mutate(Percentage_difference = (exporter_1 - exporter_0)/exporter_0 * 100)

#making the names clearer
summary_table_long <- summary_table_long %>%rename(Non_exporter = exporter_0,Exporter = exporter_1)

print(summary_table_long)
```

```{r}
#5b
#Creating the table
Summary_table1 <- Firm_level_dataset_Chile %>% mutate(domestic_sales = sales - exports) %>% group_by(isic3_2d, description_industry) %>% summarise(total_domestic_sales = sum(domestic_sales, na.rm = TRUE), total_exports = sum(exports, na.rm = TRUE), total_sales = sum(sales, na.rm = TRUE),.groups = "drop")%>% mutate(export_share = round(100*total_exports/total_sales, 1)) %>% select(description_industry, total_domestic_sales, total_exports, total_sales, export_share) %>% arrange(desc(total_sales))

#Changing into a flex table as this made it much neater
ft <- flextable(Summary_table1)
ft <- autofit(ft)
ft

#Simple regression
model_industry <- lm(total_exports ~ total_domestic_sales, data = Summary_table1)
summary(model_industry)

```

```{r}
#6a
#Cleaning the names so they are lower case
Trade_Data_UK <- Trade_Data_UK %>% clean_names()

#creating the summary table
Trade_Data_UK_avg <- Trade_Data_UK %>% group_by(year) %>% summarise(East_Asia_Pacific = mean(east_asia_pacific_exports_million_usd, na.rm = TRUE),EU = mean(eu_28_exports_million_usd, na.rm = TRUE),MENA= mean(middle_east_north_africa_exports_million_usd, na.rm = TRUE), North_America = mean(north_america_exports_million_usd, na.rm = TRUE), South_Asia = mean(south_asia_exports_million_usd, na.rm = TRUE), Sub_Saharan_Africa = mean(sub_saharan_africa_exports_million_usd, na.rm = TRUE))

#Converting to a long summarey table
Trade_Data_UK_long <- Trade_Data_UK_avg %>% pivot_longer(cols = -year,names_to = "region",values_to = "avg_exports")

#Changing the way the year data is formatted 
Trade_Data_UK_long$year <- as.integer(Trade_Data_UK_long$year)

#Creating the graph
ggplot(Trade_Data_UK_long, aes(x = year, y = avg_exports, colour = region)) + geom_line(size = 1.2) + geom_point(size = 2) + scale_x_continuous(breaks = 2013:2024, labels = as.character(2013:2024)) + labs(title = "Average UK Exports by Region (2013–2024)",x = "Year",y = "Average Exports (Million USD)",colour = "Region") + theme_minimal(base_size = 14) + theme(legend.position = "bottom",plot.title = element_text(face = "bold"))

```

```{r}

#Find the total avg exports for each industry
industry_avg_exports <- Trade_Data_UK %>% group_by(product_code) %>% summarise(avg_total_exports = mean(total_exports_million_usd, na.rm = TRUE)) %>% arrange(desc(avg_total_exports))

#Filter out the top 5
top5_industries <- industry_avg_exports %>% slice_max(avg_total_exports, n = 5) %>% pull(product_code)

#Filter the data set for just the top 5 industries
UK_top5 <- Trade_Data_UK %>% filter(product_code %in% top5_industries)

#Convert the data set to long form
UK_top5_long <- UK_top5 %>% pivot_longer(cols = c(east_asia_pacific_exports_million_usd,eu_28_exports_million_usd,middle_east_north_africa_exports_million_usd,north_america_exports_million_usd,south_asia_exports_million_usd,sub_saharan_africa_exports_million_usd),names_to = "region",values_to = "exports")

#Cleaning up the names
UK_top5_long <- UK_top5_long %>% mutate(region = case_when(str_detect(region, "eu_28") ~ "EU",str_detect(region, "north_america") ~ "North America",str_detect(region, "south_asia") ~ "South Asia",str_detect(region, "east_asia_pacific") ~ "East Asia-Pacific",str_detect(region, "middle_east_north_africa") ~ "MENA",str_detect(region, "sub_saharan_africa") ~ "Sub-Saharan Africa",TRUE ~ region))

#Plotting the exports of the top 5 industries 
ggplot(UK_top5_long, aes(x = year, y = exports, colour = region)) + geom_line(size = 1.2) + facet_wrap(~ product_name, scales = "free_y") + scale_x_continuous(breaks = 2013:2024) + labs(title = "Exports by Region for Top 5 UK Industries (2013–2024)",x = "Year",y = "Exports (Million USD)",colour = "Region") + theme_minimal(base_size = 14) + theme(legend.position = "bottom",plot.title = element_text(face = "bold"),axis.text.x = element_text(angle = 45, hjust = 1))

```




