1 Preparation

# Remove all data
rm(list = ls())

# Load the necessary libraries
library("rio")           # For importing data 
library("tidyverse")     # For data manipulation
library("countrycode")   # For creating the country ISO codes
library("Hmisc")         # For labeling the variables

2 Sector Codes

# Import the sector codes correspondence table for 1990, 2000, and 2009 
SectorCode_Map <- import("Data_raw/KSK_Code_tanaka2024.xlsx",
sheet = "SectorCode_Map", skip = 3)
# Converting the SectorCode into numeric
SectorCode_Map$SectorCode1990 = as.numeric(SectorCode_Map$SectorCode1990)
SectorCode_Map$SectorCode2000 = as.numeric(SectorCode_Map$SectorCode2000)
SectorCode_Map$SectorCode2009 = as.numeric(SectorCode_Map$SectorCode2009)

# Import the sector codes for 2009
SectorCode2009 <- import("Data_raw/KSK_Code_tanaka2024.xlsx",
sheet = "SectorCode2009", skip = 3)
SectorCode2009 = SectorCode2009 %>%
  select(c(SectorCode2009, SectorName2009, SectorNameAlph,
           SectorClass, SectorClassName, SectorClassNameAlph))

# Converting the SectorCode into numeric
SectorCode_Map$SectorCode2009 = as.numeric(SectorCode_Map$SectorCode2009)

3 Load the data “SHJP”, “JP”, and “FA”

  • “SHJP”: Japanese parent firms’ ownership data
  • “JP”: Japanese parent firms’ financial data
  • “FA”: Japanese affiliates’ basic data
  # Japanese parent firms' ownership data
  SHJP <- import("Data_raw/KSKSHJP.dta") 
    # Create the year variable by subtracting 1 from the EditionYear    
    SHJP$year =  SHJP$EditionYear-1
    # Remove "EditionYear" column
    SHJP$EditionYear = NULL
    # Remove if KSJOwnRatio < 10
    SHJP = SHJP[SHJP$KSJOwnRatio >= 10, ]
    # order by KSKJPCode and year
    SHJP = SHJP[order(SHJP$KSKJPCode, SHJP$year), ]
    
  # Japanese parent firms' financial data
  JP <- import("Data_raw/KSKJP.dta")
    # Create the year variable by subtracting 1 from the EditionYear   
    JP$year =  JP$EditionYear-1 
    # Remove "EditionYear" column
    JP$EditionYear = NULL
    JP = JP[order(JP$KSKJPCode, JP$year), ]

    
  # Japanese affiliates' basic data
  FA <- import("Data_raw/KSKFA.dta")
    # Create the year variable by subtracting 1 from the EditionYear
    FA$year =  FA$EditionYear-1
    # Remove "EditionYear" column
    FA$EditionYear = NULL
    # Order by KSFAffiliateCode and year
    FA = FA[order(FA$KSFAffiliateCode, FA$year), ]

4 Combining the sector codes with JP data

# Step 1 - 1990
## Merging JP with "SectorCode_Map" using SectorCode1990 as a key
  JP <- left_join(JP, SectorCode_Map %>% 
                    select(c(SectorCode1990, SectorCode2009)), 
                  by = c("KSKSectorCode1990" = "SectorCode1990"))

## Replacing "KSKSectorCode2009" with "SectorCode2009" if "KSKSectorCode1990" is non-missing
  JP <- JP %>%
    mutate(KSKSectorCode2009 = 
             ifelse(!is.na(KSKSectorCode1990), SectorCode2009, KSKSectorCode2009))  
  
## Remove "SectorCode2009" from JP data
  JP$SectorCode2009 = NULL

# Step 2 - 2000
## Merging JP with "SectorCode_Map" using SectorCode2000 as a key
  JP <- left_join(JP, SectorCode_Map %>% 
                    select(c(SectorCode2000, SectorCode2009)), 
                  by = c("KSKSectorCode2000" = "SectorCode2000"))

## Replacing "KSKSectorCode2009" with "SectorCode2009" if "KSKSectorCode2000" is non-missing
  JP <- JP %>%
    mutate(KSKSectorCode2009 = 
             ifelse(!is.na(KSKSectorCode2000), SectorCode2009, KSKSectorCode2009))  
   
## Remove "SectorCode2009" from JP data
  JP$SectorCode2009 = NULL
  
# Step 3 
## Remove "KSKSectorCode1990" & "KSKSectorCode2000"
  JP$KSKSectorCode1990 = NULL
  JP$KSKSectorCode2000 = NULL

# Step 4 Modification
## replacing KSKSectorCode2009 for 3000 and 8000
## Create the lagged and lead sector codes
  JP <- JP %>%
    group_by(KSKJPCode) %>%
    mutate(KSKSectorCode2009L1 = lag(KSKSectorCode2009, n=1))
  
  JP <- JP %>%
    group_by(KSKJPCode) %>%
    mutate(KSKSectorCode2009F1 = lead(KSKSectorCode2009, n=1))

## Replacing the missing KSKSectorCode2009 with KSKSectorCode2009L1 if KSKSectorCode2009 is missing
  JP$KSKSectorCode2009[is.na(JP$KSKSectorCode2009)] <- JP$KSKSectorCode2009L1[is.na(JP$KSKSectorCode2009)]

## Replacing the missing KSKSectorCode2009 with KSKSectorCode2009F1 if KSKSectorCode2009 is missing
  JP$KSKSectorCode2009[is.na(JP$KSKSectorCode2009)] <- JP$KSKSectorCode2009F1[is.na(JP$KSKSectorCode2009)]
  
## Replacing KSKSectorCode2009 = 3000 if KSKSectorCode1990 == 3000
  JP$KSKSectorCode2009[JP$KSKSectorCode1990 == 3000&is.na(JP$KSKSectorCode2009)] = 5210 # Other wholesaling
## Replacing KSKSectorCode2009 = 3000 if KSKSectorCode1990 == 3000
  JP$KSKSectorCode2009[JP$KSKSectorCode1990 == 8000&is.na(JP$KSKSectorCode2009)] = 7910 #Other Services
  

# Step 5 
## Combine the "JP" data with the "SectorCode2009" data
  SectorCode2009$SectorCode2009 <- as.numeric(SectorCode2009$SectorCode2009)
  JP <- left_join(JP, SectorCode2009, 
                  by = c("KSKSectorCode2009" = "SectorCode2009"))

5 Combining the sector codes with FA data

# Step 1
## Merging FA with SectorCOde_Map using SectorCode1990 as a key
  FA <- left_join(FA, SectorCode_Map %>% 
                    select(c(SectorCode1990, SectorCode2009)), 
                  by = c("KSFSectorCode1990" = "SectorCode1990"))

## Replacing "KSFSectorCode2009" with "SectorCode2009" if "KSFSectorCode2009" is missing
  FA$KSFSectorCode2009[is.na(FA$KSFSectorCode2009)] <- FA$SectorCode2009[is.na(FA$KSFSectorCode2009)]
  
## Remove "SectorCode2009" from FA data
  FA$SectorCode2009 = NULL

# Step 2
## Merging FA with SectorCOde_Map using SectorCode2000 as a key
  FA <- left_join(FA, SectorCode_Map %>% 
                    select(c(SectorCode2000, SectorCode2009)), 
                  by = c("KSFSectorCode2000" = "SectorCode2000"))
## Replacing "KSFSectorCode2009" with "SectorCode2009" if "KSFSectorCode2000" is non-missing
  FA$KSFSectorCode2009[is.na(FA$KSFSectorCode2009)] <- FA$SectorCode2009[is.na(FA$KSFSectorCode2009)]
  
## Remove "SectorCode2009" from FA data
  FA$SectorCode2009 = NULL
  
# Step 3
## Remove "KSFSectorCode1900" & "KSFSectorCode2000"
  FA$KSFSectorCode1990 = NULL
  FA$KSFSectorCode2000 = NULL

6 Creating the country ISO codes using countrycode package for the affiliate data “FA”

# Replace countrynames
# Example) Replace "Estoniya" with "Republic of Estonia" with gsub()
FA$KSFNationNameAlph = gsub("Estoniya", "Republic of Estonia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Latviya", "Republic of Latvia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Lebanese Republic", "Republic of Lebanon", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Portuguese Republic", "Portugal", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Republic of Cote D'ivire", "Ivory Coast", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Slovenija", "Slovenia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("New Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Serbia and Montenegro", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Socialist Federal Republic of Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Socialist Federal Republic of Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Netherlands Antilles", "Netherlands", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Saipan", "United States", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("The Cooperative Republic of Guyana", "Guyana", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Republic of Namibia", "Namibia", FA$KSFNationNameAlph)


# Create the country ISO codes for Japan
  FA <- FA %>% 
    mutate(iso = countrycode(KSFNationNameAlph, origin = "country.name", destination = "iso3c"))

# Move the column "iso" after the column KSFNationNameAlph
  FA <- FA %>% 
    select(KSFNationNameAlph, iso, everything())
  
# Create countryname with countrycode package
  FA <- FA %>% 
    mutate(countryname = countrycode(iso, origin = "iso3c", destination = "country.name"))

# Remove "KSFNationNameAlph"
  FA$KSFNationNameAlph = NULL

7 Merging the EU dummy data with the affiliate data “FA”

# Load the EU dummy data
  EU <- import("Data_raw/eu.dta")

# Make the euall column in the EU dummy data as 0
  EU$euall <- 1
  
# Label the column "euall" as "Former/current EU members"
  label(EU$euall) <- "Former/current EU members"

# Merge the EU dummy data with the affiliate data
  FA <- left_join(FA, EU %>% select(iso_code, year, eu, euall), 
                                by = c("iso" = "iso_code", "year" = "year"))
  
# Replace the column "eu" if it is NA with 0
  FA$eu[is.na(FA$eu)] <- 0

# Replace the column "euall" if it is NA with 0
  FA$euall[is.na(FA$euall)] <- 0
  
# Move the columns "eu" and "europe" after the column "iso"
  FA <- FA %>% 
    select(iso, year, eu, euall, everything())

8 Combine the FA data with the WDI data

# Load the WDI.csv
  WDI <- import("Data_raw/WDI.csv")
# Select the necessary columns
  WDI <- WDI %>% 
    select(iso3c, year, GDP_PPP, GDP_per_capita, Ease_business)
# Merge the WDI data with the affiliate data
  FA <- left_join(FA, WDI, 
                   by = c("iso" = "iso3c", "year" = "year"))

9 Combine the FA data with the WGI data

# Load the WGI.csv
  WGI <- import("Data_output/WGI.csv")
# Merge the WDI data with the affiliate data
  FA <- left_join(FA, WGI, 
                   by = c("iso" = "code", "year" = "year"))

10 Combine the FA data with the distance data from the CEPII’s Gravity database

# Load the CEPII's distance data
  distance <- import("Data_raw/distance.dta")
# Keep the iso_o=="JPN" and iso_d=="iso"
  distance <- distance %>% 
    filter(iso3_o=="JPN")
# Select the necessary columns
  distance <- distance %>% 
    select(iso3_d, dist)
# Merge the distance data with the affiliate data
  FA <- left_join(FA, distance,
                   by = c("iso" = "iso3_d"))

11 Combine the Japanese ownership data “SHJP” with the Japanese parent firms’ financial data “JP”

# Select the necessary columns from JP
JP_select <- JP %>% 
  select(KSKJPCode, year, KSKCommonName, KSKCommonNameAlphabet, 
         KSKSectorCode2009, SectorNameAlph, SectorClass, SectorClassNameAlph)

# Merge Japanese parent firms' ownership data (SHJPFA) with Japanese parent firms' financial data (JP)
  SHJP_JP <- left_join(SHJP, JP_select,
                           by = c("KSKJPCode" = "KSKJPCode", 
                                  "year" = "year"))
  
# Remove "KSK" from the column names of SHJP_JP
  names(SHJP_JP) <- gsub("KSK", "", names(SHJP_JP))
  
# Remove "KSF" from the column names of SHJP_JP
  names(SHJP_JP) <- gsub("KSF", "", names(SHJP_JP))
  
# Remove "KS" from the column names of SHJP_JP
  names(SHJP_JP) <- gsub("KS", "", names(SHJP_JP))
  
# Add "Parent_" to the column names starting "Common"
  names(SHJP_JP) <- gsub("^Common", "Parent_", names(SHJP_JP))

# Add "Parent_" to the column names starting "Sector"
  names(SHJP_JP) <- gsub("^Sector", "Parent_Sector", names(SHJP_JP))

# Remove "2009" from the column "Parent_SectorCode2009"
  SHJP_JP$Parent_SectorCode2009 = gsub("2009", "", SHJP_JP$Parent_SectorCode2009)

12 Combine the Japanese ownership data “SHJP_JP” with the Japanese affiliates’ basic data “FA”

# Merge Japanese parent firms' ownership data (SHJP) and Japanese affiliates' basic data (FA) using lef_join()
  SHJP_JP_FA <- left_join(SHJP_JP, FA, 
                         by = c("AffiliateCode" = "KSFAffiliateCode", 
                                "year" = "year"))
# Drop year.x and year.y 
  SHJP_JP_FA$year.x = NULL
  SHJP_JP_FA$year.y = NULL
  
# Sort the data by KSKJPCode, iso, and year
  SHJP_JP_FA = SHJP_JP_FA[order(SHJP_JP_FA$JPCode, 
                                SHJP_JP_FA$iso, 
                                SHJP_JP_FA$year), ]
  
# Remove duplicates
  SHJP_JP_FA <- SHJP_JP_FA %>%
    distinct(JPCode, year, AffiliateCode, .keep_all = TRUE)
  

# Rename "KSFSectorCode2009" to "FA_Sector"
  colnames(SHJP_JP_FA)[colnames(SHJP_JP_FA) == "KSFSectorCode2009"] <- "FA_Sector"
  
# Rename "KSFSectorName" to "FA_SectorName"
  colnames(SHJP_JP_FA)[colnames(SHJP_JP_FA) == "KSFSectorName"] <- "FA_SectorName"

# Remove "KSF" from the columns names
  names(SHJP_JP_FA) <- gsub("KSF", "", names(SHJP_JP_FA))

13 The number of affiliates in EU

# Create a dummy variable for iso == GBR
SHJP_JP_FA <- SHJP_JP_FA %>% 
  mutate(GBR = ifelse(iso == "GBR", 1, 0))

# Creating the number of affiliate in GBR
  SHJP_JP_FA <- SHJP_JP_FA %>%
    group_by(JPCode, year) %>%
    mutate(N_Aff_GBR = sum(GBR))

# Creating the number of affiliates in EU
  SHJP_JP_FA <- SHJP_JP_FA %>%
    group_by(JPCode, year) %>%
    mutate(N_Aff_EU = sum(euall))

# Substracting N_Aff_GBR from N_Aff_EU
  SHJP_JP_FA <- SHJP_JP_FA %>%
    mutate(N_Aff_EU = N_Aff_EU - N_Aff_GBR)
  

# Barlot by the existence of affiliates in EU
  SHJP_JP_FA$N_Aff_EU_dummy <- ifelse(SHJP_JP_FA$N_Aff_EU > 0, 1, 0)
  barplot(table(SHJP_JP_FA$N_Aff_EU_dummy), 
          main = "The existence of affiliates in EU except the UK")

  SHJP_JP_FA$N_Aff_EU_dummy <- NULL

# Histogram of the number of affiliates in EU  if N_Aff_EU > 0
  barplot(table(SHJP_JP_FA$N_Aff_EU[SHJP_JP_FA$N_Aff_EU > 0]),
          main = "The number of affiliates in EU except the UK")

# Mean of the number of affiliates in EU
  mean(SHJP_JP_FA$N_Aff_EU, na.rm = TRUE)
## [1] 3.53687
# # Mean of the number of affiliates in EU if N_Aff_EU > 0
  mean(SHJP_JP_FA$N_Aff_EU[SHJP_JP_FA$N_Aff_EU > 0], na.rm = TRUE)
## [1] 6.32217

14 Example data

# Example data
  Daikin = subset(SHJP_JP_FA, SHJP_JP_FA$Parent_NameAlphabet == "DAIKIN INDUSTRIES, LTD.")
  GBR = subset(SHJP_JP_FA, SHJP_JP_FA$iso=="GBR")

15 Distribution of the observations

library(ggplot2)

# plot the distribution of the observations by year
SHJP_JP_FA %>%
  group_by(year) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = year, y = n)) +
  geom_bar(stat = "identity") +
  labs(title = "Distribution of the observations by year",
       x = "Year",
       y = "Number of observations")

16 Distribution of the observations by country

# plot the distribution of the observations by top 20 countries
SHJP_JP_FA %>%
  group_by(countryname) %>%
  summarise(n = n()) %>%
  top_n(20, n) %>%
  ggplot(aes(x = reorder(countryname, n), y = n)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(title = "Distribution of the observations by country",
       x = "Country",
       y = "Number of observations")

17 Describe the data

library(gtsummary)
# Ungroup SHJP_JP_FA to prevent describe group variables JPCode & year
  SHJP_JP_FA <- ungroup(SHJP_JP_FA)
# Describe the data
SHJP_JP_FA %>%
  select(year, eu, Parent_SectorClassNameAlph, ParentJPOwnRatio, 
         GDP_PPP, GDP_per_capita, Ease_business, 
         pvr, ger, rqr, rlr, ccr, dist, N_Aff_EU) %>%
  tbl_summary()
Characteristic N = 357,1721
year 2,010 (2,000, 2,017)
eu 40,961 (11%)
    Unknown 3
Parent_SectorClassNameAlph
    Agriculture,Mining 2,280 (0.6%)
    HeadQuarter 83 (<0.1%)
    Manufacturing 215,362 (60%)
    Retail 6,215 (1.7%)
    Service,Others 62,488 (17%)
    Wholesale 70,717 (20%)
    Unknown 27
ParentJPOwnRatio 100 (65, 100)
    Unknown 3
GDP_PPP 2,164,812,825,331 (631,965,321,180, 11,884,957,832,397)
    Unknown 18,983
GDP_per_capita 19,995 (10,628, 46,865)
    Unknown 18,983
Ease_business 77 (67, 84)
    Unknown 274,378
pvr 50 (28, 71)
    Unknown 42,323
ger 76 (59, 91)
    Unknown 42,354
rqr 67 (45, 93)
    Unknown 42,354
rlr 64 (43, 91)
    Unknown 42,294
ccr 61 (41, 92)
    Unknown 42,329
dist 5,310 (2,109, 9,582)
    Unknown 90
N_Aff_EU 1 (0, 3)
    Unknown 43
1 Median (IQR); n (%)

18 Export the data

# Export the data
  export(SHJP_JP, "Data_output/SHJP_JP.csv")
  export(SHJP_JP_FA, "Data_output/SHJP_JP_FA.csv")
  export(FA, "Data_output/FA.csv")
  export(JP, "Data_output/JP.csv")
  export(GBR, "Data_output/GBR.csv")
  export(Daikin, "Data_output/Daikin.csv")
  export(SectorCode_Map, "Data_output/SectorCode_Map.csv")
  export(SectorCode2009, "Data_output/SectorCode2009.csv")

19 Remove the loaded data

# Remove all data
#rm(list = ls())