Preparation
# Remove all data
rm(list = ls())
# Load the necessary libraries
library("rio") # For importing data
library("tidyverse") # For data manipulation
library("countrycode") # For creating the country ISO codes
library("Hmisc") # For labeling the variables
Sector Codes
# Import the sector codes correspondence table for 1990, 2000, and 2009
SectorCode_Map <- import("Data_raw/KSK_Code_tanaka2024.xlsx",
sheet = "SectorCode_Map", skip = 3)
# Converting the SectorCode into numeric
SectorCode_Map$SectorCode1990 = as.numeric(SectorCode_Map$SectorCode1990)
SectorCode_Map$SectorCode2000 = as.numeric(SectorCode_Map$SectorCode2000)
SectorCode_Map$SectorCode2009 = as.numeric(SectorCode_Map$SectorCode2009)
# Import the sector codes for 2009
SectorCode2009 <- import("Data_raw/KSK_Code_tanaka2024.xlsx",
sheet = "SectorCode2009", skip = 3)
SectorCode2009 = SectorCode2009 %>%
select(c(SectorCode2009, SectorName2009, SectorNameAlph,
SectorClass, SectorClassName, SectorClassNameAlph))
# Converting the SectorCode into numeric
SectorCode_Map$SectorCode2009 = as.numeric(SectorCode_Map$SectorCode2009)
Load the data “SHJP”,
“JP”, and “FA”
- “SHJP”: Japanese parent firms’ ownership data
- “JP”: Japanese parent firms’ financial data
- “FA”: Japanese affiliates’ basic data
# Japanese parent firms' ownership data
SHJP <- import("Data_raw/KSKSHJP.dta")
# Create the year variable by subtracting 1 from the EditionYear
SHJP$year = SHJP$EditionYear-1
# Remove "EditionYear" column
SHJP$EditionYear = NULL
# Remove if KSJOwnRatio < 10
SHJP = SHJP[SHJP$KSJOwnRatio >= 10, ]
# order by KSKJPCode and year
SHJP = SHJP[order(SHJP$KSKJPCode, SHJP$year), ]
# Japanese parent firms' financial data
JP <- import("Data_raw/KSKJP.dta")
# Create the year variable by subtracting 1 from the EditionYear
JP$year = JP$EditionYear-1
# Remove "EditionYear" column
JP$EditionYear = NULL
JP = JP[order(JP$KSKJPCode, JP$year), ]
# Japanese affiliates' basic data
FA <- import("Data_raw/KSKFA.dta")
# Create the year variable by subtracting 1 from the EditionYear
FA$year = FA$EditionYear-1
# Remove "EditionYear" column
FA$EditionYear = NULL
# Order by KSFAffiliateCode and year
FA = FA[order(FA$KSFAffiliateCode, FA$year), ]
Combining the sector
codes with JP data
# Step 1 - 1990
## Merging JP with "SectorCode_Map" using SectorCode1990 as a key
JP <- left_join(JP, SectorCode_Map %>%
select(c(SectorCode1990, SectorCode2009)),
by = c("KSKSectorCode1990" = "SectorCode1990"))
## Replacing "KSKSectorCode2009" with "SectorCode2009" if "KSKSectorCode1990" is non-missing
JP <- JP %>%
mutate(KSKSectorCode2009 =
ifelse(!is.na(KSKSectorCode1990), SectorCode2009, KSKSectorCode2009))
## Remove "SectorCode2009" from JP data
JP$SectorCode2009 = NULL
# Step 2 - 2000
## Merging JP with "SectorCode_Map" using SectorCode2000 as a key
JP <- left_join(JP, SectorCode_Map %>%
select(c(SectorCode2000, SectorCode2009)),
by = c("KSKSectorCode2000" = "SectorCode2000"))
## Replacing "KSKSectorCode2009" with "SectorCode2009" if "KSKSectorCode2000" is non-missing
JP <- JP %>%
mutate(KSKSectorCode2009 =
ifelse(!is.na(KSKSectorCode2000), SectorCode2009, KSKSectorCode2009))
## Remove "SectorCode2009" from JP data
JP$SectorCode2009 = NULL
# Step 3
## Remove "KSKSectorCode1990" & "KSKSectorCode2000"
JP$KSKSectorCode1990 = NULL
JP$KSKSectorCode2000 = NULL
# Step 4 Modification
## replacing KSKSectorCode2009 for 3000 and 8000
## Create the lagged and lead sector codes
JP <- JP %>%
group_by(KSKJPCode) %>%
mutate(KSKSectorCode2009L1 = lag(KSKSectorCode2009, n=1))
JP <- JP %>%
group_by(KSKJPCode) %>%
mutate(KSKSectorCode2009F1 = lead(KSKSectorCode2009, n=1))
## Replacing the missing KSKSectorCode2009 with KSKSectorCode2009L1 if KSKSectorCode2009 is missing
JP$KSKSectorCode2009[is.na(JP$KSKSectorCode2009)] <- JP$KSKSectorCode2009L1[is.na(JP$KSKSectorCode2009)]
## Replacing the missing KSKSectorCode2009 with KSKSectorCode2009F1 if KSKSectorCode2009 is missing
JP$KSKSectorCode2009[is.na(JP$KSKSectorCode2009)] <- JP$KSKSectorCode2009F1[is.na(JP$KSKSectorCode2009)]
## Replacing KSKSectorCode2009 = 3000 if KSKSectorCode1990 == 3000
JP$KSKSectorCode2009[JP$KSKSectorCode1990 == 3000&is.na(JP$KSKSectorCode2009)] = 5210 # Other wholesaling
## Replacing KSKSectorCode2009 = 3000 if KSKSectorCode1990 == 3000
JP$KSKSectorCode2009[JP$KSKSectorCode1990 == 8000&is.na(JP$KSKSectorCode2009)] = 7910 #Other Services
# Step 5
## Combine the "JP" data with the "SectorCode2009" data
SectorCode2009$SectorCode2009 <- as.numeric(SectorCode2009$SectorCode2009)
JP <- left_join(JP, SectorCode2009,
by = c("KSKSectorCode2009" = "SectorCode2009"))
Combining the sector
codes with FA data
# Step 1
## Merging FA with SectorCOde_Map using SectorCode1990 as a key
FA <- left_join(FA, SectorCode_Map %>%
select(c(SectorCode1990, SectorCode2009)),
by = c("KSFSectorCode1990" = "SectorCode1990"))
## Replacing "KSFSectorCode2009" with "SectorCode2009" if "KSFSectorCode2009" is missing
FA$KSFSectorCode2009[is.na(FA$KSFSectorCode2009)] <- FA$SectorCode2009[is.na(FA$KSFSectorCode2009)]
## Remove "SectorCode2009" from FA data
FA$SectorCode2009 = NULL
# Step 2
## Merging FA with SectorCOde_Map using SectorCode2000 as a key
FA <- left_join(FA, SectorCode_Map %>%
select(c(SectorCode2000, SectorCode2009)),
by = c("KSFSectorCode2000" = "SectorCode2000"))
## Replacing "KSFSectorCode2009" with "SectorCode2009" if "KSFSectorCode2000" is non-missing
FA$KSFSectorCode2009[is.na(FA$KSFSectorCode2009)] <- FA$SectorCode2009[is.na(FA$KSFSectorCode2009)]
## Remove "SectorCode2009" from FA data
FA$SectorCode2009 = NULL
# Step 3
## Remove "KSFSectorCode1900" & "KSFSectorCode2000"
FA$KSFSectorCode1990 = NULL
FA$KSFSectorCode2000 = NULL
Creating the country
ISO codes using countrycode
package for the affiliate data
“FA”
# Replace countrynames
# Example) Replace "Estoniya" with "Republic of Estonia" with gsub()
FA$KSFNationNameAlph = gsub("Estoniya", "Republic of Estonia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Latviya", "Republic of Latvia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Lebanese Republic", "Republic of Lebanon", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Portuguese Republic", "Portugal", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Republic of Cote D'ivire", "Ivory Coast", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Slovenija", "Slovenia", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("New Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Serbia and Montenegro", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Socialist Federal Republic of Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Socialist Federal Republic of Yugoslavia", "Yugoslavia", FA$KSFNationNameAlph) # Former Yugoslavia
FA$KSFNationNameAlph = gsub("Netherlands Antilles", "Netherlands", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Saipan", "United States", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("The Cooperative Republic of Guyana", "Guyana", FA$KSFNationNameAlph)
FA$KSFNationNameAlph = gsub("Republic of Namibia", "Namibia", FA$KSFNationNameAlph)
# Create the country ISO codes for Japan
FA <- FA %>%
mutate(iso = countrycode(KSFNationNameAlph, origin = "country.name", destination = "iso3c"))
# Move the column "iso" after the column KSFNationNameAlph
FA <- FA %>%
select(KSFNationNameAlph, iso, everything())
# Create countryname with countrycode package
FA <- FA %>%
mutate(countryname = countrycode(iso, origin = "iso3c", destination = "country.name"))
# Remove "KSFNationNameAlph"
FA$KSFNationNameAlph = NULL
Merging the EU dummy
data with the affiliate data “FA”
# Load the EU dummy data
EU <- import("Data_raw/eu.dta")
# Make the euall column in the EU dummy data as 0
EU$euall <- 1
# Label the column "euall" as "Former/current EU members"
label(EU$euall) <- "Former/current EU members"
# Merge the EU dummy data with the affiliate data
FA <- left_join(FA, EU %>% select(iso_code, year, eu, euall),
by = c("iso" = "iso_code", "year" = "year"))
# Replace the column "eu" if it is NA with 0
FA$eu[is.na(FA$eu)] <- 0
# Replace the column "euall" if it is NA with 0
FA$euall[is.na(FA$euall)] <- 0
# Move the columns "eu" and "europe" after the column "iso"
FA <- FA %>%
select(iso, year, eu, euall, everything())
Combine the FA data
with the WDI data
# Load the WDI.csv
WDI <- import("Data_raw/WDI.csv")
# Select the necessary columns
WDI <- WDI %>%
select(iso3c, year, GDP_PPP, GDP_per_capita, Ease_business)
# Merge the WDI data with the affiliate data
FA <- left_join(FA, WDI,
by = c("iso" = "iso3c", "year" = "year"))
Combine the FA data
with the WGI data
# Load the WGI.csv
WGI <- import("Data_output/WGI.csv")
# Merge the WDI data with the affiliate data
FA <- left_join(FA, WGI,
by = c("iso" = "code", "year" = "year"))
Combine the FA data
with the distance data from the CEPII’s Gravity database
# Load the CEPII's distance data
distance <- import("Data_raw/distance.dta")
# Keep the iso_o=="JPN" and iso_d=="iso"
distance <- distance %>%
filter(iso3_o=="JPN")
# Select the necessary columns
distance <- distance %>%
select(iso3_d, dist)
# Merge the distance data with the affiliate data
FA <- left_join(FA, distance,
by = c("iso" = "iso3_d"))
Combine the Japanese
ownership data “SHJP” with the Japanese parent firms’ financial data
“JP”
# Select the necessary columns from JP
JP_select <- JP %>%
select(KSKJPCode, year, KSKCommonName, KSKCommonNameAlphabet,
KSKSectorCode2009, SectorNameAlph, SectorClass, SectorClassNameAlph)
# Merge Japanese parent firms' ownership data (SHJPFA) with Japanese parent firms' financial data (JP)
SHJP_JP <- left_join(SHJP, JP_select,
by = c("KSKJPCode" = "KSKJPCode",
"year" = "year"))
# Remove "KSK" from the column names of SHJP_JP
names(SHJP_JP) <- gsub("KSK", "", names(SHJP_JP))
# Remove "KSF" from the column names of SHJP_JP
names(SHJP_JP) <- gsub("KSF", "", names(SHJP_JP))
# Remove "KS" from the column names of SHJP_JP
names(SHJP_JP) <- gsub("KS", "", names(SHJP_JP))
# Add "Parent_" to the column names starting "Common"
names(SHJP_JP) <- gsub("^Common", "Parent_", names(SHJP_JP))
# Add "Parent_" to the column names starting "Sector"
names(SHJP_JP) <- gsub("^Sector", "Parent_Sector", names(SHJP_JP))
# Remove "2009" from the column "Parent_SectorCode2009"
SHJP_JP$Parent_SectorCode2009 = gsub("2009", "", SHJP_JP$Parent_SectorCode2009)
Combine the Japanese
ownership data “SHJP_JP” with the Japanese affiliates’ basic data
“FA”
# Merge Japanese parent firms' ownership data (SHJP) and Japanese affiliates' basic data (FA) using lef_join()
SHJP_JP_FA <- left_join(SHJP_JP, FA,
by = c("AffiliateCode" = "KSFAffiliateCode",
"year" = "year"))
# Drop year.x and year.y
SHJP_JP_FA$year.x = NULL
SHJP_JP_FA$year.y = NULL
# Sort the data by KSKJPCode, iso, and year
SHJP_JP_FA = SHJP_JP_FA[order(SHJP_JP_FA$JPCode,
SHJP_JP_FA$iso,
SHJP_JP_FA$year), ]
# Remove duplicates
SHJP_JP_FA <- SHJP_JP_FA %>%
distinct(JPCode, year, AffiliateCode, .keep_all = TRUE)
# Rename "KSFSectorCode2009" to "FA_Sector"
colnames(SHJP_JP_FA)[colnames(SHJP_JP_FA) == "KSFSectorCode2009"] <- "FA_Sector"
# Rename "KSFSectorName" to "FA_SectorName"
colnames(SHJP_JP_FA)[colnames(SHJP_JP_FA) == "KSFSectorName"] <- "FA_SectorName"
# Remove "KSF" from the columns names
names(SHJP_JP_FA) <- gsub("KSF", "", names(SHJP_JP_FA))
The number of
affiliates in EU
# Create a dummy variable for iso == GBR
SHJP_JP_FA <- SHJP_JP_FA %>%
mutate(GBR = ifelse(iso == "GBR", 1, 0))
# Creating the number of affiliate in GBR
SHJP_JP_FA <- SHJP_JP_FA %>%
group_by(JPCode, year) %>%
mutate(N_Aff_GBR = sum(GBR))
# Creating the number of affiliates in EU
SHJP_JP_FA <- SHJP_JP_FA %>%
group_by(JPCode, year) %>%
mutate(N_Aff_EU = sum(euall))
# Substracting N_Aff_GBR from N_Aff_EU
SHJP_JP_FA <- SHJP_JP_FA %>%
mutate(N_Aff_EU = N_Aff_EU - N_Aff_GBR)
# Barlot by the existence of affiliates in EU
SHJP_JP_FA$N_Aff_EU_dummy <- ifelse(SHJP_JP_FA$N_Aff_EU > 0, 1, 0)
barplot(table(SHJP_JP_FA$N_Aff_EU_dummy),
main = "The existence of affiliates in EU except the UK")

SHJP_JP_FA$N_Aff_EU_dummy <- NULL
# Histogram of the number of affiliates in EU if N_Aff_EU > 0
barplot(table(SHJP_JP_FA$N_Aff_EU[SHJP_JP_FA$N_Aff_EU > 0]),
main = "The number of affiliates in EU except the UK")

# Mean of the number of affiliates in EU
mean(SHJP_JP_FA$N_Aff_EU, na.rm = TRUE)
## [1] 3.53687
# # Mean of the number of affiliates in EU if N_Aff_EU > 0
mean(SHJP_JP_FA$N_Aff_EU[SHJP_JP_FA$N_Aff_EU > 0], na.rm = TRUE)
## [1] 6.32217
Example data
# Example data
Daikin = subset(SHJP_JP_FA, SHJP_JP_FA$Parent_NameAlphabet == "DAIKIN INDUSTRIES, LTD.")
GBR = subset(SHJP_JP_FA, SHJP_JP_FA$iso=="GBR")
Distribution of the
observations
library(ggplot2)
# plot the distribution of the observations by year
SHJP_JP_FA %>%
group_by(year) %>%
summarise(n = n()) %>%
ggplot(aes(x = year, y = n)) +
geom_bar(stat = "identity") +
labs(title = "Distribution of the observations by year",
x = "Year",
y = "Number of observations")

Distribution of the
observations by country
# plot the distribution of the observations by top 20 countries
SHJP_JP_FA %>%
group_by(countryname) %>%
summarise(n = n()) %>%
top_n(20, n) %>%
ggplot(aes(x = reorder(countryname, n), y = n)) +
geom_bar(stat = "identity") +
coord_flip() +
labs(title = "Distribution of the observations by country",
x = "Country",
y = "Number of observations")

Describe the data
library(gtsummary)
# Ungroup SHJP_JP_FA to prevent describe group variables JPCode & year
SHJP_JP_FA <- ungroup(SHJP_JP_FA)
# Describe the data
SHJP_JP_FA %>%
select(year, eu, Parent_SectorClassNameAlph, ParentJPOwnRatio,
GDP_PPP, GDP_per_capita, Ease_business,
pvr, ger, rqr, rlr, ccr, dist, N_Aff_EU) %>%
tbl_summary()
Characteristic |
N = 357,172 |
year |
2,010 (2,000, 2,017) |
eu |
40,961 (11%) |
Unknown |
3 |
Parent_SectorClassNameAlph |
|
Agriculture,Mining |
2,280 (0.6%) |
HeadQuarter |
83 (<0.1%) |
Manufacturing |
215,362 (60%) |
Retail |
6,215 (1.7%) |
Service,Others |
62,488 (17%) |
Wholesale |
70,717 (20%) |
Unknown |
27 |
ParentJPOwnRatio |
100 (65, 100) |
Unknown |
3 |
GDP_PPP |
2,164,812,825,331 (631,965,321,180, 11,884,957,832,397) |
Unknown |
18,983 |
GDP_per_capita |
19,995 (10,628, 46,865) |
Unknown |
18,983 |
Ease_business |
77 (67, 84) |
Unknown |
274,378 |
pvr |
50 (28, 71) |
Unknown |
42,323 |
ger |
76 (59, 91) |
Unknown |
42,354 |
rqr |
67 (45, 93) |
Unknown |
42,354 |
rlr |
64 (43, 91) |
Unknown |
42,294 |
ccr |
61 (41, 92) |
Unknown |
42,329 |
dist |
5,310 (2,109, 9,582) |
Unknown |
90 |
N_Aff_EU |
1 (0, 3) |
Unknown |
43 |
Export the data
# Export the data
export(SHJP_JP, "Data_output/SHJP_JP.csv")
export(SHJP_JP_FA, "Data_output/SHJP_JP_FA.csv")
export(FA, "Data_output/FA.csv")
export(JP, "Data_output/JP.csv")
export(GBR, "Data_output/GBR.csv")
export(Daikin, "Data_output/Daikin.csv")
export(SectorCode_Map, "Data_output/SectorCode_Map.csv")
export(SectorCode2009, "Data_output/SectorCode2009.csv")
Remove the loaded
data
# Remove all data
#rm(list = ls())