Welcome to the RPubs for the paper on patent diffusion, titled: “Has the East met the West? Spatial Dynamics of Innovation in Europe” (by Katarzyna Kopczewska and Katarzyna Piotrowska).

The paper was written by Spatial Warsaw team based at the Faculty of Economic Sciences at University of Warsaw. Visit us on LinkedIn.

This site is a collection of all R codes included in this paper. They enable reproducing the analysis presented in the paper.

Introduction: about the data

The paper uses two datasets on patents.

The first dataset includes the geo-located cases of patents by inventors locations for years 1980-2014. It was published in Scientific Data. De Rassenfosse, G., Kozak, J., & Seliger, F. (2019). Geocoding of worldwide patent data. Scientific data, 6(1), 260.

This original dataset was reduced for the purpose of our analysis. To enable reproducing the results, we share the partial datasets, derived on the basis of the original full dataset.

The second dataset includes regional fractional counts of patents for years 1995-2022 as reported by OECD in Atlas of Regions and Cities

Links to data will be in sections below. These are raw data as well as pre-processed data that enable replication of results.

# starter for work
setwd("C:/my_folder_with_data") # set working directory - please set own path

# read the universal packages (when downloaded) and set seed
library(sf)         # for sf class
library(dplyr)      # for data processing
library(tidyr)      # for data processing
library(readr)      # for read_delim()
library(readxl)         # for import of Excel files

library(rworldmap)  # for maps of countries
library(eurostat)   # for EUROSTAT data
library(giscoR)     # for EUROSTAT data

library(ggplot2)        # for plotting
library(ggrepel)        # for labels in ggplot
library(viridisLite)    # for colour palettes

library(spatialWarsaw)  # for ETA
library(ineq)       # for Gini index

set.seed(9876)      # for reproducibility of all analyses

1. Read data

Read data from de Rassenfosse et al.(2019)

The original de Rassenfosse et al.(2019) dataset is stored in geoc_app.txt file, available from Harvard repository.

# this is not obligatory step - outcome of this operation is saved as a file
# please use data as published by Rassenfosse et al.(2019)

# read patent data from the Scientific Data paper
patents<-read_delim("geoc_app.txt") # mamy tibble
dim(patents) # [1] 23.904.046       16
labels<-unique(patents$ctry_code)

# select European countries
europe.east<-c("CZ", "HU", "SI", "SK", "BG", "PL", "RO", "HR", "LT", "LV", "EE") 
europe.west<-c("FI", "GB", "DE", "IT", "CH", "FR", "AT", "DK", "BE", "LI", "LU", "NL", "HU", "SE", "IE", "NO", "SI", "ES", "MT", "GR", "PT", "IS") 
europe<-c("FI", "GB", "DE", "IT", "CH", "FR", "AT", "DK", "BE", "LI", "LU", "NL", "HU", "SE", "IE", "NO", "SI", "ES", "MT", "GR", "PT", "IS", "CZ", "HU", "SI", "SK", "BG", "PL", "RO", "HR", "LT", "LV", "EE")

patents.europe<-nature[nature$ctry_code %in% europe,] # select European countries
dim(patents.europe) #[1] 3.247.672      16
pat.europe.df<-as.data.frame(patents.europe)
write.table(pat.europe.df, file="pat_europe_app.txt")

# upgrade of dataset with labels for time dimension
pat.europe<-read.table("pat_europe_app.txt") # mamy df
pat.europe$ones<-rep(1, times=dim(pat.europe)[1])
pat.europe$year<-format(as.Date(pat.europe$filing_date), "%Y")
pat.europe$slot<-0
pat.europe$slot[pat.europe$year>=1980 & pat.europe$year<=1984]<-"years_1980_1984"
pat.europe$slot[pat.europe$year>=1985 & pat.europe$year<=1989]<-"years_1985_1989"
pat.europe$slot[pat.europe$year>=1990 & pat.europe$year<=1994]<-"years_1990_1994"
pat.europe$slot[pat.europe$year>=1995 & pat.europe$year<=1999]<-"years_1995_1999"
pat.europe$slot[pat.europe$year>=2000 & pat.europe$year<=2004]<-"years_2000_2004"
pat.europe$slot[pat.europe$year>=2005 & pat.europe$year<=2009]<-"years_2005_2009"
pat.europe$slot[pat.europe$year>=2010 & pat.europe$year<=2014]<-"years_2010_2014"
pat.europe$slots<-0
pat.europe$slots[pat.europe$year>=1980 & pat.europe$year<=1984]<-"slot1"
pat.europe$slots[pat.europe$year>=1985 & pat.europe$year<=1989]<-"slot2"
pat.europe$slots[pat.europe$year>=1990 & pat.europe$year<=1994]<-"slot3"
pat.europe$slots[pat.europe$year>=1995 & pat.europe$year<=1999]<-"slot4"
pat.europe$slots[pat.europe$year>=2000 & pat.europe$year<=2004]<-"slot5"
pat.europe$slots[pat.europe$year>=2005 & pat.europe$year<=2009]<-"slot6"
pat.europe$slots[pat.europe$year>=2010 & pat.europe$year<=2014]<-"slot7"
jitter.lat<-rnorm(dim(pat.europe)[1], 0,0.0001) # avoid overlapping locations - allocation by 1 m
jitter.lng<-rnorm(dim(pat.europe)[1], 0,0.0001)
pat.europe$lat<-pat.europe$lat+jitter.lat
pat.europe$lng<-pat.europe$lng+jitter.lng
write.table(pat.europe, file="pat_europe_app2.txt")

The effect of the above operation is available to download. We selected European subset of de Rassenfosse et al.(2019) dataset, which is pat_europe_app2.txt and should be read as pat.europe object.

# read prepared data
pat.europe<-read.table("pat_europe_app2.txt") # mamy df
pat.europe.sf<-st_as_sf(pat.europe, coords=c("lng", "lat"), crs=4326)

Read countours of countries from the R package rworldmap::

# get maps of countries for ETA 
library(rworldmap)  # for maps of countries
worldMap<-getMap()  # from rworldmap ::, SpatialPolygonsDataFrame
maps<-st_as_sf(worldMap, crs=4326)
head(maps)  # names of countries in maps

2. Maps of European regions

Prepare a map with a population - to calculate total patents per capita

# population in NUTS2 from EUROSTAT with shapefile
library(sf)
library(dplyr)
library(eurostat)
library(giscoR)

# List of countries 
country <- c("FI","GB","DE","IT","CH","FR","AT","DK","BE","LI","LU","NL","SE","IE","NO","ES","MT","GR","PT","IS","CZ","HU","SI","SK","BG","PL","RO", "HR","LT","LV","EE")

# IMPORTANT: Eurostat/GISCO country codes for NUTS use:
# - UK (not GB)
# - EL (not GR)
country_nuts<-country       # Keep everything else as-is
country_nuts[country_nuts == "GB"]<-"UK"
country_nuts[country_nuts == "GR"]<-"EL"

# download NUTS2 geometries (Eurostat GISCO)
# NUTS 2024 is current in GISCO distribution; resolution "01" = 1:1M; EPSG:4326
nuts2<-gisco_get_nuts(year=2016, nuts_level=2, resolution="01", epsg=4326) %>% filter(CNTR_CODE %in% country_nuts)

# download population at NUTS2 (Eurostat) - recommended: tgs00096 (Population on 1 January by NUTS 2 region)
pop<-get_eurostat("tgs00096", time_format="num")
pop_2016 <- pop %>% filter(TIME_PERIOD==2016) %>% transmute(NUTS_ID=geo, pop=values, year=TIME_PERIOD)

# join population onto geometry
nuts2_pop<-nuts2 %>% left_join(pop_2016, by="NUTS_ID")

# exclude far-away islands
exclude_nuts <- c(
  "^FRY", "^FR9",   # French overseas
  "^ES70",          # Canary Islands
  "^PT20", "^PT30", # Azores, Madeira
  "^NO0B"           # Svalbard
)

nuts2_conti<-nuts2_pop %>% filter(!grepl(paste(exclude_nuts, collapse="|"), NUTS_ID))
nuts2_conti.df<-as.data.frame(nuts2_conti)

# export ESRI Shapefile
st_write(nuts2_pop, "nuts2_population.shp", delete_layer=TRUE)

# plot population count on a regional level
ggplot() + geom_sf(data=nuts2_conti, aes(fill=pop)) + theme_minimal() + scale_fill_distiller(palette = "Spectral")

Regional NUTS2 map of population

Assign patents to regions to obtain patents per capita

# assign patents to regions (to the map with population) - to know which patent is located in which region
temp1<-st_join(pat.europe.sf, nuts2_conti, join=st_intersects)
#st_write(temp1, "pat.europe.NUTS2label.shp", delete_layer=TRUE) # save the output
#temp1.df<-as.data.frame(temp1)
#write.table(temp1.df, "pat.europe.NUTS2label.txt") # save the joined databases without map attached

# aggregate patents by NUTS regions to count total number (sum)
temp1.agg<-aggregate(temp1$ones, by=list(temp1$geo), sum) 

# merge datasets
temp2<-merge(nuts2_conti, temp1.agg, by.x="NUTS_ID", by.y="Group.1", all.x=TRUE, sort=FALSE)
temp3<-merge(temp2, all, by.x="CNTR_CODE", by.y="country", all.x=TRUE)
temp2$pat.per.capita<-temp2$x/temp2$pop # patents per capita

var<-"pat.per.capita"  # name of variable to be analysed
sf_obj<-temp2 # name of object to be used

# thresholds and labels for the legend (increasingly)
brks<-c(-Inf, 0.0005, 0.001, 0.0025, 0.005, 0.0075, 0.01, 0.05, 0.1, Inf)
labs<-c("<0.0005", "<0.001", "<0.0025", "<0.005", "<0.0075", "<0.01", "<0.05", "<0.1", "<0.2")

sf_obj2<-sf_obj %>% mutate(cls=cut(.data[[var]], breaks=brks, labels=labs,
include.lowest=TRUE, right=FALSE)) %>% mutate(cls=factor(cls, levels=labs))

n<-length(labs) # number of colours
pal<-viridis(n, direction = -1) # colour palette

ggplot(sf_obj2) + geom_sf(aes(fill=cls), color="white", linewidth=0.3) +
  scale_fill_manual(values=pal, drop=FALSE, na.value="grey90") +
  guides(fill=guide_legend(title=NULL)) + theme_void() + theme(legend.position="bottom")

Figure 1: The average annual number of patents issued between 1980-2014 grouped in regions by innovator locations (per 1,000 inhabitants)

Calculate and map the growth rate of patents between first and last years

# growth rate of patents between slot1 and slot7
temp1.agg1<-aggregate(temp1$ones[temp1$slots=="slot1"], by=list(temp1$geo[temp1$slots=="slot1"]), sum)  # patents in slot1
temp1.agg2<-aggregate(temp1$ones[temp1$slots=="slot2"], by=list(temp1$geo[temp1$slots=="slot2"]), sum)  # patents in slot2
temp1.agg3<-aggregate(temp1$ones[temp1$slots=="slot3"], by=list(temp1$geo[temp1$slots=="slot3"]), sum)  # patents in slot3
temp1.agg4<-aggregate(temp1$ones[temp1$slots=="slot4"], by=list(temp1$geo[temp1$slots=="slot4"]), sum)  # patents in slot4
temp1.agg5<-aggregate(temp1$ones[temp1$slots=="slot5"], by=list(temp1$geo[temp1$slots=="slot5"]), sum)  # patents in slot5
temp1.agg6<-aggregate(temp1$ones[temp1$slots=="slot6"], by=list(temp1$geo[temp1$slots=="slot6"]), sum)  # patents in slot6
temp1.agg7<-aggregate(temp1$ones[temp1$slots=="slot7"], by=list(temp1$geo[temp1$slots=="slot7"]), sum)  # patents in slot7

# merge datasets
temp3<-merge(nuts2_conti, temp1.agg1, by.x="NUTS_ID", by.y="Group.1", all.x=TRUE, sort=FALSE)
temp4<-merge(temp3, temp1.agg7, by.x="NUTS_ID", by.y="Group.1", all.x=TRUE, sort=FALSE)
temp4$growth.rate<-temp4$x.y/temp4$x.x # growth rate of patents between time slot7 and slot1

var<-"growth.rate"  # name of the variable to be plotted
sf_obj<-temp4 # name of the sf object to be used

# threholds and lables for the legend
brks<-c(-Inf, 1, 2, 4, 6, 8, 10, 50, 100, Inf)
labs<-c("<1", "<2", "<4", "<6", "<8", "<10", "<50", "<100", "<1000")

sf_obj2<-sf_obj %>% mutate(cls=cut(.data[[var]], breaks=brks, labels=labs,
include.lowest=TRUE, right=FALSE)) %>% mutate(cls=factor(cls, levels=labs))

n<-length(labs)
pal<-viridis(n, direction=-1) # colour palette

ggplot(sf_obj2) + geom_sf(aes(fill=cls), color="white", linewidth=0.3) +
  scale_fill_manual(values=pal, drop=FALSE, na.value="grey90") +
  guides(fill=guide_legend(title=NULL)) + theme_void() + theme(legend.position="bottom")

Figure 2: Regional growth rate of number of partents between 1980-1984 and 2010-2014

3. Circular histograms for patents by regions in selected country

# circular histograms for a selected year
# use pat.europe object
temp1<-pat.europe

# subset for a selected year
my.year==1980 # selected year
temp1.agg<-aggregate(temp1$ones[temp1$year.x==my.year], by=list(temp1$geo[temp1$year.x==1980]), sum)
temp2<-merge(nuts2_conti, temp1.agg, by.x="NUTS_ID", by.y="Group.1", all.x=TRUE, sort=FALSE)
sub<-data.frame(country=temp2$CNTR_CODE, regions=temp2$NAME_LATN, patents=temp2$x)

# function to process data and to generate radial histogram
plot_radial<-function(data, country_code) {
  df_plot<-data %>% filter(country==country_code) %>%
    arrange(desc(patents)) %>% mutate(id = row_number(),
      angle = 90 - 360 * (id - 0.5) / n(),
      hjust = ifelse(angle < -90, 1, 0),
      angle = ifelse(angle < -90, angle + 180, angle))
  gap<-max(df_plot$patents, na.rm = TRUE) * 0.015

ggplot(df_plot, aes(x = factor(id), y=patents, fill=patents)) + geom_col(width=1, color="white") + coord_polar(start=0) + theme_void() + geom_text(aes(y=patents*0.5, label=patents), color="white", size=3.2, fontface="bold") + geom_text(aes(y=patents + gap, label=regions, hjust=hjust, angle=angle), size=3) + scale_fill_viridis_c(option="D", guide="none", direction=1)
}

# run the function and plot radial histogram
plot_radial(sub, "PL")

Plots of radial histogram

4. Calculate Gini index for regional patent data

Calculate Gini index from de Rassenfosse et al.(2019) data

This is the code we used to derive Gini for cumulative number of patents for each year between 1980-2014 for each European country. Please use already aggregated regionally data, available as pat_nat_reg_n.txt and import it to data.frame n.nat object.

# read data on patent counts by regions - aggregated from de Rassenfosse et al. (2019) data
n.nat<-read.delim("pat_nat_reg_n.txt", sep="\t", dec =".", fileEncoding="UTF-16LE", stringsAsFactors=FALSE, check.names=FALSE)
n.nat<-n.nat[,1:39]

# Gini in 5-years rolling window for each region
year_cols<-grep("^y\\d{4}$", names(n.nat), value=TRUE)

n.nat2<-n.nat %>% mutate(across(all_of(year_cols), ~ as.numeric(gsub(",", ".", gsub("[^0-9\\.]", "", as.character(.x))))))

df_long<-n.nat2 %>% pivot_longer(cols=all_of(year_cols), names_to="Year", values_to="Value") %>% mutate(Year=as.integer(sub("^y", "", Year)))

window_size<-5

region_window_sum <- df_long %>%
  mutate(window_end = Year + (window_size - 1)) %>%
  filter(window_end <= max(Year)) %>%
  group_by(east.west, Country, Name, window_end) %>%
  summarise(WindowSum = sum(Value, na.rm = TRUE), .groups = "drop")

gini_rolling<-region_window_sum %>% group_by(east.west, Country, window_end) %>% summarise(Gini=if (sum(WindowSum, na.rm = TRUE) == 0) 0 else ineq(WindowSum, type = "Gini"), .groups="drop") %>% rename(Year=window_end)
gini_wide<-gini_rolling %>%  mutate(Year=as.integer(Year)) %>%
  pivot_wider(id_cols=c(east.west, Country), names_from=Year, values_from=Gini, names_prefix="Gini_") %>% arrange(east.west, Country)
#gini.wide.df<-as.data.frame(gini_wide)
#write.table(gini.wide.df, "gini_rolling_nature.txt")

# linear plot - countries across years - for Western Europe countries
# for Central-Eastern countries please change to 'east' at data selction and in the title of figure

# prepare data
west_data<-gini_rolling %>% filter(east.west=="east")

# general average (exclude zeros)
west_avg<-west_data %>% filter(Gini>0) %>% group_by(Year) %>% summarise(Mean_Gini=mean(Gini, na.rm=TRUE), .groups="drop")

# labels at start and end period
lab_start<-west_avg %>% slice_min(Year, with_ties=FALSE) %>% mutate(label=sprintf("Avg: %.2f", Mean_Gini))
lab_end<-west_avg %>% slice_max(Year, with_ties=FALSE) %>% mutate(label=sprintf("Avg: %.2f", Mean_Gini))

# plot
ggplot() + geom_line(data=west_data %>% filter(Gini>0), aes(x=Year, y=Gini, group=Country, color=Country), linewidth=1, alpha=0.9) + geom_point(data=west_data %>% filter(Gini>0), aes(x=Year, y=Gini, color=Country), size=2) + geom_line(data=west_avg, aes(x=Year, y=Mean_Gini), color="black", linewidth=1.5) + geom_point(data=west_avg, aes(x=Year, y=Mean_Gini), color="black", size=2) + geom_label(data=lab_start, aes(x=Year, y=Mean_Gini, label=label), fill="white", color="black", label.size=0.3, hjust=0, nudge_x=0.5) + geom_label(data=lab_end, aes(x=Year, y=Mean_Gini, label=label), fill="white", color="black", label.size=0.3, hjust=1, nudge_x=-0.5) + theme_minimal() + labs(title="Rolling 5-year Gini Index by countries (East)\nbased on individual point data", x="Year (end of 5-year window)", y="Gini index", color="Country" ) + theme(plot.title=element_text(face="bold"), legend.position="right")

Gini coefficient for a regional number of patents in Western European and Central-Eastern European countries - rolling window of 5-years average - years 1980-2014

Calculate Gini index from the OECD Regions and cities atlas data

This is the code we used to derive Gini for cumulative number of patents for each year between 1995-2022 for each European country. Please use already aggregated regionally data, available as oecd_tl3.txt and import it to data.frame n.oecd3 object.

# read data on patent counts by regions - from OECD
n.oecd3<-read.delim("oecd_tl3.txt", sep = "\t", dec = ".", fileEncoding = "UTF-16LE", stringsAsFactors = FALSE, check.names = FALSE)
n.oecd3[is.na(n.oecd3)]<-0

# Gini index in 5-years rolling window for each country
df_long<-n.oecd3 %>% pivot_longer(cols=matches("^y\\d{4}$"), names_to="Year", values_to="Value") %>% mutate(Year=as.integer(sub("^y_", "", Year)))
window_size<-5
df_windows<-df_long %>% group_by(Country, Name) %>% arrange(Year) %>% mutate(window_id=Year-(window_size-1)) %>% ungroup()

region_window_sum<-df_windows %>% filter(window_id >= min(Year)) %>% group_by(east.west, Country, Name, window_id) %>% summarise(WindowSum=sum(Value, na.rm=TRUE), .groups="drop")

gini_rolling<-region_window_sum %>% group_by(east.west, Country, window_id) %>% summarise(Gini=if (sum(WindowSum) == 0) 0 else ineq(WindowSum, type="Gini"), .groups="drop" ) %>% rename(Year=window_id)

gini_wide<-gini_rolling %>% mutate(Year=as.integer(Year)) %>% pivot_wider(id_cols=c(east.west, Country), names_from=Year, values_from=Gini, names_prefix="Gini_") %>% arrange(east.west, Country)
gini.wide.df<-as.data.frame(gini_wide)
#write.table(gini.wide.df, "gini_rolling_oecd_TL3.txt")

Please use already aggregated regionally data, available as gini_rolling_oecd_TL3.txt and import it to data.frame gini_rolling object.

# linear plot - countries across years - for Western Europe countries
# for Central-Eastern countries please change to 'east' at data selction and in the title of figure

# read and prepare data
gini_rolling<-read.table("gini_rolling_oecd_TL3.txt")
west_data<-gini_rolling %>% filter(east.west=="east")

# calculate average (exclude zeros)
west_avg<-west_data %>% filter(Gini>0) %>% group_by(Year) %>% summarise(Mean_Gini=mean(Gini, na.rm=TRUE), .groups="drop")

# create labels at start and end periods
lab_start<-west_avg %>% slice_min(Year, with_ties=FALSE) %>% mutate(label=sprintf("Avg: %.2f", Mean_Gini))
lab_end<-west_avg %>% slice_max(Year, with_ties=FALSE) %>% mutate(label=sprintf("Avg: %.2f", Mean_Gini))

# plot
ggplot() + geom_line(data=west_data %>% filter(Gini>0), aes(x=Year, y=Gini, group=Country, color=Country), linewidth=1, alpha=0.9) + geom_point(data=west_data %>% filter(Gini>0), aes(x=Year, y=Gini, color=Country), size=2) + geom_line(data=west_avg, aes(x=Year, y=Mean_Gini), color="black", linewidth=1.5) + geom_point(data=west_avg, aes(x=Year, y=Mean_Gini), color="black", size=2) + geom_label(data=lab_start, aes(x=Year, y=Mean_Gini, label=label), fill="white", color="black", label.size=0.3, hjust=0, nudge_x=0.5) + geom_label(data=lab_end, aes(x=Year, y=Mean_Gini, label=label), fill="white", color="black", label.size=0.3, hjust=1, nudge_x=-0.5) + theme_minimal() + labs(title="Rolling 5-year Gini Index by countries (East)\nbased on TL3 OECD data", x="Year (end of 5-year window)", y="Gini index", color="Country") + theme(plot.title=element_text(face="bold"), legend.position="right")

Gini coefficient for a regional number of patents in Western European and Central-Eastern European countries - rolling window of 5-years average - years 1995-2022

5. Evolution of spatial concentration of patents in regions

Please use already aggregated regionally data, available as Gini.xlsx and import it to data.frame df object.

library(tidyverse)
library(readxl)

# Load data
df<-read_excel("Gini.xlsx")

# prepare data and visualise
# reshape wide to long format
long<-df %>% pivot_longer(cols=starts_with("Gini_"), names_to="year", values_to="gini") %>% mutate(year=as.integer(str_remove(year, "Gini_")), east_west=east.west)

# overall mean and standar deviation
overall_df<-long %>% group_by(year) %>% summarise(mean_gini=mean(gini, na.rm=TRUE), sd_gini=sd(gini, na.rm=TRUE), .groups="drop")

# average values in groups of East / West countries
ew_df<-long %>% group_by(year, east_west) %>% summarise(mean_gini=mean(gini, na.rm=TRUE), .groups="drop")

# plot
ggplot(overall_df, aes(x=year, y=mean_gini)) + geom_ribbon(aes(ymin=mean_gini-sd_gini, ymax=mean_gini+sd_gini), alpha=0.25) + geom_line(linewidth=1.3) + geom_line(data=ew_df, aes(x=year, y=mean_gini, linetype=east_west), linewidth=1) + labs(x="Year", y="Average Gini (within-region patent concentration)", title="U-shaped evolution of patent concentration", subtitle="Overall mean with ±1 SD ribbon; East and West averages shown as lines", linetype="Region") + theme_minimal(base_size=12) + theme(legend.position="bottom")

Figure 6: U-shaped temporal trajectory of Gini index – changes of spatial concentration of innovations

6. Cumulative distributions of patents in 1980 and 2014

Please use already aggregated regionally data, available as n_nature.xlsx and import it to data.frame df object.

# read data
df<-read_excel("n_nature.xlsx", sheet=1)
df<-df %>% filter(east.west=="west") # for CEE countries change to 'east'

# define periods
early_years<-1980
late_years<-2014

early_cols<-paste0("y", early_years)
late_cols<-paste0("y", late_years)

# aggregate patents by region
df2<-df %>% mutate(patents_early=rowSums(across(all_of(early_cols)), na.rm=TRUE), patents_late=rowSums(across(all_of(late_cols)), na.rm=TRUE)) %>% filter(patents_early>0 | patents_late>0)

# order regions by later-period patents
df_ord<-df2 %>% arrange(desc(patents_late), desc(patents_early)) %>% mutate(rank=row_number())

# cumulative percentages
sum_early<-sum(df_ord$patents_early)
sum_late<-sum(df_ord$patents_late)

plot_df<-df_ord %>% transmute(rank, cum_early=cumsum(patents_early) / sum_early * 100,
    cum_late=cumsum(patents_late) / sum_late  * 100)
n_regions<-max(plot_df$rank)

# plot
ggplot(plot_df, aes(x=rank)) + geom_ribbon(aes(ymin=0, ymax=cum_early), fill="grey80") + geom_ribbon(aes(ymin=cum_early, ymax=cum_late), fill="#1f4e99", alpha=0.9) + geom_line(aes(y=cum_early), linewidth=1, color="black", linetype="dashed") + geom_line(aes(y=cum_late), linewidth=1.2, color="black") + geom_abline(intercept=0, slope=100 / n_regions, linetype="dotted") + scale_x_continuous(breaks=seq(0, n_regions, by=25), limits=c(0, n_regions)) + scale_y_continuous(limits=c(0, 100), breaks=seq(0, 100, by=10) ) + labs(x="Cumulative number of regions", y="Cumulative percent of patents", title="Western European countries", subtitle="Cumulative curves for 2014 (blue) and 1980 (grey)") + theme_minimal(base_size=12) + theme(panel.grid.minor=element_blank(), panel.grid.major.x=element_blank())

Figure 7: Regional concentration of patent grouped by innovator locations: (a) Central-Eastern Europe, (b) West Europe (2014- grey area, 1980 – gray and blue area)

8. ETA (Entropy-Tesselation-Agglomeration) for point data from de Rassenfosse et al.(2019)

# function for ETA (Entropy-Tesselation-Agglomeration)
# it is also available in spatialWarsaw:: package
own.eta.noplot<-function(points_sf, region_sf) { 
crds <- as.data.frame(st_coordinates(points_sf))
colnames(crds) <- c("X_coord", "Y_coord")
crds_sf<-st_as_sf(crds, coords=c("X_coord", "Y_coord"), crs=st_crs(points_sf))

crds_sf <- st_transform(crds_sf, crs = 3857)
region_sf <- st_transform(region_sf, crs = 3857)
crds_sfc <- st_geometry(crds_sf)
region_sfc <- st_geometry(region_sf)
crds_sfc_union <- st_union(crds_sfc)
tess_result <- st_voronoi(crds_sfc_union, region_sfc)
tess_result<-st_intersection(st_cast(tess_result), st_union(region_sfc))
tess_area <- st_area(tess_result)
tess_area_rel <- tess_area/sum(tess_area)
S_ent <- sum(-1 * tess_area_rel * log(tess_area_rel))
ent.ref <- log(1/length(tess_area)) * (-1)
H_ent <- S_ent/ent.ref
list(H_ent = as.numeric(H_ent), n_points = round(length(tess_area), 2))
}

# prepare classification and selection objects
country<-c("FI", "GB", "DE", "IT", "CH", "FR", "AT", "DK", "BE", "LI", "LU", "NL", "SE", "IE", "NO", "ES", "MT", "GR", "PT", "IS", "CZ", "HU", "SI", "SK", "BG", "PL", "RO", "HR", "LT", "LV", "EE") # 32 countries, i
country2<-c("Finland", "Great Britain", "Germany", "Italy", "Switzerland", "France", "Austria", "Denmark", "Belgium", "Lichtenstein", "Luxemburg", "Netherlands", "Sweden", "Irland", "Norway", "Spain", "Malta", "Greece", "Portugal", "Island", "Czech", "Hungary", "Slovenia", "Slovakia", "Bulgaria", "Poland", "Romania", "Croatia", "Lithuania", "Lativa", "Estonia") # countries labeled as i
east.west<-c("west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "west", "east", "east", "east", "east", "east", "east", "east", "east", "east", "east", "east")
all<-cbind(country, country2, east.west)
period<-c("slot1", "slot2", "slot3", "slot4", "slot5", "slot6", "slot7") # j

# Gini, ETA and n for cumulative number of patents by regions

output.gini.cum<-matrix(0, ncol=31, nrow=35) # rows for 35 years, cols for 31 countries
output.eta.cum<-matrix(0, ncol=31, nrow=35) # rows for 35 years, cols for 31 countries
output.n.cum<-matrix(0, ncol=31, nrow=35) # rows for 35 years, cols for 31 countries

for(i in 1:31){ # by countries
my.country<-country[i]
my.map<-maps[maps$ISO_A2==my.country,]
n.reg<-length(unique(pat.europe.sf$name_1[pat.europe.sf$ctry_code==my.country]))

for(j in 1:35){ # by years
my.pat<-pat.europe.sf[pat.europe.sf$ctry_code==my.country & pat.europe.sf$year<1980+j, ]

my.eta<-eta(my.pat, my.map) # calculate ETA, with spatialWarsaw:: package
output.eta.cum[j,i]<-my.eta$H_ent
output.n.cum[j,i]<-my.eta$n_points

my.agg<-aggregate(my.pat$ones, by=list(my.pat$name_1), sum)
n.reg.diff<-n.reg-dim(my.agg)[1]
x<-c(my.agg$x, rep(0, times=n.reg.diff))
my.gini<-Gini(x)        # Gini by regions
output.gini.cum[j,i]<-my.gini
}
}

# save outputs 
write.table(output.eta.cum, file="output_eta_cum.txt")
write.table(output.n.cum, file="output_n_cum.txt")
write.table(output.gini.cum, file="output_gini_cum.txt")

ETA index for European countries for slots 1980-1084 and 2010-2014

patent_diffusion

Katarzyna Kopczewska

2026-02-22