# Load the aggregated indicator table.
total <- read.csv("~/Desktop/PKP/TotalAggregated.csv", header = TRUE, sep = ",", dec = ".")

# Region codes found in the raw file -> display names used in the plots.
region_name_mapping <- c(
  "EU" = "Europe", "US" = "North America",
  "JUS" = "South America", "ANZ" = "Oceania",
  "ASIA" = "Asia"
)

# The raw column is called "Country" but is treated as a region from here on.
names(total)[names(total) == "Country"] <- "Region"

# Recode the region codes. as.character() keeps the lookup name-based even if
# the column was read as a factor; unname() stops the lookup names from
# sticking to the column. Codes missing from the mapping become NA.
total <- total %>%
  mutate(Region = unname(region_name_mapping[as.character(Region)]))
total$Year <- lubridate::year(total$Date)

# Yearly totals of every numeric indicator per region, 2010-2022, sorted by
# region and year with Year and Region as the leading columns.
#
# The previous version also built a table grouped by every column except
# Region and Date. That table carried no Region column, so after bind_rows()
# all of its rows were removed again by the NA filter; its only lasting effect
# was a stray one-group-per-row grouping on the result. It is no longer built,
# and `.groups = "drop"` returns a plain, ungrouped tibble.
# NOTE: recorded printouts of total_aggregated further down that still show a
# "# Groups:" header predate this change.
total_aggregated <- total %>%
  filter(Year %in% 2010:2022, !is.na(Region)) %>%
  group_by(Region, Year) %>%
  dplyr::summarize(across(where(is.numeric), sum), .groups = "drop") %>%
  arrange(Region, Year) %>%
  select(Year, Region, everything())
print(total_aggregated)
## # A tibble: 65 × 18
## Year Region Papers Paper_…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵ Funding
## <dbl> <chr> <dbl> <dbl> <int> <int> <int> <int> <int> <dbl>
## 1 2010 Asia 150193. 972822. 0 11578 127443 105 9 1.30e6
## 2 2011 Asia 154602. 1060488. 0 12055 116129 288 8 1.51e6
## 3 2012 Asia 145329. 1019775. 0 11677 97100 612 7 6.50e5
## 4 2013 Asia 144896. 1103366. 0 12777 94105 1342 6 6.57e5
## 5 2014 Asia 141779. 1203931. 3422 13023 83764 2846 4 9.52e5
## 6 2015 Asia 130611. 1339516. 7304 11537 61359 5935 5 7.10e5
## 7 2016 Asia 124234. 1432777. 33292 9110 35998 12397 20 7.09e5
## 8 2017 Asia 134931. 1629925. 63831 9034 25576 30076 11 3.41e6
## 9 2018 Asia 162308. 1818648. 129247 12084 20566 46888 9 1.55e6
## 10 2019 Asia 192570. 1888756. 281760 13356 10650 56660 15 1.96e6
## # … with 55 more rows, 8 more variables: norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Patent counts per country, summed over all months from 2010-01 onwards,
# reduced to the 20 countries with the most patents.
sum_patent <- read.csv("indicators_econ/Patents/CountryTimeSeries.csv", header = TRUE, sep = ",", dec = ".")
sum_patent <- rename(sum_patent,
  Month = YearMonth,
  Patents = X..Patents, Patents_norm = X..Patents..normalized.,
  Citations = X..Citations, Citations_norm = X..Citations..normalized.
)

# YearMonth is turned into the first day of that month so it can be compared
# as a Date.
sum_patent$Date <- as.Date(paste0(sum_patent$Month, "-01"), format = "%Y-%m-%d")
sum_patent$Month <- NULL

# TWN is reported together with CHN. Recoding the rows before summing gives the
# same total as adding the two sums afterwards, and keeps working when one of
# the two codes is missing from the file.
sum_patent$Country[sum_patent$Country %in% "TWN"] <- "CHN"

# filter() drops rows whose Date could not be parsed (NA) instead of turning
# them into all-NA rows, which is what `[` does with an NA condition.
# The excluded codes are aggregates / invalid entries, not single countries.
sum_patent <- sum_patent %>%
  filter(
    Date >= as.Date("2010-01-01"),
    !Country %in% c("ALL", "G7", "G20", "OECD", "#Type!")
  ) %>%
  group_by(Country) %>%
  dplyr::summarise(Sum_Patents = sum(Patents)) %>%
  arrange(desc(Sum_Patents))
sum_patent <- head(sum_patent, 20)

# GDP per capita, hard-coded in the row order of the ranking above.
# NOTE(review): the values are attached by position, so they are only correct
# while the top-20 ranking is unchanged -- verify after any data refresh.
# The column name `gpd_per_capita` (sic) is kept as-is in case it is used
# elsewhere.
gdp <- data.frame(
  gpd_per_capita = c(76398.6, 37149.6, 33815.3, 32254.6, 40963.8, 12720.2, 48432.5, 45850.4,
                     34158.0, 92101.5, 55873.2, 54966.5, 64491.4, 53757.9, 50536.6,
                     2388.6, 55985.4, 54659.8, 82807.6, 48983.6)
)
# Fail loudly rather than recycle or misalign values if the ranking is short.
stopifnot(nrow(sum_patent) == nrow(gdp))
sum_patent$gdp_per_capita <- gdp$gpd_per_capita
head(sum_patent)
## # A tibble: 6 × 3
## Country Sum_Patents gdp_per_capita
## <chr> <int> <dbl>
## 1 USA 180516 76399.
## 2 EU27 74500 37150.
## 3 JPN 62378 33815.
## 4 KOR 38667 32255.
## 5 FRA 32278 40964.
## 6 CHN 29863 12720.
# Cross-region mean of norm_Papers per year, drawn as a dashed reference line.
average_by_year <- total_aggregated %>%
  group_by(Year) %>%
  dplyr::summarize(average_norm_Papers = mean(norm_Papers, na.rm = TRUE))

# Papers per million inhabitants by region, plus the yearly average (red,
# dashed). `linewidth` replaces `size`, which is deprecated for lines since
# ggplot2 3.4.0. The point layer inherits the Region colour from the plot-level
# mapping, so only the shape is mapped there.
ggplot(total_aggregated, aes(x = Year, y = norm_Papers, color = Region, group = Region)) +
  geom_line(linewidth = 1) +
  geom_line(
    data = average_by_year, aes(x = Year, y = average_norm_Papers, group = 1),
    color = "red", linetype = "dashed"
  ) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of Research Papers per million people\nfrom 2010-2022 by Region",
    x = "Year", y = "Number of Scientific Papers per\nmillion inhabitants"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Yearly cross-region mean of norm_Papers. na.rm = TRUE matches the
# average_by_year calculation used for the line chart, so one missing value
# does not blank out the index for a whole year.
average_papers_by_date <- total_aggregated %>%
  group_by(Year) %>%
  dplyr::summarize(Average_norm_Papers = mean(norm_Papers, na.rm = TRUE))

# Attach the yearly mean to every region/year row.
total_papers_aggregated <- merge(total_aggregated, average_papers_by_date, by = "Year")

# Index = distance of a region's value from that year's cross-region mean
# (positive: above average, negative: below).
total_papers_aggregated$Index <- total_papers_aggregated$norm_Papers -
  total_papers_aggregated$Average_norm_Papers

# Stacked bars of the index per year. geom_col() is the direct form of
# geom_bar(stat = "identity").
ggplot(total_papers_aggregated, aes(x = Year, y = Index, fill = Region)) +
  geom_col() +
  labs(
    title = "Number of Research Papers above or below average\nby Region and Year from 2010-2022",
    x = "Year", y = "Index"
  ) +
  scale_fill_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2)) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Per-year divisors: 2010 -> 13, 2011 -> 12, ..., 2022 -> 1 (i.e. 2023 - year).
# Presumably this annualises the citation counts, since older papers have had
# more years to collect citations -- TODO confirm the intended reference year.
# Same structure as before: an unnamed list of list(year = <chr>, divisor = <dbl>).
year_divisors <- lapply(2010:2022, function(y) {
  list(year = as.character(y), divisor = 2023 - y)
})

# Flatten the pairs into a lookup vector keyed by year.
divisor_by_year <- vapply(year_divisors, function(pair) pair$divisor, numeric(1))
names(divisor_by_year) <- vapply(year_divisors, function(pair) pair$year, character(1))

# Divide every row by the divisor of its year in one vectorized step instead of
# filtering the table once per year. Years without a divisor yield NA, as
# before.
total_aggregated$norm_Paper_Citations_year <-
  total_aggregated$norm_Paper_Citations /
  unname(divisor_by_year[as.character(total_aggregated$Year)])
# Citations per year (per million inhabitants) by region.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(
  total_aggregated,
  aes(x = Year, y = norm_Paper_Citations_year, color = Region, group = Region)
) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of Research Paper Citations (per Year)\nper million people from 2010-2022 by Region",
    x = "Year", y = "Number of Citations per Year"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# AI news articles per million inhabitants by region. Only 2014 onwards is
# plotted, so the title now states 2014-2022 (it previously said 2010-2022).
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(
  total_aggregated[total_aggregated$Year >= 2014, ],
  aes(x = Year, y = norm_Articles, color = Region, group = Region)
) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of AI related news Articles per million\npeople from 2014-2022 by Region",
    x = "Year", y = "Number of AI news Articles\nper million inhabitants"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2014, 2022, by = 2), limits = c(2014, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# AI patents per million inhabitants by region. Only years up to 2021 are
# plotted, so the title now states 2010-2021 (it previously said 2010-2022).
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(
  total_aggregated[total_aggregated$Year <= 2021, ],
  aes(x = Year, y = norm_Patents, color = Region, group = Region)
) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of AI related Patents per million people\nfrom 2010-2021 by Region",
    x = "Year", y = "Number of AI Patents per\nmillion inhabitants"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2021, by = 2), limits = c(2010, 2021)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Replace selected country codes with readable labels for the plot; codes that
# are not listed here are left unchanged.
country_labels <- c(
  JPN = "Japan", GBR = "UK", DEU = "Germany", ITA = "Italy",
  FRA = "France", KOR = "South Korea", CHN = "China", CHE = "Switzerland"
)
has_label <- sum_patent$Country %in% names(country_labels)
sum_patent$Country[has_label] <-
  unname(country_labels[sum_patent$Country[has_label]])

# GDP per capita against summed patents, one labelled point per row of
# sum_patent; labels are nudged up and to the right of their points.
ggplot(sum_patent, aes(x = gdp_per_capita, y = Sum_Patents, label = Country)) +
  geom_point(shape = 16, size = 3, color = "black") +
  geom_text(nudge_x = 4000, nudge_y = 200) +
  labs(
    title = "Scatter Plot of GDP per Capita vs. Patents",
    x = "GDP per Capita", y = "Patents"
  ) +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(0, 100000, by = 25000),
    limits = c(0, 100000)
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_blank(),
    legend.text = element_text(size = 12)
  )

# Same scatter restricted to the first 10 rows of sum_patent, with
# non-overlapping labels from geom_text_repel().
# The former geom_segment() layer had identical start and end points
# (xend = x, yend = y), i.e. zero-length segments that draw nothing, so it has
# been removed.
ggplot(
  head(sum_patent, 10),
  aes(x = gdp_per_capita, y = Sum_Patents, label = Country)
) +
  geom_point(shape = 16, size = 4, color = "black") +
  geom_text_repel(nudge_x = 2000, nudge_y = 10) +
  labs(
    title = "Scatter Plot of GDP per Capita vs. Patents",
    x = "GDP per Capita (in USD)", y = "Number of Patents"
  ) +
  scale_x_continuous(
    labels = scales::comma,
    breaks = seq(0, 100000, by = 20000),
    limits = c(0, 100000)
  ) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# AI software projects per million inhabitants by region.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(total_aggregated, aes(x = Year, y = norm_AI_Projects, color = Region, group = Region)) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of AI software related Projects per million people\nfrom 2010-2022 by Region",
    x = "Year", y = "Number of AI software Projects\nper million inhabitants"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# AI related projects (norm_Projects) by region.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(total_aggregated, aes(x = Year, y = norm_Projects, color = Region, group = Region)) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of AI related Projects per million people\nfrom 2010-2022 by Region",
    x = "Year", y = "Number of AI Projects"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# AI project funding (norm_Funding) by region.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(total_aggregated, aes(x = Year, y = norm_Funding, color = Region, group = Region)) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Amount of AI Project Funding per million people\nfrom 2010-2022 by Region",
    x = "Year", y = "Amount of AI Project Funding"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Monthly job-posting counts per country.
job <- read.csv("indicators_econ/Adzuna/CountryTimeSeries.csv", header = TRUE, sep = ",", dec = ".")
job <- rename(job, Postings = X..Job.postings, Month = YearMonth)

# YearMonth is turned into the first day of that month so it can be compared
# as a Date. filter() drops rows whose Date could not be parsed (NA) instead of
# turning them into all-NA rows, which is what `[` does with an NA condition.
job$Date <- as.Date(paste0(job$Month, "-01"), format = "%Y-%m-%d")
job <- filter(job, Date >= as.Date("2010-01-01"))
job$Month <- NULL
job$Year <- lubridate::year(job$Date)

# Yearly totals of every numeric column per country, 2010-2022, sorted by
# country and year with Year and Country as the leading columns.
#
# The previous version also built a table grouped by every column except
# Country and Date. That table carried no Country column, so after bind_rows()
# all of its rows were removed again by the NA filter; its only lasting effect
# was a stray grouping by Postings on the result. It is no longer built, and
# `.groups = "drop"` returns a plain, ungrouped tibble.
# NOTE: recorded printouts of job_aggregated further down that still show a
# "# Groups:" header predate this change.
job_aggregated <- job %>%
  filter(Year %in% 2010:2022, !is.na(Country)) %>%
  dplyr::group_by(Country, Year) %>%
  dplyr::summarize(dplyr::across(where(is.numeric), sum), .groups = "drop") %>%
  arrange(Country, Year) %>%
  select(Year, Country, everything())
print(job_aggregated)
## # A tibble: 102 × 3
## Year Country Postings
## <dbl> <chr> <int>
## 1 2017 ALL 91424
## 2 2018 ALL 173064
## 3 2019 ALL 190336
## 4 2020 ALL 201221
## 5 2021 ALL 164823
## 6 2022 ALL 322907
## 7 2017 AUS 2917
## 8 2018 AUS 5749
## 9 2019 AUS 5664
## 10 2020 AUS 3999
## # … with 92 more rows
# Total job postings per year (rows with Country == "ALL") as a filled area.
# `linewidth` replaces `size`, which is deprecated for outlines/lines since
# ggplot2 3.4.0. The former scale_fill_manual(values = "skyblue") was dropped:
# no fill aesthetic is mapped (the fill is the constant "lightgrey"), so that
# scale was never used.
ggplot(job_aggregated[job_aggregated$Country == "ALL", ], aes(x = Year, y = Postings)) +
  geom_area(linewidth = 1, fill = "lightgrey", col = "black") +
  labs(
    title = "Number of total Job Postings from 2017-2022",
    x = "Year", y = "Number of Job Postings related to AI"
  ) +
  scale_x_continuous(breaks = seq(2017, 2022, by = 1), limits = c(2017, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )

# Preview the first ten rows of the yearly job-posting table.
head(job_aggregated, 10)
## # A tibble: 10 × 3
## # Groups: Postings [10]
## Year Country Postings
## <dbl> <chr> <int>
## 1 2017 ALL 91424
## 2 2018 ALL 173064
## 3 2019 ALL 190336
## 4 2020 ALL 201221
## 5 2021 ALL 164823
## 6 2022 ALL 322907
## 7 2017 AUS 2917
## 8 2018 AUS 5749
## 9 2019 AUS 5664
## 10 2020 AUS 3999
# Citations-per-year divided by papers, stored as norm_cpp; preview the result.
total_aggregated[["norm_cpp"]] <- as.numeric(
  total_aggregated[["norm_Paper_Citations_year"]] /
    total_aggregated[["norm_Papers"]]
)
head(total_aggregated)
## # A tibble: 6 × 20
## # Groups: Papers, Paper_Citations, Articles, Patents, Citations, AI_Projects,
## # Projects, Funding, norm_Papers, norm_Paper_Citations, norm_Articles,
## # norm_Patents, norm_Citations, norm_AI_Projects, norm_Projects, norm_Funding
## # [6]
## Year Region Papers Paper_C…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵ Funding
## <dbl> <chr> <dbl> <dbl> <int> <int> <int> <int> <int> <dbl>
## 1 2010 Asia 150193. 972822. 0 11578 127443 105 9 1.30e6
## 2 2011 Asia 154602. 1060488. 0 12055 116129 288 8 1.51e6
## 3 2012 Asia 145329. 1019775. 0 11677 97100 612 7 6.50e5
## 4 2013 Asia 144896. 1103366. 0 12777 94105 1342 6 6.57e5
## 5 2014 Asia 141779. 1203931. 3422 13023 83764 2846 4 9.52e5
## 6 2015 Asia 130611. 1339516. 7304 11537 61359 5935 5 7.10e5
## # … with 10 more variables: norm_Papers <dbl>, norm_Paper_Citations <dbl>,
## # norm_Articles <dbl>, norm_Patents <dbl>, norm_Citations <dbl>,
## # norm_AI_Projects <dbl>, norm_Projects <dbl>, norm_Funding <dbl>,
## # norm_Paper_Citations_year <dbl>, norm_cpp <dbl>, and abbreviated variable
## # names ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Annual citations per paper (norm_cpp) by region.
# `linewidth` replaces `size`, which is deprecated for lines since ggplot2
# 3.4.0; the point layer inherits the Region colour from the plot-level mapping.
ggplot(
  total_aggregated,
  aes(x = Year, y = norm_cpp, color = Region, group = Region)
) +
  geom_line(linewidth = 1) +
  geom_point(aes(shape = Region), size = 3) +
  labs(
    title = "Number of Annual Citations per Scientific Paper\n from 2010-2022 by Region",
    x = "Year", y = "Number of Annual Citations\nper Scientific paper"
  ) +
  scale_color_manual(values = c(
    "Europe" = "black", "North America" = "grey40",
    "South America" = "grey60", "Oceania" = "grey80",
    "Asia" = "grey20"
  )) +
  scale_x_continuous(breaks = seq(2010, 2022, by = 2), limits = c(2010, 2022)) +
  scale_y_continuous(labels = scales::comma) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
    axis.text = element_text(size = 12),
    axis.title = element_text(size = 14),
    legend.title = element_text(size = 12, face = "bold"),
    legend.text = element_text(size = 12)
  )
