# PKP1

# Monthly job-posting counts per country (Adzuna folder), from 2010 onwards.
job <- read.csv("indicators_econ/Adzuna/CountryTimeSeries.csv",
                header = TRUE, sep = ",", dec = ".")
job <- rename(job, Postings = X..Job.postings, Month = YearMonth)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
job$Date <- as.Date(paste0(job$Month, "-01"), format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
job <- job[which(job$Date >= as.Date("2010-01-01")), ]
job$Month <- NULL

# Monthly funded-project counts per country (Cordis folder), from 2010 onwards.
project_fund <- read.csv("indicators_econ/Cordis/CountryTimeSeries.csv",
                         header = TRUE, sep = ",", dec = ".")
project_fund <- rename(project_fund, Month = YearMonth, Projects = X..Projects)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
project_fund$Date <- as.Date(paste0(project_fund$Month, "-01"),
                             format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
project_fund <- project_fund[
  which(project_fund$Date >= as.Date("2010-01-01")),
]
project_fund$Month <- NULL

# Monthly AI software project counts per country (Github folder), raw and
# normalized, from 2010 onwards.
project_soft <- read.csv("indicators_econ/Github/CountryTimeSeries.csv",
                         header = TRUE, sep = ",", dec = ".")
project_soft <- rename(
  project_soft,
  Month = YearMonth,
  AI_Projects = X..AI.software.projects,
  AI_Projects_norm = X..AI.software.projects..normalized.
)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
project_soft$Date <- as.Date(paste0(project_soft$Month, "-01"),
                             format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
project_soft <- project_soft[
  which(project_soft$Date >= as.Date("2010-01-01")),
]
project_soft$Month <- NULL

# Monthly news-article counts per country (News folder), from 2010 onwards.
article <- read.csv("indicators_econ/News/CountryTimeSeries.csv",
                    header = TRUE, sep = ",", dec = ".")
article <- rename(article, Month = YearMonth, Articles = X..Articles)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
article$Date <- as.Date(paste0(article$Month, "-01"), format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
article <- article[which(article$Date >= as.Date("2010-01-01")), ]
article$Month <- NULL

# Monthly news-article counts per country and topic (News folder), from 2010
# onwards. The `concept` column becomes `Topic`, the key used by the
# topic-level merge.
article_topic <- read.csv("indicators_econ/News/CountryTopicTimeSeries.csv",
                          header = TRUE, sep = ",", dec = ".")
article_topic <- rename(article_topic, Month = YearMonth,
                        Articles = X..Articles, Topic = concept)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
article_topic$Date <- as.Date(paste0(article_topic$Month, "-01"),
                              format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
article_topic <- article_topic[
  which(article_topic$Date >= as.Date("2010-01-01")),
]
article_topic$Month <- NULL

# Monthly patent and patent-citation counts per country (Patents folder), raw
# and normalized, from 2010 onwards.
patent <- read.csv("indicators_econ/Patents/CountryTimeSeries.csv",
                   header = TRUE, sep = ",", dec = ".")
patent <- rename(
  patent,
  Month = YearMonth,
  Patents = X..Patents,
  Patents_norm = X..Patents..normalized.,
  Citations = X..Citations,
  Citations_norm = X..Citations..normalized.
)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
patent$Date <- as.Date(paste0(patent$Month, "-01"), format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
patent <- patent[which(patent$Date >= as.Date("2010-01-01")), ]
patent$Month <- NULL

# Monthly patent and patent-citation counts per country and topic (Patents
# folder), raw and normalized, from 2010 onwards.
patent_topic <- read.csv("indicators_econ/Patents/CountryTopicTimeSeries.csv",
                         header = TRUE, sep = ",", dec = ".")
patent_topic <- rename(
  patent_topic,
  Month = YearMonth,
  Patents = X..Patents,
  Patents_norm = X..Patents..normalized.,
  Citations = X..Citations,
  Citations_norm = X..Citations..normalized.
)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
patent_topic$Date <- as.Date(paste0(patent_topic$Month, "-01"),
                             format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
patent_topic <- patent_topic[
  which(patent_topic$Date >= as.Date("2010-01-01")),
]
patent_topic$Month <- NULL

# Monthly research-paper and paper-citation counts per country (Research
# folder), raw and normalized, from 2010 onwards. Citations are renamed to
# Paper_Citations so they do not collide with the patent Citations column.
research <- read.csv("indicators_econ/Research/CountryTimeSeries.csv",
                     header = TRUE, sep = ",", dec = ".")
research <- rename(
  research,
  Month = YearMonth,
  Papers = X..Papers,
  Papers_norm = X..Papers..normalized.,
  Paper_Citations = X..Citations,
  Paper_Citations_norm = X..Citations..normalized.
)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
research$Date <- as.Date(paste0(research$Month, "-01"), format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
research <- research[which(research$Date >= as.Date("2010-01-01")), ]
research$Month <- NULL

# Monthly research-paper and paper-citation counts per country and topic
# (Research folder), raw and normalized, from 2010 onwards.
research_topic <- read.csv(
  "indicators_econ/Research/CountryTopicTimeSeries.csv",
  header = TRUE, sep = ",", dec = "."
)
research_topic <- rename(
  research_topic,
  Month = YearMonth,
  Papers = X..Papers,
  Papers_norm = X..Papers..normalized.,
  Paper_Citations = X..Citations,
  Paper_Citations_norm = X..Citations..normalized.
)
# Month is expected to look like "YYYY-MM"; pin it to the first day of the
# month so it can be parsed as a Date.
research_topic$Date <- as.Date(paste0(research_topic$Month, "-01"),
                               format = "%Y-%m-%d")
# which() drops rows whose date could not be parsed. A bare logical index
# would turn each NA comparison into an all-NA row.
research_topic <- research_topic[
  which(research_topic$Date >= as.Date("2010-01-01")),
]
research_topic$Month <- NULL

# Population table (semicolon-separated). Country.Code is renamed to Country
# so it lines up with the key used by the indicator tables.
# NOTE(review): unlike the indicator files, this path is machine-specific
# (home directory) - consider moving the file next to the other inputs.
population <- rename(
  read.csv("~/Desktop/PKP/Population.csv", header = TRUE, sep = ";", dec = "."),
  Country = Country.Code
)

# Merge a list of data frames with successive full outer joins.
#
# datasets: non-empty list of data frames that all contain the key columns.
# by:       key columns to join on; defaults to c("Country", "Date").
#
# Returns the single merged data frame. A one-element list is returned
# unchanged; looping over 2:length(datasets) would count down (2, 1) in that
# case and fail with a subscript error.
merge_datasets <- function(datasets, by = c("Country", "Date")) {
  stopifnot(length(datasets) >= 1L)

  Reduce(
    function(merged_so_far, next_dataset) {
      # all = TRUE keeps key combinations that appear in only one table.
      merge(merged_so_far, next_dataset, by = by, all = TRUE)
    },
    datasets
  )
}

# Merge a list of topic-level data frames with successive full outer joins.
#
# datasets: non-empty list of data frames that all contain the key columns.
# by:       key columns to join on; defaults to c("Country", "Date", "Topic").
#
# Returns the single merged data frame. A one-element list is returned
# unchanged; looping over 2:length(datasets) would count down (2, 1) in that
# case and fail with a subscript error.
merge_topic_datasets <- function(datasets,
                                 by = c("Country", "Date", "Topic")) {
  stopifnot(length(datasets) >= 1L)

  Reduce(
    function(merged_so_far, next_dataset) {
      # all = TRUE keeps key combinations that appear in only one table.
      merge(merged_so_far, next_dataset, by = by, all = TRUE)
    },
    datasets
  )
}

# Gather the country-level and the topic-level indicator tables loaded above.
# The job-postings table (`job`) is not included in either list.
all_datasets <- list(research, article, patent, project_soft, project_fund)
all_datasets_topic <- list(research_topic, article_topic, patent_topic)

# Outer-join each group of tables on its key columns.
merged <- merge_datasets(all_datasets)
merged_topic <- merge_topic_datasets(all_datasets_topic)

# Preview the last rows of the country-level table.
tail(merged)

##       Country       Date Papers Papers_norm Paper_Citations
## 27006     ZWE 2022-07-01     21   11.450000               4
## 27007     ZWE 2022-08-01     11    6.861111              14
## 27008     ZWE 2022-09-01      9    4.466667              21
## 27009     ZWE 2022-10-01      9    5.904762               5
## 27010     ZWE 2022-11-01     28   20.938636              12
## 27011     ZWE 2022-12-01      7    3.433333               3
##       Paper_Citations_norm Articles Patents Patents_norm Citations
## 27006            2.2000000       34      NA           NA        NA
## 27007            6.3888889       10      NA           NA        NA
## 27008           16.5666667       16      NA           NA        NA
## 27009            3.3333333       21      NA           NA        NA
## 27010            4.0075758       20      NA           NA        NA
## 27011            0.7666667       27      NA           NA        NA
##       Citations_norm AI_Projects AI_Projects_norm Projects Funding
## 27006             NA          18             17.5       NA      NA
## 27007             NA           7              7.0       NA      NA
## 27008             NA           8              8.0       NA      NA
## 27009             NA          11             11.0       NA      NA
## 27010             NA          11             11.0       NA      NA
## 27011             NA           7              7.0       NA      NA

# Preview the last rows of the topic-level table.
tail(merged_topic)

##        Country       Date                   Topic Papers Papers_norm
## 176035     ZWE 2022-12-01             Agriculture     NA          NA
## 176036     ZWE 2022-12-01 Artificial intelligence      4    1.100000
## 176037     ZWE 2022-12-01      Internet of things     NA          NA
## 176038     ZWE 2022-12-01        Machine learning      3    2.333333
## 176039     ZWE 2022-12-01                Medicine      1    0.100000
## 176040     ZWE 2022-12-01                Robotics     NA          NA
##        Paper_Citations Paper_Citations_norm Articles Patents Patents_norm
## 176035              NA                   NA        6      NA           NA
## 176036               1            0.1000000       16      NA           NA
## 176037              NA                   NA        2      NA           NA
## 176038               2            0.6666667        1      NA           NA
## 176039               1            0.1000000       NA      NA           NA
## 176040              NA                   NA        1      NA           NA
##        Citations Citations_norm
## 176035        NA             NA
## 176036        NA             NA
## 176037        NA             NA
## 176038        NA             NA
## 176039        NA             NA
## 176040        NA             NA

# Country groups (ISO 3166-1 alpha-3 codes) used for regional aggregation.

# "EU" group: the EU member states followed by further European countries
# (GBR, CHE, NOR, ISL, LIE, ALB, TUR, SRB, MNE, MKD), so it is broader than
# the EU proper.
countries_EU <- c(
  "BEL", "BGR", "CZE", "DNK", "DEU", "EST", "IRL", "GRC", "ESP", "FRA",
  "HRV", "ITA", "CYP", "LTU", "LUX", "LVA", "POL", "HUN", "NLD", "MLT",
  "AUT", "SVN", "SVK", "ROU", "PRT", "FIN", "SWE", "GBR", "CHE", "NOR",
  "ISL", "LIE", "ALB", "TUR", "SRB", "MNE", "MKD"
)

# "US" group: the United States together with Canada.
countries_US <- c("USA", "CAN")

# Australia and New Zealand.
countries_ANZ <- c("AUS", "NZL")

# "JUS" group: Latin American countries.
countries_JUS <- c("ARG", "BRA", "CHL", "MEX", "PER", "URY", "COL")

# "ASIA" group; note that it also contains RUS.
countries_ASIA <- c(
  "CHN", "JPN", "KOR", "IND", "SGP", "VNM", "MYS", "RUS", "TWN"
)

# Yearly total population of the countries in countries_EU, labelled "EU".
# Column order (Year, Country, Population) matters: the divisors below read
# the population by column position. sum() runs without na.rm, so a missing
# value for any country makes that year's total NA.
population_EU <- population %>%
  filter(Country %in% countries_EU) %>%
  group_by(Year) %>%
  summarize(Country = "EU", Population = sum(Population)) %>%
  ungroup()

# Monthly totals of every indicator over the countries in countries_EU,
# labelled "EU". Missing values are skipped, so a month with no data at all
# for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_EU <- merged %>%
  filter(Country %in% countries_EU) %>%
  group_by(Date) %>%
  summarize(
    Country = "EU",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

head(merged_EU)

## # A tibble: 6 × 10
##   Date       Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##   <date>     <chr>    <dbl>        <dbl>   <int>   <int>   <int>   <int>   <int>
## 1 2010-01-01 EU      26053.      325320.       0     616    6222      25     135
## 2 2010-02-01 EU       3181.      121857.       0     693    6488      24      18
## 3 2010-03-01 EU       5233.      148914.       0     880    9238      12      34
## 4 2010-04-01 EU       4332.      122116.       0     786    7347      19      32
## 5 2010-05-01 EU       5714.      140800.       0     718    7811       8      10
## 6 2010-06-01 EU       6998.      153567.       0     911    8024      33      30
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Test of aggregating the topic-level data: monthly totals per topic over the
# countries in countries_EU, labelled "EU". Missing values are skipped, so a
# month/topic with no data for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_topic_EU <- merged_topic %>%
  filter(Country %in% countries_EU) %>%
  group_by(Date, Topic) %>%
  summarize(
    Country = "EU",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    # Drop all grouping explicitly instead of relying on summarize() peeling
    # off one level; this also silences the "grouped output" message.
    .groups = "drop"
  )

## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.

# Preview the first 12 rows of the topic-level EU aggregate.
head(merged_topic_EU, 12)

## # A tibble: 12 × 8
##    Date       Topic              Country  Papers Paper…¹ Artic…² Patents Citat…³
##    <date>     <chr>              <chr>     <dbl>   <dbl>   <int>   <int>   <int>
##  1 2010-01-01 Agriculture        EU      6.45e+1  3.12e2       0       0       0
##  2 2010-01-01 Artificial intell… EU      2.36e+4  2.86e5       0     596    5987
##  3 2010-01-01 Automotive indust… EU      6.82e+1  8.15e2       0       0       0
##  4 2010-01-01 Computer vision    EU      3.11e+3  5.11e4       0     332    3306
##  5 2010-01-01 Food industry      EU      3.00e+0  6   e0       0       0       0
##  6 2010-01-01 Higher education   EU      4.65e+1  4.96e2       0       0       0
##  7 2010-01-01 Industry 4.0       EU      5   e-1  0            0       0       0
##  8 2010-01-01 Internet of Things EU      1.97e+1  2.33e2       0       0       0
##  9 2010-01-01 Machine learning   EU      5.11e+3  8.10e4       0      57     590
## 10 2010-01-01 Manufacturing      EU      5.33e+0  8.33e0       0       0       0
## 11 2010-01-01 Medicine           EU      2.56e+3  3.76e4       0     100    1172
## 12 2010-01-01 Natural language … EU      1.41e+3  1.03e4       0       4      67
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations

# Normalisation unit: divisors are populations expressed in units of
# n_of_people (one million inhabitants).
n_of_people <- 1000000

# One list(year = "<YYYY>", divisor = <population / n_of_people>) entry per
# year 2010-2022. Row i of population_EU is taken as year 2009 + i and
# column 3 as the population.
# NOTE(review): this positional lookup assumes population_EU starts at 2010
# and has one row per consecutive year - confirm against Population.csv.
year_divisors_EU <- lapply(seq_len(13L), function(i) {
  list(
    year = as.character(2009L + i),
    divisor = as.double(population_EU[i, 3] / n_of_people)
  )
})

# Per-capita normalisation of the country-level EU aggregate: each indicator
# is divided by the divisor of the row's year and stored as norm_<indicator>.
eu_indicators <- c("Papers", "Paper_Citations", "Articles", "Patents",
                   "Citations", "AI_Projects", "Projects", "Funding")

# Start every normalised column as NA; rows from years without a divisor
# keep that NA.
for (indicator in eu_indicators) {
  merged_EU[[paste0("norm_", indicator)]] <- NA
}

for (pair in year_divisors_EU) {
  # Rows of merged_EU that fall into this divisor's calendar year.
  in_year <- format(merged_EU$Date, "%Y") %in% pair$year

  for (indicator in eu_indicators) {
    target <- paste0("norm_", indicator)
    merged_EU[[target]][in_year] <-
      merged_EU[[indicator]][in_year] / pair$divisor
  }
}

head(merged_EU, 16)

## # A tibble: 16 × 18
##    Date       Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##    <date>     <chr>    <dbl>       <dbl>   <int>   <int>   <int>   <int>   <int>
##  1 2010-01-01 EU      26053.     325320.       0     616    6222      25     135
##  2 2010-02-01 EU       3181.     121857.       0     693    6488      24      18
##  3 2010-03-01 EU       5233.     148914.       0     880    9238      12      34
##  4 2010-04-01 EU       4332.     122116.       0     786    7347      19      32
##  5 2010-05-01 EU       5714.     140800.       0     718    7811       8      10
##  6 2010-06-01 EU       6998.     153567.       0     911    8024      33      30
##  7 2010-07-01 EU       6352.     135912.       0     759    6797      29       9
##  8 2010-08-01 EU       6009.     129799.       0     673    6097      57       5
##  9 2010-09-01 EU       6887.     138418.       0     804    7388      44      48
## 10 2010-10-01 EU       5853.     132084.       0     823    7208      26      26
## 11 2010-11-01 EU       4642.     115096.       0     826    8757      39      25
## 12 2010-12-01 EU       5099.     119993.       0     992    9489      41      11
## 13 2011-01-01 EU      27038.     314411.       0     615    6211      51      47
## 14 2011-02-01 EU       3463.     113495.       0     614    5661      79      52
## 15 2011-03-01 EU       4763.     168440.       0     867    8522      64      43
## 16 2011-04-01 EU       4703.     120632.       0     682    6660     128      19
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## #   norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## #   norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## #   norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## #   ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Per-capita normalisation of the topic-level EU aggregate: each indicator is
# divided by the divisor of the row's year and stored as norm_<indicator>.
eu_topic_indicators <- c("Papers", "Paper_Citations", "Articles", "Patents",
                         "Citations")

# Start every normalised column as NA; rows from years without a divisor
# keep that NA.
for (indicator in eu_topic_indicators) {
  merged_topic_EU[[paste0("norm_", indicator)]] <- NA
}

for (pair in year_divisors_EU) {
  # Rows of merged_topic_EU that fall into this divisor's calendar year.
  in_year <- format(merged_topic_EU$Date, "%Y") %in% pair$year

  for (indicator in eu_topic_indicators) {
    target <- paste0("norm_", indicator)
    merged_topic_EU[[target]][in_year] <-
      merged_topic_EU[[indicator]][in_year] / pair$divisor
  }
}

head(merged_topic_EU, 16)

## # A tibble: 16 × 13
##    Date       Topic      Country  Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
##    <date>     <chr>      <chr>     <dbl>   <dbl>   <int>   <int>   <int>   <dbl>
##  1 2010-01-01 Agricultu… EU      6.45e+1  3.12e2       0       0       0 1.08e-1
##  2 2010-01-01 Artificia… EU      2.36e+4  2.86e5       0     596    5987 3.96e+1
##  3 2010-01-01 Automotiv… EU      6.82e+1  8.15e2       0       0       0 1.14e-1
##  4 2010-01-01 Computer … EU      3.11e+3  5.11e4       0     332    3306 5.22e+0
##  5 2010-01-01 Food indu… EU      3.00e+0  6   e0       0       0       0 5.04e-3
##  6 2010-01-01 Higher ed… EU      4.65e+1  4.96e2       0       0       0 7.81e-2
##  7 2010-01-01 Industry … EU      5   e-1  0            0       0       0 8.39e-4
##  8 2010-01-01 Internet … EU      1.97e+1  2.33e2       0       0       0 3.30e-2
##  9 2010-01-01 Machine l… EU      5.11e+3  8.10e4       0      57     590 8.58e+0
## 10 2010-01-01 Manufactu… EU      5.33e+0  8.33e0       0       0       0 8.95e-3
## 11 2010-01-01 Medicine   EU      2.56e+3  3.76e4       0     100    1172 4.30e+0
## 12 2010-01-01 Natural l… EU      1.41e+3  1.03e4       0       4      67 2.37e+0
## 13 2010-01-01 Pharmacol… EU      3.01e+1  3.93e2       0       0       0 5.05e-2
## 14 2010-01-01 Robot      EU      1.10e+3  9.05e3       0      10     238 1.84e+0
## 15 2010-01-01 Robotics   EU      1.80e+2  1.54e3       0       1      37 3.02e-1
## 16 2010-02-01 Agricultu… EU      5.50e+0  2.22e2       0       0       0 9.23e-3
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## #   norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers

# Yearly total population of the countries in countries_US, labelled "US".
# Column order (Year, Country, Population) matters: the divisors below read
# the population by column position. sum() runs without na.rm, so a missing
# value for any country makes that year's total NA.
population_US <- population %>%
  filter(Country %in% countries_US) %>%
  group_by(Year) %>%
  summarize(Country = "US", Population = sum(Population)) %>%
  ungroup()

# Monthly totals of every indicator over the countries in countries_US,
# labelled "US". Missing values are skipped, so a month with no data at all
# for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_US <- merged %>%
  filter(Country %in% countries_US) %>%
  group_by(Date) %>%
  summarize(
    Country = "US",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

head(merged_US)

## # A tibble: 6 × 10
##   Date       Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##   <date>     <chr>    <dbl>        <dbl>   <int>   <int>   <int>   <int>   <int>
## 1 2010-01-01 US      17698.      334861.       0     914   24793      12       0
## 2 2010-02-01 US       2751.      120632.       0     888   20785      10       0
## 3 2010-03-01 US       7005.      177088.       0    1218   26044      16       0
## 4 2010-04-01 US       4479.      134279.       0    1197   26978      27       0
## 5 2010-05-01 US       4180.      138240.       0    1053   26349      16       0
## 6 2010-06-01 US       6050.      208470.       0    1410   30543      12       1
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Test of aggregating the topic-level data: monthly totals per topic over the
# countries in countries_US, labelled "US". Missing values are skipped, so a
# month/topic with no data for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_topic_US <- merged_topic %>%
  filter(Country %in% countries_US) %>%
  group_by(Date, Topic) %>%
  summarize(
    Country = "US",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    # Drop all grouping explicitly instead of relying on summarize() peeling
    # off one level; this also silences the "grouped output" message.
    .groups = "drop"
  )

## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.

# Preview the first 12 rows of the topic-level US aggregate.
head(merged_topic_US, 12)

## # A tibble: 12 × 8
##    Date       Topic              Country  Papers Paper…¹ Artic…² Patents Citat…³
##    <date>     <chr>              <chr>     <dbl>   <dbl>   <int>   <int>   <int>
##  1 2010-01-01 Agriculture        US      5.83e+1  1.07e3       0       0       0
##  2 2010-01-01 Artificial intell… US      1.55e+4  2.92e5       0     885   23923
##  3 2010-01-01 Automotive indust… US      2.06e+1  1.41e3       0       2      70
##  4 2010-01-01 Computer vision    US      2.14e+3  3.12e4       0     466   11619
##  5 2010-01-01 Food industry      US      5   e+0  6   e0       0       0       0
##  6 2010-01-01 Higher education   US      5.78e+1  5.96e2       0       0       0
##  7 2010-01-01 Industry 4.0       US      5   e-1  0            0       0       0
##  8 2010-01-01 Internet of Things US      9   e+0  7.43e1       0       0       0
##  9 2010-01-01 Machine learning   US      3.91e+3  8.45e4       0     149    3829
## 10 2010-01-01 Manufacturing      US      6.67e-1  6.67e0       0       0       0
## 11 2010-01-01 Medicine           US      2.35e+3  4.70e4       0     110    6364
## 12 2010-01-01 Natural language … US      6.56e+2  1.32e4       0      21     391
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations

# One list(year = "<YYYY>", divisor = <population / n_of_people>) entry per
# year 2010-2022. Row i of population_US is taken as year 2009 + i and
# column 3 as the population.
# NOTE(review): this positional lookup assumes population_US starts at 2010
# and has one row per consecutive year - confirm against Population.csv.
year_divisors_US <- lapply(seq_len(13L), function(i) {
  list(
    year = as.character(2009L + i),
    divisor = as.double(population_US[i, 3] / n_of_people)
  )
})

# Per-capita normalisation of the country-level US aggregate: each indicator
# is divided by the divisor of the row's year and stored as norm_<indicator>.
us_indicators <- c("Papers", "Paper_Citations", "Articles", "Patents",
                   "Citations", "AI_Projects", "Projects", "Funding")

# Start every normalised column as NA; rows from years without a divisor
# keep that NA.
for (indicator in us_indicators) {
  merged_US[[paste0("norm_", indicator)]] <- NA
}

for (pair in year_divisors_US) {
  # Rows of merged_US that fall into this divisor's calendar year.
  in_year <- format(merged_US$Date, "%Y") %in% pair$year

  for (indicator in us_indicators) {
    target <- paste0("norm_", indicator)
    merged_US[[target]][in_year] <-
      merged_US[[indicator]][in_year] / pair$divisor
  }
}

head(merged_US, 16)

## # A tibble: 16 × 18
##    Date       Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##    <date>     <chr>    <dbl>       <dbl>   <int>   <int>   <int>   <int>   <int>
##  1 2010-01-01 US      17698.     334861.       0     914   24793      12       0
##  2 2010-02-01 US       2751.     120632.       0     888   20785      10       0
##  3 2010-03-01 US       7005.     177088.       0    1218   26044      16       0
##  4 2010-04-01 US       4479.     134279.       0    1197   26978      27       0
##  5 2010-05-01 US       4180.     138240.       0    1053   26349      16       0
##  6 2010-06-01 US       6050.     208470.       0    1410   30543      12       1
##  7 2010-07-01 US       4646.     135005.       0    1042   21101       9       0
##  8 2010-08-01 US       4741.     144007.       0    1041   19635      19       0
##  9 2010-09-01 US       4572.     143566.       0    1218   24876      28       1
## 10 2010-10-01 US       4515.     142516.       0    1219   24185      10       0
## 11 2010-11-01 US       3777.     123581.       0    1222   22517      16       0
## 12 2010-12-01 US       4375.     132346.       0    1657   33387      15       0
## 13 2011-01-01 US      17516.     277008.       0     946   19016      32       2
## 14 2011-02-01 US       3098.     116251.       0     982   20191      21       2
## 15 2011-03-01 US       4871.     149175.       0    1365   26053      40       0
## 16 2011-04-01 US       3923.     117758.       0    1153   22868      55       0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## #   norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## #   norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## #   norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## #   ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Per-capita normalisation of the topic-level US aggregate: each indicator is
# divided by the divisor of the row's year and stored as norm_<indicator>.
us_topic_indicators <- c("Papers", "Paper_Citations", "Articles", "Patents",
                         "Citations")

# Start every normalised column as NA; rows from years without a divisor
# keep that NA.
for (indicator in us_topic_indicators) {
  merged_topic_US[[paste0("norm_", indicator)]] <- NA
}

for (pair in year_divisors_US) {
  # Rows of merged_topic_US that fall into this divisor's calendar year.
  in_year <- format(merged_topic_US$Date, "%Y") %in% pair$year

  for (indicator in us_topic_indicators) {
    target <- paste0("norm_", indicator)
    merged_topic_US[[target]][in_year] <-
      merged_topic_US[[indicator]][in_year] / pair$divisor
  }
}

head(merged_topic_US, 16)

## # A tibble: 16 × 13
##    Date       Topic      Country  Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
##    <date>     <chr>      <chr>     <dbl>   <dbl>   <int>   <int>   <int>   <dbl>
##  1 2010-01-01 Agricultu… US      5.83e+1  1.07e3       0       0       0 1.70e-1
##  2 2010-01-01 Artificia… US      1.55e+4  2.92e5       0     885   23923 4.52e+1
##  3 2010-01-01 Automotiv… US      2.06e+1  1.41e3       0       2      70 6.00e-2
##  4 2010-01-01 Computer … US      2.14e+3  3.12e4       0     466   11619 6.22e+0
##  5 2010-01-01 Food indu… US      5   e+0  6   e0       0       0       0 1.46e-2
##  6 2010-01-01 Higher ed… US      5.78e+1  5.96e2       0       0       0 1.68e-1
##  7 2010-01-01 Industry … US      5   e-1  0            0       0       0 1.46e-3
##  8 2010-01-01 Internet … US      9   e+0  7.43e1       0       0       0 2.62e-2
##  9 2010-01-01 Machine l… US      3.91e+3  8.45e4       0     149    3829 1.14e+1
## 10 2010-01-01 Manufactu… US      6.67e-1  6.67e0       0       0       0 1.94e-3
## 11 2010-01-01 Medicine   US      2.35e+3  4.70e4       0     110    6364 6.86e+0
## 12 2010-01-01 Natural l… US      6.56e+2  1.32e4       0      21     391 1.91e+0
## 13 2010-01-01 Pharmacol… US      3.45e+1  5.43e2       0       0       0 1.00e-1
## 14 2010-01-01 Robot      US      6.31e+2  6.01e3       0      25    4488 1.84e+0
## 15 2010-01-01 Robotics   US      1.05e+2  6.69e2       0       0       0 3.05e-1
## 16 2010-02-01 Agricultu… US      9.17e+0  3.08e2       0       0       0 2.67e-2
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## #   norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers

# Yearly total population of the countries in countries_JUS, labelled "JUS".
# Column order (Year, Country, Population) matters: the divisors below read
# the population by column position. sum() runs without na.rm, so a missing
# value for any country makes that year's total NA.
population_JUS <- population %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Year) %>%
  summarize(Country = "JUS", Population = sum(Population)) %>%
  ungroup()

# Monthly totals of every indicator over the countries in countries_JUS,
# labelled "JUS". Missing values are skipped, so a month with no data at all
# for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_JUS <- merged %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Date) %>%
  summarize(
    Country = "JUS",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      ~ sum(.x, na.rm = TRUE)
    )
  ) %>%
  ungroup()

head(merged_JUS)

## # A tibble: 6 × 10
##   Date       Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##   <date>     <chr>    <dbl>        <dbl>   <int>   <int>   <int>   <int>   <int>
## 1 2010-01-01 JUS      1730.       11720.       0       0       0       0       0
## 2 2010-02-01 JUS       230.        4881.       0       2      19       4       0
## 3 2010-03-01 JUS       426.        5859.       0       3      15       4       0
## 4 2010-04-01 JUS       317.        4639.       0       1       2       2       2
## 5 2010-05-01 JUS       362.        4944.       0       4      30       1       0
## 6 2010-06-01 JUS       603.        6204.       0       0       0       2       0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Test of aggregating the topic-level data: monthly totals per topic over the
# countries in countries_JUS, labelled "JUS". Missing values are skipped, so
# a month/topic with no data for an indicator sums to 0 rather than NA.
# NOTE(review): Papers and Paper_Citations are built from the *_norm columns
# while the other indicators use raw counts - confirm this is intended.
merged_topic_JUS <- merged_topic %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Date, Topic) %>%
  summarize(
    Country = "JUS",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    # Drop all grouping explicitly instead of relying on summarize() peeling
    # off one level; this also silences the "grouped output" message.
    .groups = "drop"
  )

## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.

# Preview the first 12 rows of the topic-level JUS aggregate.
head(merged_topic_JUS, 12)

## # A tibble: 12 × 8
##    Date       Topic               Country Papers Paper…¹ Artic…² Patents Citat…³
##    <date>     <chr>               <chr>    <dbl>   <dbl>   <int>   <int>   <int>
##  1 2010-01-01 Agriculture         JUS     2.2 e0  8   e0       0       0       0
##  2 2010-01-01 Artificial intelli… JUS     1.58e3  1.06e4       0       0       0
##  3 2010-01-01 Automotive industry JUS     3   e0  4   e0       0       0       0
##  4 2010-01-01 Computer vision     JUS     1.92e2  1.17e3       0       0       0
##  5 2010-01-01 Higher education    JUS     4.17e0  3.33e0       0       0       0
##  6 2010-01-01 Internet of Things  JUS     1   e0  7   e0       0       0       0
##  7 2010-01-01 Machine learning    JUS     3.74e2  2.73e3       0       0       0
##  8 2010-01-01 Manufacturing       JUS     2   e0  1.6 e1       0       0       0
##  9 2010-01-01 Medicine            JUS     1.41e2  1.82e3       0       0       0
## 10 2010-01-01 Natural language p… JUS     8.75e1  3.73e2       0       0       0
## 11 2010-01-01 Pharmacology        JUS     1.4 e0  1.52e1       0       0       0
## 12 2010-01-01 Robot               JUS     9.11e1  5.61e2       0       0       0
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations

# Per-year population divisors for the JUS aggregate.
# Each entry is list(year = <calendar year as a string>, divisor = <that
# year's population divided by n_of_people>). The year is kept as a string so
# it can be compared directly with format(Date, "%Y").
# Year and population are read from the same row, so the pairing no longer
# relies on row 1 being 2010 or on the table having exactly 13 rows.
# NOTE(review): assumes population_JUS has Year and Population columns like
# the other regional population tables - confirm against its definition.
year_divisors_JUS <- lapply(seq_len(nrow(population_JUS)), function(i) {
  list(
    year = as.character(population_JUS$Year[i]),
    divisor = as.double(population_JUS$Population[i]) / n_of_people
  )
})

# Add a norm_<indicator> column to merged_JUS for every indicator.
# Each value is divided by the divisor that year_divisors_JUS lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The year -> divisor lookup is built once and applied to whole columns, so
# the result does not depend on row order and is always of type double.
divisor_by_year <- setNames(
  vapply(year_divisors_JUS, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_JUS, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_JUS$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents",
  "Citations", "AI_Projects", "Projects", "Funding"
)) {
  merged_JUS[[paste0("norm_", indicator)]] <-
    merged_JUS[[indicator]] / row_divisor
}

head(merged_JUS, 16)

## # A tibble: 16 × 18
##    Date       Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##    <date>     <chr>    <dbl>       <dbl>   <int>   <int>   <int>   <int>   <int>
##  1 2010-01-01 JUS      1730.      11720.       0       0       0       0       0
##  2 2010-02-01 JUS       230.       4881.       0       2      19       4       0
##  3 2010-03-01 JUS       426.       5859.       0       3      15       4       0
##  4 2010-04-01 JUS       317.       4639.       0       1       2       2       2
##  5 2010-05-01 JUS       362.       4944.       0       4      30       1       0
##  6 2010-06-01 JUS       603.       6204.       0       0       0       2       0
##  7 2010-07-01 JUS       511.       6971.       0       0       0       1       0
##  8 2010-08-01 JUS       534.       6112.       0       1       0       7       0
##  9 2010-09-01 JUS       633.       5577.       0       2      34       4       0
## 10 2010-10-01 JUS       461.       5600.       0       2      10       1       0
## 11 2010-11-01 JUS       448.       4417.       0       3      66       6       0
## 12 2010-12-01 JUS       565.       5947.       0       5     153       0       0
## 13 2011-01-01 JUS      1949.      11435.       0       0       0       7       0
## 14 2011-02-01 JUS       273.       5200.       0       1      15       6       0
## 15 2011-03-01 JUS       435.       6407.       0       3      20       2       0
## 16 2011-04-01 JUS       386.       5346.       0       1      42      10       0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## #   norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## #   norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## #   norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## #   ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Add a norm_<indicator> column to merged_topic_JUS for every indicator.
# Each value is divided by the divisor that year_divisors_JUS lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The table holds several rows per Date (one per Topic), so the divisor is
# looked up per row instead of being matched through a per-year row mask.
divisor_by_year <- setNames(
  vapply(year_divisors_JUS, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_JUS, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_topic_JUS$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
)) {
  merged_topic_JUS[[paste0("norm_", indicator)]] <-
    merged_topic_JUS[[indicator]] / row_divisor
}

head(merged_topic_JUS, 16)

## # A tibble: 16 × 13
##    Date       Topic       Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
##    <date>     <chr>       <chr>    <dbl>   <dbl>   <int>   <int>   <int>   <dbl>
##  1 2010-01-01 Agriculture JUS     2.2 e0  8   e0       0       0       0 0.00495
##  2 2010-01-01 Artificial… JUS     1.58e3  1.06e4       0       0       0 3.55   
##  3 2010-01-01 Automotive… JUS     3   e0  4   e0       0       0       0 0.00676
##  4 2010-01-01 Computer v… JUS     1.92e2  1.17e3       0       0       0 0.433  
##  5 2010-01-01 Higher edu… JUS     4.17e0  3.33e0       0       0       0 0.00938
##  6 2010-01-01 Internet o… JUS     1   e0  7   e0       0       0       0 0.00225
##  7 2010-01-01 Machine le… JUS     3.74e2  2.73e3       0       0       0 0.843  
##  8 2010-01-01 Manufactur… JUS     2   e0  1.6 e1       0       0       0 0.00450
##  9 2010-01-01 Medicine    JUS     1.41e2  1.82e3       0       0       0 0.319  
## 10 2010-01-01 Natural la… JUS     8.75e1  3.73e2       0       0       0 0.197  
## 11 2010-01-01 Pharmacolo… JUS     1.4 e0  1.52e1       0       0       0 0.00315
## 12 2010-01-01 Robot       JUS     9.11e1  5.61e2       0       0       0 0.205  
## 13 2010-01-01 Robotics    JUS     1.36e1  2.20e2       0       0       0 0.0306 
## 14 2010-02-01 Artificial… JUS     2.04e2  4.29e3       0       2      19 0.460  
## 15 2010-02-01 Automotive… JUS     1   e0  1   e1       0       0       0 0.00225
## 16 2010-02-01 Computer v… JUS     2.58e1  5.43e2       0       0       0 0.0581 
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## #   norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers

# Yearly population of the ANZ aggregate: one row per Year holding the summed
# Population of the countries listed in countries_ANZ.
# sum() is called without na.rm, so a missing country value makes that
# year's total NA.
population_ANZ <- population %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Year) %>%
  summarise(Country = "ANZ", Population = sum(Population), .groups = "drop")

# Collapse the countries listed in countries_ANZ into a single "ANZ" row per
# Date by summing every indicator (NAs are skipped).
merged_ANZ <- merged %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Date) %>%
  summarise(
    Country = "ANZ",
    # The *_norm paper columns are summed under their shorter output names.
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    # The remaining indicators keep their names.
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      ~ sum(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )

head(merged_ANZ, n = 6L)

## # A tibble: 6 × 10
##   Date       Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##   <date>     <chr>    <dbl>        <dbl>   <int>   <int>   <int>   <int>   <int>
## 1 2010-01-01 ANZ      2148.       31917.       0       7      63       1       0
## 2 2010-02-01 ANZ       287.       10542.       0      17     149       0       0
## 3 2010-03-01 ANZ       424.       13055.       0      16     187       1       0
## 4 2010-04-01 ANZ       297.       10811.       0      25     216       0       0
## 5 2010-05-01 ANZ       435.       12453.       0      16     139       0       0
## 6 2010-06-01 ANZ       531.       12745.       0      14      76       2       0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Test of aggregating the per-topic data: keep only the countries listed in
# countries_ANZ and collapse them into one "ANZ" row per Date and Topic.
merged_topic_ANZ <- merged_topic %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "ANZ",
    # The *_norm paper columns are summed under their shorter output names.
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    # The remaining indicators keep their names; NAs are skipped in each sum.
    across(c(Articles, Patents, Citations), ~ sum(.x, na.rm = TRUE))
  ) %>%
  ungroup()

## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.

head(merged_topic_ANZ, n = 12L)

## # A tibble: 12 × 8
##    Date       Topic               Country Papers Paper…¹ Artic…² Patents Citat…³
##    <date>     <chr>               <chr>    <dbl>   <dbl>   <int>   <int>   <int>
##  1 2010-01-01 Agriculture         ANZ     1.21e1  2.44e2       0       0       0
##  2 2010-01-01 Artificial intelli… ANZ     1.85e3  2.83e4       0       7      63
##  3 2010-01-01 Automotive industry ANZ     2.25e0  8.5 e0       0       0       0
##  4 2010-01-01 Computer vision     ANZ     2.19e2  2.09e3       0       4      18
##  5 2010-01-01 Higher education    ANZ     2.23e1  2.23e2       0       0       0
##  6 2010-01-01 Internet of Things  ANZ     2   e0  5.2 e1       0       0       0
##  7 2010-01-01 Machine learning    ANZ     5.24e2  6.86e3       0       0       0
##  8 2010-01-01 Manufacturing       ANZ     1   e0  7   e0       0       0       0
##  9 2010-01-01 Medicine            ANZ     3.28e2  4.17e3       0       1       1
## 10 2010-01-01 Natural language p… ANZ     5.81e1  3.46e2       0       0       0
## 11 2010-01-01 Pharmacology        ANZ     1.33e0  5.33e0       0       0       0
## 12 2010-01-01 Robot               ANZ     7.97e1  6.15e2       0       1      14
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations

# Per-year population divisors for the ANZ aggregate.
# Each entry is list(year = <calendar year as a string>, divisor = <that
# year's population divided by n_of_people>). The year is kept as a string so
# it can be compared directly with format(Date, "%Y").
# Year and population are read from the same row of population_ANZ (one row
# per Year), so the pairing no longer relies on row 1 being 2010 or on the
# table having exactly 13 rows.
year_divisors_ANZ <- lapply(seq_len(nrow(population_ANZ)), function(i) {
  list(
    year = as.character(population_ANZ$Year[i]),
    divisor = as.double(population_ANZ$Population[i]) / n_of_people
  )
})

# Add a norm_<indicator> column to merged_ANZ for every indicator.
# Each value is divided by the divisor that year_divisors_ANZ lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The year -> divisor lookup is built once and applied to whole columns, so
# the result does not depend on row order and is always of type double.
divisor_by_year <- setNames(
  vapply(year_divisors_ANZ, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_ANZ, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_ANZ$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents",
  "Citations", "AI_Projects", "Projects", "Funding"
)) {
  merged_ANZ[[paste0("norm_", indicator)]] <-
    merged_ANZ[[indicator]] / row_divisor
}

head(merged_ANZ, 16)

## # A tibble: 16 × 18
##    Date       Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##    <date>     <chr>    <dbl>       <dbl>   <int>   <int>   <int>   <int>   <int>
##  1 2010-01-01 ANZ      2148.      31917.       0       7      63       1       0
##  2 2010-02-01 ANZ       287.      10542.       0      17     149       0       0
##  3 2010-03-01 ANZ       424.      13055.       0      16     187       1       0
##  4 2010-04-01 ANZ       297.      10811.       0      25     216       0       0
##  5 2010-05-01 ANZ       435.      12453.       0      16     139       0       0
##  6 2010-06-01 ANZ       531.      12745.       0      14      76       2       0
##  7 2010-07-01 ANZ       590.      14253.       0      15     482       0       0
##  8 2010-08-01 ANZ       493.      12197.       0      24     221       3       0
##  9 2010-09-01 ANZ       480.      17239.       0      23     454       0       0
## 10 2010-10-01 ANZ       452.      12061.       0      18     176       0       0
## 11 2010-11-01 ANZ       459.      13410.       0      24     515       3       0
## 12 2010-12-01 ANZ       716.      15637.       0      26     343       2       0
## 13 2011-01-01 ANZ      2241.      27010.       0      13     136       2       0
## 14 2011-02-01 ANZ       316.      10394.       0      18     182       7       0
## 15 2011-03-01 ANZ       387.      10715.       0      33     287       4       0
## 16 2011-04-01 ANZ       355.      12623.       0      26     372      10       0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## #   norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## #   norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## #   norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## #   ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Add a norm_<indicator> column to merged_topic_ANZ for every indicator.
# Each value is divided by the divisor that year_divisors_ANZ lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The table holds several rows per Date (one per Topic), so the divisor is
# looked up per row instead of being matched through a per-year row mask.
divisor_by_year <- setNames(
  vapply(year_divisors_ANZ, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_ANZ, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_topic_ANZ$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
)) {
  merged_topic_ANZ[[paste0("norm_", indicator)]] <-
    merged_topic_ANZ[[indicator]] / row_divisor
}

head(merged_topic_ANZ, 16)

## # A tibble: 16 × 13
##    Date       Topic      Country  Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
##    <date>     <chr>      <chr>     <dbl>   <dbl>   <int>   <int>   <int>   <dbl>
##  1 2010-01-01 Agricultu… ANZ       12.1   2.44e2       0       0       0  0.459 
##  2 2010-01-01 Artificia… ANZ     1852.    2.83e4       0       7      63 70.2   
##  3 2010-01-01 Automotiv… ANZ        2.25  8.5 e0       0       0       0  0.0853
##  4 2010-01-01 Computer … ANZ      219.    2.09e3       0       4      18  8.32  
##  5 2010-01-01 Higher ed… ANZ       22.3   2.23e2       0       0       0  0.845 
##  6 2010-01-01 Internet … ANZ        2     5.2 e1       0       0       0  0.0758
##  7 2010-01-01 Machine l… ANZ      524.    6.86e3       0       0       0 19.9   
##  8 2010-01-01 Manufactu… ANZ        1     7   e0       0       0       0  0.0379
##  9 2010-01-01 Medicine   ANZ      328.    4.17e3       0       1       1 12.4   
## 10 2010-01-01 Natural l… ANZ       58.1   3.46e2       0       0       0  2.20  
## 11 2010-01-01 Pharmacol… ANZ        1.33  5.33e0       0       0       0  0.0505
## 12 2010-01-01 Robot      ANZ       79.7   6.15e2       0       1      14  3.02  
## 13 2010-01-01 Robotics   ANZ       19.8   2.19e2       0       0       0  0.749 
## 14 2010-02-01 Agricultu… ANZ        1     1.70e3       0       0       0  0.0379
## 15 2010-02-01 Artificia… ANZ      238.    8.85e3       0      16     148  9.02  
## 16 2010-02-01 Automotiv… ANZ        0.5   0            0       0       0  0.0190
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## #   norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers

# Yearly population of the ASIA aggregate: one row per Year holding the
# summed Population of the countries listed in countries_ASIA.
# sum() is called without na.rm, so a missing country value makes that
# year's total NA.
population_ASIA <- population %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Year) %>%
  summarise(Country = "ASIA", Population = sum(Population), .groups = "drop")

# Collapse the countries listed in countries_ASIA into a single "ASIA" row
# per Date by summing every indicator (NAs are skipped).
merged_ASIA <- merged %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Date) %>%
  summarise(
    Country = "ASIA",
    # The *_norm paper columns are summed under their shorter output names.
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    # The remaining indicators keep their names.
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      ~ sum(.x, na.rm = TRUE)
    ),
    .groups = "drop"
  )

head(merged_ASIA, n = 6L)

## # A tibble: 6 × 10
##   Date       Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##   <date>     <chr>    <dbl>        <dbl>   <int>   <int>   <int>   <int>   <int>
## 1 2010-01-01 ASIA    76073.      156723.       0     781    9137       7       1
## 2 2010-02-01 ASIA     2631.       54533.       0     884   10547       6       0
## 3 2010-03-01 ASIA     4125.       66784.       0    1256   13469       7       1
## 4 2010-04-01 ASIA     3098.       67939.       0     774    9861       6       1
## 5 2010-05-01 ASIA     5763.       57968.       0     738    8848       5       0
## 6 2010-06-01 ASIA     8615.       87909.       0     985   10503      10       0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Test of aggregating the per-topic data: keep only the countries listed in
# countries_ASIA and collapse them into one "ASIA" row per Date and Topic.
merged_topic_ASIA <- merged_topic %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "ASIA",
    # The *_norm paper columns are summed under their shorter output names.
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    # The remaining indicators keep their names; NAs are skipped in each sum.
    across(c(Articles, Patents, Citations), ~ sum(.x, na.rm = TRUE))
  ) %>%
  ungroup()

## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.

head(merged_topic_ASIA, n = 12L)

## # A tibble: 12 × 8
##    Date       Topic               Country Papers Paper…¹ Artic…² Patents Citat…³
##    <date>     <chr>               <chr>    <dbl>   <dbl>   <int>   <int>   <int>
##  1 2010-01-01 Agriculture         ASIA    4.41e2    272.       0       1       1
##  2 2010-01-01 Artificial intelli… ASIA    6.71e4 143606.       0     767    9065
##  3 2010-01-01 Automotive industry ASIA    1.32e2    186.       0       0       0
##  4 2010-01-01 Computer vision     ASIA    1.07e4  27900.       0     565    6779
##  5 2010-01-01 Food industry       ASIA    4   e0      0        0       0       0
##  6 2010-01-01 Higher education    ASIA    3.42e2    102.       0       0       0
##  7 2010-01-01 Internet of Things  ASIA    2.83e1    198        0       1      31
##  8 2010-01-01 Machine learning    ASIA    1.61e4  35751.       0      62     616
##  9 2010-01-01 Manufacturing       ASIA    5.6 e1     87        0       0       0
## 10 2010-01-01 Medicine            ASIA    5.81e3  16438.       0      82     920
## 11 2010-01-01 Natural language p… ASIA    2.06e3   3453.       0       9      85
## 12 2010-01-01 Pharmacology        ASIA    8.94e1    255.       0       1       2
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations

# Per-year population divisors for the ASIA aggregate.
# Each entry is list(year = <calendar year as a string>, divisor = <that
# year's population divided by n_of_people>). The year is kept as a string so
# it can be compared directly with format(Date, "%Y").
# Year and population are read from the same row of population_ASIA (one row
# per Year), so the pairing no longer relies on row 1 being 2010 or on the
# table having exactly 13 rows.
year_divisors_ASIA <- lapply(seq_len(nrow(population_ASIA)), function(i) {
  list(
    year = as.character(population_ASIA$Year[i]),
    divisor = as.double(population_ASIA$Population[i]) / n_of_people
  )
})

# Add a norm_<indicator> column to merged_ASIA for every indicator.
# Each value is divided by the divisor that year_divisors_ASIA lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The year -> divisor lookup is built once and applied to whole columns, so
# the result does not depend on row order and is always of type double.
divisor_by_year <- setNames(
  vapply(year_divisors_ASIA, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_ASIA, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_ASIA$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents",
  "Citations", "AI_Projects", "Projects", "Funding"
)) {
  merged_ASIA[[paste0("norm_", indicator)]] <-
    merged_ASIA[[indicator]] / row_divisor
}

head(merged_ASIA, 16)

## # A tibble: 16 × 18
##    Date       Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
##    <date>     <chr>    <dbl>       <dbl>   <int>   <int>   <int>   <int>   <int>
##  1 2010-01-01 ASIA    76073.     156723.       0     781    9137       7       1
##  2 2010-02-01 ASIA     2631.      54533.       0     884   10547       6       0
##  3 2010-03-01 ASIA     4125.      66784.       0    1256   13469       7       1
##  4 2010-04-01 ASIA     3098.      67939.       0     774    9861       6       1
##  5 2010-05-01 ASIA     5763.      57968.       0     738    8848       5       0
##  6 2010-06-01 ASIA     8615.      87909.       0     985   10503      10       0
##  7 2010-07-01 ASIA     8456.      71482.       0     933   11062       2       0
##  8 2010-08-01 ASIA     8344.      68981.       0     930   10733      14       0
##  9 2010-09-01 ASIA     5456.      66306.       0    1216   11463      12       1
## 10 2010-10-01 ASIA    10518.      94118.       0     962   10289      11       2
## 11 2010-11-01 ASIA     5760.      59197.       0     948    9758       7       0
## 12 2010-12-01 ASIA     8913.      76870.       0    1168   11754      11       0
## 13 2011-01-01 ASIA    76733.     159363.       0     851    9184       9       1
## 14 2011-02-01 ASIA     3534.      63697.       0     927   10020      27       1
## 15 2011-03-01 ASIA     4578.      82307.       0    1448   13198      19       0
## 16 2011-04-01 ASIA     4979.      93269.       0     851    9289      38       0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## #   norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## #   norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## #   norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## #   ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects

# Add a norm_<indicator> column to merged_topic_ASIA for every indicator.
# Each value is divided by the divisor that year_divisors_ASIA lists for the
# calendar year of the row's Date; rows whose year has no entry become NA.
# The table holds several rows per Date (one per Topic), so the divisor is
# looked up per row instead of being matched through a per-year row mask.
divisor_by_year <- setNames(
  vapply(year_divisors_ASIA, function(pair) pair$divisor, numeric(1)),
  vapply(year_divisors_ASIA, function(pair) pair$year, character(1))
)
# unname() stops the year labels from being carried into the new columns.
row_divisor <- unname(divisor_by_year[format(merged_topic_ASIA$Date, "%Y")])

# The order of this vector fixes the order of the appended norm_* columns.
for (indicator in c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
)) {
  merged_topic_ASIA[[paste0("norm_", indicator)]] <-
    merged_topic_ASIA[[indicator]] / row_divisor
}

head(merged_topic_ASIA, 16)

## # A tibble: 16 × 13
##    Date       Topic       Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
##    <date>     <chr>       <chr>    <dbl>   <dbl>   <int>   <int>   <int>   <dbl>
##  1 2010-01-01 Agriculture ASIA    4.41e2    272.       0       1       1 1.46e-1
##  2 2010-01-01 Artificial… ASIA    6.71e4 143606.       0     767    9065 2.22e+1
##  3 2010-01-01 Automotive… ASIA    1.32e2    186.       0       0       0 4.37e-2
##  4 2010-01-01 Computer v… ASIA    1.07e4  27900.       0     565    6779 3.56e+0
##  5 2010-01-01 Food indus… ASIA    4   e0      0        0       0       0 1.32e-3
##  6 2010-01-01 Higher edu… ASIA    3.42e2    102.       0       0       0 1.13e-1
##  7 2010-01-01 Internet o… ASIA    2.83e1    198        0       1      31 9.38e-3
##  8 2010-01-01 Machine le… ASIA    1.61e4  35751.       0      62     616 5.31e+0
##  9 2010-01-01 Manufactur… ASIA    5.6 e1     87        0       0       0 1.85e-2
## 10 2010-01-01 Medicine    ASIA    5.81e3  16438.       0      82     920 1.93e+0
## 11 2010-01-01 Natural la… ASIA    2.06e3   3453.       0       9      85 6.83e-1
## 12 2010-01-01 Pharmacolo… ASIA    8.94e1    255.       0       1       2 2.96e-2
## 13 2010-01-01 Robot       ASIA    2.92e3   6381.       0      13     225 9.66e-1
## 14 2010-01-01 Robotics    ASIA    1.21e2    558.       0       0       0 4.01e-2
## 15 2010-02-01 Agriculture ASIA    1   e1    174        0       0       0 3.31e-3
## 16 2010-02-01 Artificial… ASIA    2.36e3  46617.       0     866   10380 7.80e-1
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## #   norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## #   ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers

# Stack the five regional aggregates row-wise, once for the overall
# indicators and once for the per-topic indicators.
total <- do.call(
  rbind,
  list(merged_EU, merged_US, merged_JUS, merged_ANZ, merged_ASIA)
)
total_topic <- do.call(
  rbind,
  list(
    merged_topic_EU, merged_topic_US, merged_topic_JUS,
    merged_topic_ANZ, merged_topic_ASIA
  )
)

# Export both combined tables as CSV files without a row-name column.
write.csv(x = total, file = "TotalAggregated.csv", row.names = FALSE)
write.csv(
  x = total_topic,
  file = "TotalAggregatedTopic.csv",
  row.names = FALSE
)