# Adzuna job-postings time series (CountryTimeSeries.csv), kept from 2010 on.
job <- read.csv("indicators_econ/Adzuna/CountryTimeSeries.csv") %>%
  rename(Postings = X..Job.postings, Month = YearMonth)
# Month gets "-01" appended and is parsed as a Date (presumably "YYYY-MM"
# input, so every observation is anchored on the first day of its month).
job$Date <- as.Date(paste0(job$Month, "-01"), format = "%Y-%m-%d")
job <- job[job$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
job$Month <- NULL
# Cordis project time series (CountryTimeSeries.csv), kept from 2010 on.
project_fund <- read.csv("indicators_econ/Cordis/CountryTimeSeries.csv") %>%
  rename(Month = YearMonth, Projects = X..Projects)
# Month gets "-01" appended and is parsed as a Date (first day of the month).
project_fund$Date <- as.Date(
  paste0(project_fund$Month, "-01"),
  format = "%Y-%m-%d"
)
project_fund <- project_fund[project_fund$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
project_fund$Month <- NULL
# GitHub AI software-project time series (CountryTimeSeries.csv), raw and
# normalised counts, kept from 2010 on.
project_soft <- read.csv("indicators_econ/Github/CountryTimeSeries.csv") %>%
  rename(
    Month = YearMonth,
    AI_Projects = X..AI.software.projects,
    AI_Projects_norm = X..AI.software.projects..normalized.
  )
# Month gets "-01" appended and is parsed as a Date (first day of the month).
project_soft$Date <- as.Date(
  paste0(project_soft$Month, "-01"),
  format = "%Y-%m-%d"
)
project_soft <- project_soft[project_soft$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
project_soft$Month <- NULL
# News article time series (CountryTimeSeries.csv), kept from 2010 on.
article <- read.csv("indicators_econ/News/CountryTimeSeries.csv") %>%
  rename(Month = YearMonth, Articles = X..Articles)
# Month gets "-01" appended and is parsed as a Date (first day of the month).
article$Date <- as.Date(paste0(article$Month, "-01"), format = "%Y-%m-%d")
article <- article[article$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
article$Month <- NULL
# News article time series broken down by topic (CountryTopicTimeSeries.csv),
# kept from 2010 on. The source column `concept` becomes `Topic`.
article_topic <- read.csv("indicators_econ/News/CountryTopicTimeSeries.csv") %>%
  rename(Month = YearMonth, Articles = X..Articles, Topic = concept)
# Month gets "-01" appended and is parsed as a Date (first day of the month).
article_topic$Date <- as.Date(
  paste0(article_topic$Month, "-01"),
  format = "%Y-%m-%d"
)
article_topic <- article_topic[article_topic$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
article_topic$Month <- NULL
# Patent time series (CountryTimeSeries.csv): patent and citation counts, raw
# and normalised, kept from 2010 on.
patent <- read.csv("indicators_econ/Patents/CountryTimeSeries.csv") %>%
  rename(
    Month = YearMonth,
    Patents = X..Patents,
    Patents_norm = X..Patents..normalized.,
    Citations = X..Citations,
    Citations_norm = X..Citations..normalized.
  )
# Month gets "-01" appended and is parsed as a Date (first day of the month).
patent$Date <- as.Date(paste0(patent$Month, "-01"), format = "%Y-%m-%d")
patent <- patent[patent$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
patent$Month <- NULL
# Patent time series broken down by topic (CountryTopicTimeSeries.csv): patent
# and citation counts, raw and normalised, kept from 2010 on.
patent_topic <- read.csv("indicators_econ/Patents/CountryTopicTimeSeries.csv") %>%
  rename(
    Month = YearMonth,
    Patents = X..Patents,
    Patents_norm = X..Patents..normalized.,
    Citations = X..Citations,
    Citations_norm = X..Citations..normalized.
  )
# Month gets "-01" appended and is parsed as a Date (first day of the month).
patent_topic$Date <- as.Date(
  paste0(patent_topic$Month, "-01"),
  format = "%Y-%m-%d"
)
patent_topic <- patent_topic[patent_topic$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
patent_topic$Month <- NULL
# Research-paper time series (CountryTimeSeries.csv), kept from 2010 on.
# Citation columns are renamed to Paper_Citations* so they do not collide with
# the patent table's Citations* columns when the tables are merged.
research <- read.csv("indicators_econ/Research/CountryTimeSeries.csv") %>%
  rename(
    Month = YearMonth,
    Papers = X..Papers,
    Papers_norm = X..Papers..normalized.,
    Paper_Citations = X..Citations,
    Paper_Citations_norm = X..Citations..normalized.
  )
# Month gets "-01" appended and is parsed as a Date (first day of the month).
research$Date <- as.Date(paste0(research$Month, "-01"), format = "%Y-%m-%d")
research <- research[research$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
research$Month <- NULL
# Research-paper time series broken down by topic (CountryTopicTimeSeries.csv),
# kept from 2010 on. Citation columns are renamed to Paper_Citations* so they
# do not collide with the patent table's Citations* columns when merged.
research_topic <- read.csv("indicators_econ/Research/CountryTopicTimeSeries.csv") %>%
  rename(
    Month = YearMonth,
    Papers = X..Papers,
    Papers_norm = X..Papers..normalized.,
    Paper_Citations = X..Citations,
    Paper_Citations_norm = X..Citations..normalized.
  )
# Month gets "-01" appended and is parsed as a Date (first day of the month).
research_topic$Date <- as.Date(
  paste0(research_topic$Month, "-01"),
  format = "%Y-%m-%d"
)
research_topic <- research_topic[research_topic$Date >= as.Date("2010-01-01"), ]
# The textual month key is redundant once Date exists.
research_topic$Month <- NULL
# Population table (semicolon-separated); Country.Code is renamed to Country so
# it can be filtered with the same country-code vectors as the indicator data.
# NOTE(review): the path points into a user's home directory, so the script is
# not portable as written -- consider moving the file next to the other inputs.
population <- read.csv("~/Desktop/PKP/Population.csv", sep = ";") %>%
  rename(Country = Country.Code)
# Merge a list of data frames into one by full outer join on "Country" and
# "Date".
#
# datasets: non-empty list of data frames, each with Country and Date columns.
# Returns the successive merge of all elements (rows that appear in any input
# are kept, missing values become NA). A one-element list is returned as is;
# an empty list is an error.
merge_datasets <- function(datasets) {
  if (length(datasets) == 0L) {
    stop("`datasets` must contain at least one data frame.", call. = FALSE)
  }
  # Reduce() folds left to right and handles the single-element case, which
  # a `2:length(datasets)` loop does not (2:1 counts down to c(2, 1)).
  Reduce(
    function(merged_dataset, next_dataset) {
      merge(merged_dataset, next_dataset, by = c("Country", "Date"), all = TRUE)
    },
    datasets
  )
}
# Merge a list of topic-level data frames into one by full outer join on
# "Country", "Date" and "Topic".
#
# datasets: non-empty list of data frames, each with Country, Date and Topic
#   columns.
# Returns the successive merge of all elements (rows that appear in any input
# are kept, missing values become NA). A one-element list is returned as is;
# an empty list is an error.
merge_topic_datasets <- function(datasets) {
  if (length(datasets) == 0L) {
    stop("`datasets` must contain at least one data frame.", call. = FALSE)
  }
  # Reduce() folds left to right and handles the single-element case, which
  # a `2:length(datasets)` loop does not (2:1 counts down to c(2, 1)).
  Reduce(
    function(merged_dataset, next_dataset) {
      merge(
        merged_dataset, next_dataset,
        by = c("Country", "Date", "Topic"),
        all = TRUE
      )
    },
    datasets
  )
}
# Inputs of the two panels. The country-level panel combines research, news,
# patents, GitHub projects and Cordis projects (job postings are not included);
# the topic-level panel combines the three sources that carry a Topic column.
all_datasets <- list(research, article, patent, project_soft, project_fund)
all_datasets_topic <- list(research_topic, article_topic, patent_topic)
# Full outer joins, so a country/month present in any source is kept with NA
# for the indicators it lacks.
# NOTE(review): the printed output shows both "Internet of things" and
# "Internet of Things" as topics; sources that differ only in capitalisation
# presumably never join on Topic -- confirm and harmonise if unintended.
merged <- merge_datasets(all_datasets)
merged_topic <- merge_topic_datasets(all_datasets_topic)
tail(merged, n = 6L)
## Country Date Papers Papers_norm Paper_Citations
## 27006 ZWE 2022-07-01 21 11.450000 4
## 27007 ZWE 2022-08-01 11 6.861111 14
## 27008 ZWE 2022-09-01 9 4.466667 21
## 27009 ZWE 2022-10-01 9 5.904762 5
## 27010 ZWE 2022-11-01 28 20.938636 12
## 27011 ZWE 2022-12-01 7 3.433333 3
## Paper_Citations_norm Articles Patents Patents_norm Citations
## 27006 2.2000000 34 NA NA NA
## 27007 6.3888889 10 NA NA NA
## 27008 16.5666667 16 NA NA NA
## 27009 3.3333333 21 NA NA NA
## 27010 4.0075758 20 NA NA NA
## 27011 0.7666667 27 NA NA NA
## Citations_norm AI_Projects AI_Projects_norm Projects Funding
## 27006 NA 18 17.5 NA NA
## 27007 NA 7 7.0 NA NA
## 27008 NA 8 8.0 NA NA
## 27009 NA 11 11.0 NA NA
## 27010 NA 11 11.0 NA NA
## 27011 NA 7 7.0 NA NA
# Show the last six rows of the topic-level panel.
tail(merged_topic, n = 6L)
## Country Date Topic Papers Papers_norm
## 176035 ZWE 2022-12-01 Agriculture NA NA
## 176036 ZWE 2022-12-01 Artificial intelligence 4 1.100000
## 176037 ZWE 2022-12-01 Internet of things NA NA
## 176038 ZWE 2022-12-01 Machine learning 3 2.333333
## 176039 ZWE 2022-12-01 Medicine 1 0.100000
## 176040 ZWE 2022-12-01 Robotics NA NA
## Paper_Citations Paper_Citations_norm Articles Patents Patents_norm
## 176035 NA NA 6 NA NA
## 176036 1 0.1000000 16 NA NA
## 176037 NA NA 2 NA NA
## 176038 2 0.6666667 1 NA NA
## 176039 1 0.1000000 NA NA NA
## 176040 NA NA 1 NA NA
## Citations Citations_norm
## 176035 NA NA
## 176036 NA NA
## 176037 NA NA
## 176038 NA NA
## 176039 NA NA
## 176040 NA NA
# Country-code groups used to build the regional aggregates.
# Europe: EU member states plus further European countries (e.g. GBR, CHE,
# NOR, TUR), so "EU" is a label for the wider European group here.
countries_EU <- c(
  "BEL", "BGR", "CZE", "DNK", "DEU", "EST", "IRL", "GRC", "ESP",
  "FRA", "HRV", "ITA", "CYP", "LTU", "LUX", "LVA", "POL", "HUN",
  "NLD", "MLT", "AUT", "SVN", "SVK", "ROU", "PRT", "FIN", "SWE",
  "GBR", "CHE", "NOR", "ISL", "LIE", "ALB", "TUR", "SRB", "MNE", "MKD"
)
# "US" group: United States and Canada.
countries_US <- c("USA", "CAN")
# Australia and New Zealand.
countries_ANZ <- c("AUS", "NZL")
# "JUS" group: Latin American countries.
countries_JUS <- c("ARG", "BRA", "CHL", "MEX", "PER", "URY", "COL")
# Asian group (also contains RUS).
countries_ASIA <- c(
  "CHN", "JPN", "KOR", "IND", "SGP", "VNM", "MYS", "RUS", "TWN"
)
# Yearly population of the European group: one row per Year with the summed
# population of all countries_EU members. sum() has no na.rm, so a missing
# country value makes that year's total NA.
population_EU <- population %>%
  filter(Country %in% countries_EU) %>%
  group_by(Year) %>%
  summarise(
    Country = "EU",
    Population = sum(Population),
    .groups = "drop"
  )
# Monthly totals for the European group. Papers and Paper_Citations sum the
# *_norm source columns; all other indicators sum the raw counts. Missing
# values are ignored, so a month with no data for an indicator shows 0.
merged_EU <- merged %>%
  filter(Country %in% countries_EU) %>%
  group_by(Date) %>%
  summarise(
    Country = "EU",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    AI_Projects = sum(AI_Projects, na.rm = TRUE),
    Projects = sum(Projects, na.rm = TRUE),
    Funding = sum(Funding, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_EU, n = 6L)
## # A tibble: 6 × 10
## Date Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 EU 26053. 325320. 0 616 6222 25 135
## 2 2010-02-01 EU 3181. 121857. 0 693 6488 24 18
## 3 2010-03-01 EU 5233. 148914. 0 880 9238 12 34
## 4 2010-04-01 EU 4332. 122116. 0 786 7347 19 32
## 5 2010-05-01 EU 5714. 140800. 0 718 7811 8 10
## 6 2010-06-01 EU 6998. 153567. 0 911 8024 33 30
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Test of the topic-level aggregation: totals for the European group per month
# and topic. Papers and Paper_Citations sum the *_norm source columns; the
# other indicators sum the raw counts. Missing values are ignored.
# `.groups = "drop"` states the intended ungrouped result explicitly instead of
# relying on summarise()'s default (which keeps the Date grouping and prints an
# informational message) followed by ungroup().
merged_topic_EU <- merged_topic %>%
  filter(Country %in% countries_EU) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "EU",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_topic_EU, 12)
## # A tibble: 12 × 8
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int>
## 1 2010-01-01 Agriculture EU 6.45e+1 3.12e2 0 0 0
## 2 2010-01-01 Artificial intell… EU 2.36e+4 2.86e5 0 596 5987
## 3 2010-01-01 Automotive indust… EU 6.82e+1 8.15e2 0 0 0
## 4 2010-01-01 Computer vision EU 3.11e+3 5.11e4 0 332 3306
## 5 2010-01-01 Food industry EU 3.00e+0 6 e0 0 0 0
## 6 2010-01-01 Higher education EU 4.65e+1 4.96e2 0 0 0
## 7 2010-01-01 Industry 4.0 EU 5 e-1 0 0 0 0
## 8 2010-01-01 Internet of Things EU 1.97e+1 2.33e2 0 0 0
## 9 2010-01-01 Machine learning EU 5.11e+3 8.10e4 0 57 590
## 10 2010-01-01 Manufacturing EU 5.33e+0 8.33e0 0 0 0
## 11 2010-01-01 Medicine EU 2.56e+3 3.76e4 0 100 1172
## 12 2010-01-01 Natural language … EU 1.41e+3 1.03e4 0 4 67
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations
# Normalised indicators are expressed per `n_of_people` inhabitants.
n_of_people <- 1000000
# Yearly divisors for the European group: population of the year divided by
# n_of_people, as a list of list(year = "<YYYY>", divisor = <double>).
# Each year is looked up by its Year value rather than by row position, so the
# divisors stay aligned even if population_EU starts before 2010, is unsorted
# or has gaps; a year without a population row gets an NA divisor.
year_divisors_EU <- lapply(2010:2022, function(yr) {
  pop <- population_EU$Population[which(population_EU$Year == yr)]
  list(
    year = as.character(yr),
    divisor = if (length(pop) == 1L) as.double(pop) / n_of_people else NA_real_
  )
})
# Add a norm_<indicator> column for every indicator: the monthly value divided
# by the divisor of the row's calendar year. Rows whose year has no divisor
# get NA. The work happens in local() so no temporaries leak into the session.
merged_EU <- local({
  divisor_by_year <- vapply(
    year_divisors_EU, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_EU, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_EU$Date, "%Y")])
  normalised <- merged_EU
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents",
    "Citations", "AI_Projects", "Projects", "Funding"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_EU, 16)
## # A tibble: 16 × 18
## Date Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 EU 26053. 325320. 0 616 6222 25 135
## 2 2010-02-01 EU 3181. 121857. 0 693 6488 24 18
## 3 2010-03-01 EU 5233. 148914. 0 880 9238 12 34
## 4 2010-04-01 EU 4332. 122116. 0 786 7347 19 32
## 5 2010-05-01 EU 5714. 140800. 0 718 7811 8 10
## 6 2010-06-01 EU 6998. 153567. 0 911 8024 33 30
## 7 2010-07-01 EU 6352. 135912. 0 759 6797 29 9
## 8 2010-08-01 EU 6009. 129799. 0 673 6097 57 5
## 9 2010-09-01 EU 6887. 138418. 0 804 7388 44 48
## 10 2010-10-01 EU 5853. 132084. 0 823 7208 26 26
## 11 2010-11-01 EU 4642. 115096. 0 826 8757 39 25
## 12 2010-12-01 EU 5099. 119993. 0 992 9489 41 11
## 13 2011-01-01 EU 27038. 314411. 0 615 6211 51 47
## 14 2011-02-01 EU 3463. 113495. 0 614 5661 79 52
## 15 2011-03-01 EU 4763. 168440. 0 867 8522 64 43
## 16 2011-04-01 EU 4703. 120632. 0 682 6660 128 19
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Add a norm_<indicator> column for every topic-level indicator: the value
# divided by the year_divisors_EU divisor of the row's calendar year. Rows
# whose year has no divisor get NA. A single vectorised lookup replaces the
# per-year filter/assign loop; local() keeps temporaries out of the session.
merged_topic_EU <- local({
  divisor_by_year <- vapply(
    year_divisors_EU, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_EU, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_topic_EU$Date, "%Y")])
  normalised <- merged_topic_EU
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_topic_EU, 16)
## # A tibble: 16 × 13
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 2010-01-01 Agricultu… EU 6.45e+1 3.12e2 0 0 0 1.08e-1
## 2 2010-01-01 Artificia… EU 2.36e+4 2.86e5 0 596 5987 3.96e+1
## 3 2010-01-01 Automotiv… EU 6.82e+1 8.15e2 0 0 0 1.14e-1
## 4 2010-01-01 Computer … EU 3.11e+3 5.11e4 0 332 3306 5.22e+0
## 5 2010-01-01 Food indu… EU 3.00e+0 6 e0 0 0 0 5.04e-3
## 6 2010-01-01 Higher ed… EU 4.65e+1 4.96e2 0 0 0 7.81e-2
## 7 2010-01-01 Industry … EU 5 e-1 0 0 0 0 8.39e-4
## 8 2010-01-01 Internet … EU 1.97e+1 2.33e2 0 0 0 3.30e-2
## 9 2010-01-01 Machine l… EU 5.11e+3 8.10e4 0 57 590 8.58e+0
## 10 2010-01-01 Manufactu… EU 5.33e+0 8.33e0 0 0 0 8.95e-3
## 11 2010-01-01 Medicine EU 2.56e+3 3.76e4 0 100 1172 4.30e+0
## 12 2010-01-01 Natural l… EU 1.41e+3 1.03e4 0 4 67 2.37e+0
## 13 2010-01-01 Pharmacol… EU 3.01e+1 3.93e2 0 0 0 5.05e-2
## 14 2010-01-01 Robot EU 1.10e+3 9.05e3 0 10 238 1.84e+0
## 15 2010-01-01 Robotics EU 1.80e+2 1.54e3 0 1 37 3.02e-1
## 16 2010-02-01 Agricultu… EU 5.50e+0 2.22e2 0 0 0 9.23e-3
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## # norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers
# Yearly population of the "US" group (USA and Canada): one row per Year with
# the summed population. sum() has no na.rm, so a missing country value makes
# that year's total NA.
population_US <- population %>%
  filter(Country %in% countries_US) %>%
  group_by(Year) %>%
  summarise(
    Country = "US",
    Population = sum(Population),
    .groups = "drop"
  )
# Monthly totals for the "US" group. Papers and Paper_Citations sum the *_norm
# source columns; all other indicators sum the raw counts. Missing values are
# ignored, so a month with no data for an indicator shows 0.
merged_US <- merged %>%
  filter(Country %in% countries_US) %>%
  group_by(Date) %>%
  summarise(
    Country = "US",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    AI_Projects = sum(AI_Projects, na.rm = TRUE),
    Projects = sum(Projects, na.rm = TRUE),
    Funding = sum(Funding, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_US, n = 6L)
## # A tibble: 6 × 10
## Date Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 US 17698. 334861. 0 914 24793 12 0
## 2 2010-02-01 US 2751. 120632. 0 888 20785 10 0
## 3 2010-03-01 US 7005. 177088. 0 1218 26044 16 0
## 4 2010-04-01 US 4479. 134279. 0 1197 26978 27 0
## 5 2010-05-01 US 4180. 138240. 0 1053 26349 16 0
## 6 2010-06-01 US 6050. 208470. 0 1410 30543 12 1
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Test of the topic-level aggregation: totals for the "US" group per month and
# topic. Papers and Paper_Citations sum the *_norm source columns; the other
# indicators sum the raw counts. Missing values are ignored.
# `.groups = "drop"` states the intended ungrouped result explicitly instead of
# relying on summarise()'s default (which keeps the Date grouping and prints an
# informational message) followed by ungroup().
merged_topic_US <- merged_topic %>%
  filter(Country %in% countries_US) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "US",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_topic_US, 12)
## # A tibble: 12 × 8
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int>
## 1 2010-01-01 Agriculture US 5.83e+1 1.07e3 0 0 0
## 2 2010-01-01 Artificial intell… US 1.55e+4 2.92e5 0 885 23923
## 3 2010-01-01 Automotive indust… US 2.06e+1 1.41e3 0 2 70
## 4 2010-01-01 Computer vision US 2.14e+3 3.12e4 0 466 11619
## 5 2010-01-01 Food industry US 5 e+0 6 e0 0 0 0
## 6 2010-01-01 Higher education US 5.78e+1 5.96e2 0 0 0
## 7 2010-01-01 Industry 4.0 US 5 e-1 0 0 0 0
## 8 2010-01-01 Internet of Things US 9 e+0 7.43e1 0 0 0
## 9 2010-01-01 Machine learning US 3.91e+3 8.45e4 0 149 3829
## 10 2010-01-01 Manufacturing US 6.67e-1 6.67e0 0 0 0
## 11 2010-01-01 Medicine US 2.35e+3 4.70e4 0 110 6364
## 12 2010-01-01 Natural language … US 6.56e+2 1.32e4 0 21 391
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations
# Yearly divisors for the "US" group: population of the year divided by
# n_of_people, as a list of list(year = "<YYYY>", divisor = <double>).
# Each year is looked up by its Year value rather than by row position, so the
# divisors stay aligned even if population_US starts before 2010, is unsorted
# or has gaps; a year without a population row gets an NA divisor.
year_divisors_US <- lapply(2010:2022, function(yr) {
  pop <- population_US$Population[which(population_US$Year == yr)]
  list(
    year = as.character(yr),
    divisor = if (length(pop) == 1L) as.double(pop) / n_of_people else NA_real_
  )
})
# Add a norm_<indicator> column for every indicator: the monthly value divided
# by the divisor of the row's calendar year. Rows whose year has no divisor
# get NA. The work happens in local() so no temporaries leak into the session.
merged_US <- local({
  divisor_by_year <- vapply(
    year_divisors_US, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_US, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_US$Date, "%Y")])
  normalised <- merged_US
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents",
    "Citations", "AI_Projects", "Projects", "Funding"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_US, 16)
## # A tibble: 16 × 18
## Date Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 US 17698. 334861. 0 914 24793 12 0
## 2 2010-02-01 US 2751. 120632. 0 888 20785 10 0
## 3 2010-03-01 US 7005. 177088. 0 1218 26044 16 0
## 4 2010-04-01 US 4479. 134279. 0 1197 26978 27 0
## 5 2010-05-01 US 4180. 138240. 0 1053 26349 16 0
## 6 2010-06-01 US 6050. 208470. 0 1410 30543 12 1
## 7 2010-07-01 US 4646. 135005. 0 1042 21101 9 0
## 8 2010-08-01 US 4741. 144007. 0 1041 19635 19 0
## 9 2010-09-01 US 4572. 143566. 0 1218 24876 28 1
## 10 2010-10-01 US 4515. 142516. 0 1219 24185 10 0
## 11 2010-11-01 US 3777. 123581. 0 1222 22517 16 0
## 12 2010-12-01 US 4375. 132346. 0 1657 33387 15 0
## 13 2011-01-01 US 17516. 277008. 0 946 19016 32 2
## 14 2011-02-01 US 3098. 116251. 0 982 20191 21 2
## 15 2011-03-01 US 4871. 149175. 0 1365 26053 40 0
## 16 2011-04-01 US 3923. 117758. 0 1153 22868 55 0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Add a norm_<indicator> column for every topic-level indicator: the value
# divided by the year_divisors_US divisor of the row's calendar year. Rows
# whose year has no divisor get NA. A single vectorised lookup replaces the
# per-year filter/assign loop; local() keeps temporaries out of the session.
merged_topic_US <- local({
  divisor_by_year <- vapply(
    year_divisors_US, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_US, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_topic_US$Date, "%Y")])
  normalised <- merged_topic_US
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_topic_US, 16)
## # A tibble: 16 × 13
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 2010-01-01 Agricultu… US 5.83e+1 1.07e3 0 0 0 1.70e-1
## 2 2010-01-01 Artificia… US 1.55e+4 2.92e5 0 885 23923 4.52e+1
## 3 2010-01-01 Automotiv… US 2.06e+1 1.41e3 0 2 70 6.00e-2
## 4 2010-01-01 Computer … US 2.14e+3 3.12e4 0 466 11619 6.22e+0
## 5 2010-01-01 Food indu… US 5 e+0 6 e0 0 0 0 1.46e-2
## 6 2010-01-01 Higher ed… US 5.78e+1 5.96e2 0 0 0 1.68e-1
## 7 2010-01-01 Industry … US 5 e-1 0 0 0 0 1.46e-3
## 8 2010-01-01 Internet … US 9 e+0 7.43e1 0 0 0 2.62e-2
## 9 2010-01-01 Machine l… US 3.91e+3 8.45e4 0 149 3829 1.14e+1
## 10 2010-01-01 Manufactu… US 6.67e-1 6.67e0 0 0 0 1.94e-3
## 11 2010-01-01 Medicine US 2.35e+3 4.70e4 0 110 6364 6.86e+0
## 12 2010-01-01 Natural l… US 6.56e+2 1.32e4 0 21 391 1.91e+0
## 13 2010-01-01 Pharmacol… US 3.45e+1 5.43e2 0 0 0 1.00e-1
## 14 2010-01-01 Robot US 6.31e+2 6.01e3 0 25 4488 1.84e+0
## 15 2010-01-01 Robotics US 1.05e+2 6.69e2 0 0 0 3.05e-1
## 16 2010-02-01 Agricultu… US 9.17e+0 3.08e2 0 0 0 2.67e-2
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## # norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers
# Yearly population of the "JUS" group (the countries_JUS members): one row
# per Year with the summed population. sum() has no na.rm, so a missing
# country value makes that year's total NA.
population_JUS <- population %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Year) %>%
  summarise(
    Country = "JUS",
    Population = sum(Population),
    .groups = "drop"
  )
# Monthly totals for the "JUS" group. Papers and Paper_Citations sum the *_norm
# source columns; all other indicators sum the raw counts. Missing values are
# ignored, so a month with no data for an indicator shows 0.
merged_JUS <- merged %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Date) %>%
  summarise(
    Country = "JUS",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    AI_Projects = sum(AI_Projects, na.rm = TRUE),
    Projects = sum(Projects, na.rm = TRUE),
    Funding = sum(Funding, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_JUS, n = 6L)
## # A tibble: 6 × 10
## Date Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 JUS 1730. 11720. 0 0 0 0 0
## 2 2010-02-01 JUS 230. 4881. 0 2 19 4 0
## 3 2010-03-01 JUS 426. 5859. 0 3 15 4 0
## 4 2010-04-01 JUS 317. 4639. 0 1 2 2 2
## 5 2010-05-01 JUS 362. 4944. 0 4 30 1 0
## 6 2010-06-01 JUS 603. 6204. 0 0 0 2 0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Test of the topic-level aggregation: totals for the "JUS" group per month and
# topic. Papers and Paper_Citations sum the *_norm source columns; the other
# indicators sum the raw counts. Missing values are ignored.
# `.groups = "drop"` states the intended ungrouped result explicitly instead of
# relying on summarise()'s default (which keeps the Date grouping and prints an
# informational message) followed by ungroup().
merged_topic_JUS <- merged_topic %>%
  filter(Country %in% countries_JUS) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "JUS",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    Articles = sum(Articles, na.rm = TRUE),
    Patents = sum(Patents, na.rm = TRUE),
    Citations = sum(Citations, na.rm = TRUE),
    .groups = "drop"
  )
head(merged_topic_JUS, 12)
## # A tibble: 12 × 8
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int>
## 1 2010-01-01 Agriculture JUS 2.2 e0 8 e0 0 0 0
## 2 2010-01-01 Artificial intelli… JUS 1.58e3 1.06e4 0 0 0
## 3 2010-01-01 Automotive industry JUS 3 e0 4 e0 0 0 0
## 4 2010-01-01 Computer vision JUS 1.92e2 1.17e3 0 0 0
## 5 2010-01-01 Higher education JUS 4.17e0 3.33e0 0 0 0
## 6 2010-01-01 Internet of Things JUS 1 e0 7 e0 0 0 0
## 7 2010-01-01 Machine learning JUS 3.74e2 2.73e3 0 0 0
## 8 2010-01-01 Manufacturing JUS 2 e0 1.6 e1 0 0 0
## 9 2010-01-01 Medicine JUS 1.41e2 1.82e3 0 0 0
## 10 2010-01-01 Natural language p… JUS 8.75e1 3.73e2 0 0 0
## 11 2010-01-01 Pharmacology JUS 1.4 e0 1.52e1 0 0 0
## 12 2010-01-01 Robot JUS 9.11e1 5.61e2 0 0 0
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations
# Yearly divisors for the "JUS" group: population of the year divided by
# n_of_people, as a list of list(year = "<YYYY>", divisor = <double>).
# Each year is looked up by its Year value rather than by row position, so the
# divisors stay aligned even if population_JUS starts before 2010, is unsorted
# or has gaps; a year without a population row gets an NA divisor.
year_divisors_JUS <- lapply(2010:2022, function(yr) {
  pop <- population_JUS$Population[which(population_JUS$Year == yr)]
  list(
    year = as.character(yr),
    divisor = if (length(pop) == 1L) as.double(pop) / n_of_people else NA_real_
  )
})
# Add a norm_<indicator> column for every indicator: the monthly value divided
# by the divisor of the row's calendar year. Rows whose year has no divisor
# get NA. The work happens in local() so no temporaries leak into the session.
merged_JUS <- local({
  divisor_by_year <- vapply(
    year_divisors_JUS, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_JUS, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_JUS$Date, "%Y")])
  normalised <- merged_JUS
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents",
    "Citations", "AI_Projects", "Projects", "Funding"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_JUS, 16)
## # A tibble: 16 × 18
## Date Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 JUS 1730. 11720. 0 0 0 0 0
## 2 2010-02-01 JUS 230. 4881. 0 2 19 4 0
## 3 2010-03-01 JUS 426. 5859. 0 3 15 4 0
## 4 2010-04-01 JUS 317. 4639. 0 1 2 2 2
## 5 2010-05-01 JUS 362. 4944. 0 4 30 1 0
## 6 2010-06-01 JUS 603. 6204. 0 0 0 2 0
## 7 2010-07-01 JUS 511. 6971. 0 0 0 1 0
## 8 2010-08-01 JUS 534. 6112. 0 1 0 7 0
## 9 2010-09-01 JUS 633. 5577. 0 2 34 4 0
## 10 2010-10-01 JUS 461. 5600. 0 2 10 1 0
## 11 2010-11-01 JUS 448. 4417. 0 3 66 6 0
## 12 2010-12-01 JUS 565. 5947. 0 5 153 0 0
## 13 2011-01-01 JUS 1949. 11435. 0 0 0 7 0
## 14 2011-02-01 JUS 273. 5200. 0 1 15 6 0
## 15 2011-03-01 JUS 435. 6407. 0 3 20 2 0
## 16 2011-04-01 JUS 386. 5346. 0 1 42 10 0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Add a norm_<indicator> column for every topic-level indicator: the value
# divided by the year_divisors_JUS divisor of the row's calendar year. Rows
# whose year has no divisor get NA. A single vectorised lookup replaces the
# per-year filter/assign loop; local() keeps temporaries out of the session.
merged_topic_JUS <- local({
  divisor_by_year <- vapply(
    year_divisors_JUS, function(pair) pair$divisor, numeric(1)
  )
  names(divisor_by_year) <- vapply(
    year_divisors_JUS, function(pair) pair$year, character(1)
  )
  # One divisor per row, matched on the year part of Date.
  row_divisor <- unname(divisor_by_year[format(merged_topic_JUS$Date, "%Y")])
  normalised <- merged_topic_JUS
  indicators <- c(
    "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
  )
  for (indicator in indicators) {
    normalised[[paste0("norm_", indicator)]] <-
      normalised[[indicator]] / row_divisor
  }
  normalised
})
head(merged_topic_JUS, 16)
## # A tibble: 16 × 13
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 2010-01-01 Agriculture JUS 2.2 e0 8 e0 0 0 0 0.00495
## 2 2010-01-01 Artificial… JUS 1.58e3 1.06e4 0 0 0 3.55
## 3 2010-01-01 Automotive… JUS 3 e0 4 e0 0 0 0 0.00676
## 4 2010-01-01 Computer v… JUS 1.92e2 1.17e3 0 0 0 0.433
## 5 2010-01-01 Higher edu… JUS 4.17e0 3.33e0 0 0 0 0.00938
## 6 2010-01-01 Internet o… JUS 1 e0 7 e0 0 0 0 0.00225
## 7 2010-01-01 Machine le… JUS 3.74e2 2.73e3 0 0 0 0.843
## 8 2010-01-01 Manufactur… JUS 2 e0 1.6 e1 0 0 0 0.00450
## 9 2010-01-01 Medicine JUS 1.41e2 1.82e3 0 0 0 0.319
## 10 2010-01-01 Natural la… JUS 8.75e1 3.73e2 0 0 0 0.197
## 11 2010-01-01 Pharmacolo… JUS 1.4 e0 1.52e1 0 0 0 0.00315
## 12 2010-01-01 Robot JUS 9.11e1 5.61e2 0 0 0 0.205
## 13 2010-01-01 Robotics JUS 1.36e1 2.20e2 0 0 0 0.0306
## 14 2010-02-01 Artificial… JUS 2.04e2 4.29e3 0 2 19 0.460
## 15 2010-02-01 Automotive… JUS 1 e0 1 e1 0 0 0 0.00225
## 16 2010-02-01 Computer v… JUS 2.58e1 5.43e2 0 0 0 0.0581
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## # norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers
# Yearly population of the ANZ aggregate: one row per Year holding the summed
# Population of all countries listed in countries_ANZ. The resulting column
# order is Year, Country, Population.
population_ANZ <- population %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Year) %>%
  summarise(Country = "ANZ", Population = sum(Population)) %>%
  ungroup()
# Collapse the member countries of countries_ANZ into a single "ANZ" row per
# Date. Papers and Paper_Citations are summed from their *_norm source
# columns; the remaining indicators are summed under their own names.
# Missing values are ignored in every sum.
merged_ANZ <- merged %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Date) %>%
  summarise(
    Country = "ANZ",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      function(x) sum(x, na.rm = TRUE)
    )
  ) %>%
  ungroup()
head(merged_ANZ)
## # A tibble: 6 × 10
## Date Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 ANZ 2148. 31917. 0 7 63 1 0
## 2 2010-02-01 ANZ 287. 10542. 0 17 149 0 0
## 3 2010-03-01 ANZ 424. 13055. 0 16 187 1 0
## 4 2010-04-01 ANZ 297. 10811. 0 25 216 0 0
## 5 2010-05-01 ANZ 435. 12453. 0 16 139 0 0
## 6 2010-06-01 ANZ 531. 12745. 0 14 76 2 0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Aggregation test for the topic-level data: one "ANZ" row per Date and Topic,
# summing over the countries in countries_ANZ and ignoring missing values.
merged_topic_ANZ <- merged_topic %>%
  filter(Country %in% countries_ANZ) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "ANZ",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations),
      function(x) sum(x, na.rm = TRUE)
    )
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.
head(merged_topic_ANZ, 12)
## # A tibble: 12 × 8
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int>
## 1 2010-01-01 Agriculture ANZ 1.21e1 2.44e2 0 0 0
## 2 2010-01-01 Artificial intelli… ANZ 1.85e3 2.83e4 0 7 63
## 3 2010-01-01 Automotive industry ANZ 2.25e0 8.5 e0 0 0 0
## 4 2010-01-01 Computer vision ANZ 2.19e2 2.09e3 0 4 18
## 5 2010-01-01 Higher education ANZ 2.23e1 2.23e2 0 0 0
## 6 2010-01-01 Internet of Things ANZ 2 e0 5.2 e1 0 0 0
## 7 2010-01-01 Machine learning ANZ 5.24e2 6.86e3 0 0 0
## 8 2010-01-01 Manufacturing ANZ 1 e0 7 e0 0 0 0
## 9 2010-01-01 Medicine ANZ 3.28e2 4.17e3 0 1 1
## 10 2010-01-01 Natural language p… ANZ 5.81e1 3.46e2 0 0 0
## 11 2010-01-01 Pharmacology ANZ 1.33e0 5.33e0 0 0 0
## 12 2010-01-01 Robot ANZ 7.97e1 6.15e2 0 1 14
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations
# One (year, divisor) entry per year from 2010 to 2022. The divisor is the ANZ
# population (third column of population_ANZ) expressed in units of
# n_of_people.
# NOTE(review): the lookup is positional -- row i of population_ANZ is taken to
# be year 2009 + i. Verify that the population data starts at 2010.
year_divisors_ANZ <- lapply(seq_len(13), function(row) {
  list(
    year = as.character(2009 + row),
    divisor = as.double(population_ANZ[row, 3] / n_of_people)
  )
})
# Population-normalise the ANZ table year by year: every indicator gets a
# matching norm_* column holding the value divided by that year's divisor
# from year_divisors_ANZ.
metrics_ANZ <- c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations",
  "AI_Projects", "Projects", "Funding"
)
# Create the norm_* columns first, in this order, so rows whose year has no
# divisor entry stay NA.
for (metric in metrics_ANZ) {
  merged_ANZ[[paste0("norm_", metric)]] <- NA
}
for (entry in year_divisors_ANZ) {
  # Positions of the rows dated in this entry's year; which() skips rows
  # with a missing Date.
  rows_in_year <- which(format(merged_ANZ$Date, "%Y") == entry$year)
  for (metric in metrics_ANZ) {
    merged_ANZ[[paste0("norm_", metric)]][rows_in_year] <-
      merged_ANZ[[metric]][rows_in_year] / entry$divisor
  }
}
head(merged_ANZ, 16)
## # A tibble: 16 × 18
## Date Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 ANZ 2148. 31917. 0 7 63 1 0
## 2 2010-02-01 ANZ 287. 10542. 0 17 149 0 0
## 3 2010-03-01 ANZ 424. 13055. 0 16 187 1 0
## 4 2010-04-01 ANZ 297. 10811. 0 25 216 0 0
## 5 2010-05-01 ANZ 435. 12453. 0 16 139 0 0
## 6 2010-06-01 ANZ 531. 12745. 0 14 76 2 0
## 7 2010-07-01 ANZ 590. 14253. 0 15 482 0 0
## 8 2010-08-01 ANZ 493. 12197. 0 24 221 3 0
## 9 2010-09-01 ANZ 480. 17239. 0 23 454 0 0
## 10 2010-10-01 ANZ 452. 12061. 0 18 176 0 0
## 11 2010-11-01 ANZ 459. 13410. 0 24 515 3 0
## 12 2010-12-01 ANZ 716. 15637. 0 26 343 2 0
## 13 2011-01-01 ANZ 2241. 27010. 0 13 136 2 0
## 14 2011-02-01 ANZ 316. 10394. 0 18 182 7 0
## 15 2011-03-01 ANZ 387. 10715. 0 33 287 4 0
## 16 2011-04-01 ANZ 355. 12623. 0 26 372 10 0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Population-normalise the ANZ topic table year by year: every count column
# gets a matching norm_* column holding the count divided by that year's
# divisor from year_divisors_ANZ.
topic_metrics_ANZ <- c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
)
# Create the norm_* columns first, in this order, so rows whose year has no
# divisor entry stay NA.
for (metric in topic_metrics_ANZ) {
  merged_topic_ANZ[[paste0("norm_", metric)]] <- NA
}
for (entry in year_divisors_ANZ) {
  # Positions of the rows dated in this entry's year; which() skips rows
  # with a missing Date.
  rows_in_year <- which(format(merged_topic_ANZ$Date, "%Y") == entry$year)
  for (metric in topic_metrics_ANZ) {
    merged_topic_ANZ[[paste0("norm_", metric)]][rows_in_year] <-
      merged_topic_ANZ[[metric]][rows_in_year] / entry$divisor
  }
}
head(merged_topic_ANZ, 16)
## # A tibble: 16 × 13
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 2010-01-01 Agricultu… ANZ 12.1 2.44e2 0 0 0 0.459
## 2 2010-01-01 Artificia… ANZ 1852. 2.83e4 0 7 63 70.2
## 3 2010-01-01 Automotiv… ANZ 2.25 8.5 e0 0 0 0 0.0853
## 4 2010-01-01 Computer … ANZ 219. 2.09e3 0 4 18 8.32
## 5 2010-01-01 Higher ed… ANZ 22.3 2.23e2 0 0 0 0.845
## 6 2010-01-01 Internet … ANZ 2 5.2 e1 0 0 0 0.0758
## 7 2010-01-01 Machine l… ANZ 524. 6.86e3 0 0 0 19.9
## 8 2010-01-01 Manufactu… ANZ 1 7 e0 0 0 0 0.0379
## 9 2010-01-01 Medicine ANZ 328. 4.17e3 0 1 1 12.4
## 10 2010-01-01 Natural l… ANZ 58.1 3.46e2 0 0 0 2.20
## 11 2010-01-01 Pharmacol… ANZ 1.33 5.33e0 0 0 0 0.0505
## 12 2010-01-01 Robot ANZ 79.7 6.15e2 0 1 14 3.02
## 13 2010-01-01 Robotics ANZ 19.8 2.19e2 0 0 0 0.749
## 14 2010-02-01 Agricultu… ANZ 1 1.70e3 0 0 0 0.0379
## 15 2010-02-01 Artificia… ANZ 238. 8.85e3 0 16 148 9.02
## 16 2010-02-01 Automotiv… ANZ 0.5 0 0 0 0 0.0190
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## # norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers
# Yearly population of the ASIA aggregate: one row per Year holding the summed
# Population of all countries listed in countries_ASIA. The resulting column
# order is Year, Country, Population.
population_ASIA <- population %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Year) %>%
  summarise(Country = "ASIA", Population = sum(Population)) %>%
  ungroup()
# Collapse the member countries of countries_ASIA into a single "ASIA" row per
# Date. Papers and Paper_Citations are summed from their *_norm source
# columns; the remaining indicators are summed under their own names.
# Missing values are ignored in every sum.
merged_ASIA <- merged %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Date) %>%
  summarise(
    Country = "ASIA",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations, AI_Projects, Projects, Funding),
      function(x) sum(x, na.rm = TRUE)
    )
  ) %>%
  ungroup()
head(merged_ASIA)
## # A tibble: 6 × 10
## Date Country Papers Paper_Cita…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 ASIA 76073. 156723. 0 781 9137 7 1
## 2 2010-02-01 ASIA 2631. 54533. 0 884 10547 6 0
## 3 2010-03-01 ASIA 4125. 66784. 0 1256 13469 7 1
## 4 2010-04-01 ASIA 3098. 67939. 0 774 9861 6 1
## 5 2010-05-01 ASIA 5763. 57968. 0 738 8848 5 0
## 6 2010-06-01 ASIA 8615. 87909. 0 985 10503 10 0
## # … with 1 more variable: Funding <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Aggregation test for the topic-level data: one "ASIA" row per Date and
# Topic, summing over the countries in countries_ASIA and ignoring missing
# values.
merged_topic_ASIA <- merged_topic %>%
  filter(Country %in% countries_ASIA) %>%
  group_by(Date, Topic) %>%
  summarise(
    Country = "ASIA",
    Papers = sum(Papers_norm, na.rm = TRUE),
    Paper_Citations = sum(Paper_Citations_norm, na.rm = TRUE),
    across(
      c(Articles, Patents, Citations),
      function(x) sum(x, na.rm = TRUE)
    )
  ) %>%
  ungroup()
## `summarise()` has grouped output by 'Date'. You can override using the
## `.groups` argument.
head(merged_topic_ASIA, 12)
## # A tibble: 12 × 8
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int>
## 1 2010-01-01 Agriculture ASIA 4.41e2 272. 0 1 1
## 2 2010-01-01 Artificial intelli… ASIA 6.71e4 143606. 0 767 9065
## 3 2010-01-01 Automotive industry ASIA 1.32e2 186. 0 0 0
## 4 2010-01-01 Computer vision ASIA 1.07e4 27900. 0 565 6779
## 5 2010-01-01 Food industry ASIA 4 e0 0 0 0 0
## 6 2010-01-01 Higher education ASIA 3.42e2 102. 0 0 0
## 7 2010-01-01 Internet of Things ASIA 2.83e1 198 0 1 31
## 8 2010-01-01 Machine learning ASIA 1.61e4 35751. 0 62 616
## 9 2010-01-01 Manufacturing ASIA 5.6 e1 87 0 0 0
## 10 2010-01-01 Medicine ASIA 5.81e3 16438. 0 82 920
## 11 2010-01-01 Natural language p… ASIA 2.06e3 3453. 0 9 85
## 12 2010-01-01 Pharmacology ASIA 8.94e1 255. 0 1 2
## # … with abbreviated variable names ¹Paper_Citations, ²Articles, ³Citations
# One (year, divisor) entry per year from 2010 to 2022. The divisor is the
# ASIA population (third column of population_ASIA) expressed in units of
# n_of_people.
# NOTE(review): the lookup is positional -- row i of population_ASIA is taken
# to be year 2009 + i. Verify that the population data starts at 2010.
year_divisors_ASIA <- lapply(seq_len(13), function(row) {
  list(
    year = as.character(2009 + row),
    divisor = as.double(population_ASIA[row, 3] / n_of_people)
  )
})
# Population-normalise the ASIA table year by year: every indicator gets a
# matching norm_* column holding the value divided by that year's divisor
# from year_divisors_ASIA.
metrics_ASIA <- c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations",
  "AI_Projects", "Projects", "Funding"
)
# Create the norm_* columns first, in this order, so rows whose year has no
# divisor entry stay NA.
for (metric in metrics_ASIA) {
  merged_ASIA[[paste0("norm_", metric)]] <- NA
}
for (entry in year_divisors_ASIA) {
  # Positions of the rows dated in this entry's year; which() skips rows
  # with a missing Date.
  rows_in_year <- which(format(merged_ASIA$Date, "%Y") == entry$year)
  for (metric in metrics_ASIA) {
    merged_ASIA[[paste0("norm_", metric)]][rows_in_year] <-
      merged_ASIA[[metric]][rows_in_year] / entry$divisor
  }
}
head(merged_ASIA, 16)
## # A tibble: 16 × 18
## Date Country Papers Paper_Cit…¹ Artic…² Patents Citat…³ AI_Pr…⁴ Proje…⁵
## <date> <chr> <dbl> <dbl> <int> <int> <int> <int> <int>
## 1 2010-01-01 ASIA 76073. 156723. 0 781 9137 7 1
## 2 2010-02-01 ASIA 2631. 54533. 0 884 10547 6 0
## 3 2010-03-01 ASIA 4125. 66784. 0 1256 13469 7 1
## 4 2010-04-01 ASIA 3098. 67939. 0 774 9861 6 1
## 5 2010-05-01 ASIA 5763. 57968. 0 738 8848 5 0
## 6 2010-06-01 ASIA 8615. 87909. 0 985 10503 10 0
## 7 2010-07-01 ASIA 8456. 71482. 0 933 11062 2 0
## 8 2010-08-01 ASIA 8344. 68981. 0 930 10733 14 0
## 9 2010-09-01 ASIA 5456. 66306. 0 1216 11463 12 1
## 10 2010-10-01 ASIA 10518. 94118. 0 962 10289 11 2
## 11 2010-11-01 ASIA 5760. 59197. 0 948 9758 7 0
## 12 2010-12-01 ASIA 8913. 76870. 0 1168 11754 11 0
## 13 2011-01-01 ASIA 76733. 159363. 0 851 9184 9 1
## 14 2011-02-01 ASIA 3534. 63697. 0 927 10020 27 1
## 15 2011-03-01 ASIA 4578. 82307. 0 1448 13198 19 0
## 16 2011-04-01 ASIA 4979. 93269. 0 851 9289 38 0
## # … with 9 more variables: Funding <dbl>, norm_Papers <dbl>,
## # norm_Paper_Citations <dbl>, norm_Articles <dbl>, norm_Patents <dbl>,
## # norm_Citations <dbl>, norm_AI_Projects <dbl>, norm_Projects <dbl>,
## # norm_Funding <dbl>, and abbreviated variable names ¹Paper_Citations,
## # ²Articles, ³Citations, ⁴AI_Projects, ⁵Projects
# Population-normalise the ASIA topic table year by year: every count column
# gets a matching norm_* column holding the count divided by that year's
# divisor from year_divisors_ASIA.
topic_metrics_ASIA <- c(
  "Papers", "Paper_Citations", "Articles", "Patents", "Citations"
)
# Create the norm_* columns first, in this order, so rows whose year has no
# divisor entry stay NA.
for (metric in topic_metrics_ASIA) {
  merged_topic_ASIA[[paste0("norm_", metric)]] <- NA
}
for (entry in year_divisors_ASIA) {
  # Positions of the rows dated in this entry's year; which() skips rows
  # with a missing Date.
  rows_in_year <- which(format(merged_topic_ASIA$Date, "%Y") == entry$year)
  for (metric in topic_metrics_ASIA) {
    merged_topic_ASIA[[paste0("norm_", metric)]][rows_in_year] <-
      merged_topic_ASIA[[metric]][rows_in_year] / entry$divisor
  }
}
head(merged_topic_ASIA, 16)
## # A tibble: 16 × 13
## Date Topic Country Papers Paper…¹ Artic…² Patents Citat…³ norm_…⁴
## <date> <chr> <chr> <dbl> <dbl> <int> <int> <int> <dbl>
## 1 2010-01-01 Agriculture ASIA 4.41e2 272. 0 1 1 1.46e-1
## 2 2010-01-01 Artificial… ASIA 6.71e4 143606. 0 767 9065 2.22e+1
## 3 2010-01-01 Automotive… ASIA 1.32e2 186. 0 0 0 4.37e-2
## 4 2010-01-01 Computer v… ASIA 1.07e4 27900. 0 565 6779 3.56e+0
## 5 2010-01-01 Food indus… ASIA 4 e0 0 0 0 0 1.32e-3
## 6 2010-01-01 Higher edu… ASIA 3.42e2 102. 0 0 0 1.13e-1
## 7 2010-01-01 Internet o… ASIA 2.83e1 198 0 1 31 9.38e-3
## 8 2010-01-01 Machine le… ASIA 1.61e4 35751. 0 62 616 5.31e+0
## 9 2010-01-01 Manufactur… ASIA 5.6 e1 87 0 0 0 1.85e-2
## 10 2010-01-01 Medicine ASIA 5.81e3 16438. 0 82 920 1.93e+0
## 11 2010-01-01 Natural la… ASIA 2.06e3 3453. 0 9 85 6.83e-1
## 12 2010-01-01 Pharmacolo… ASIA 8.94e1 255. 0 1 2 2.96e-2
## 13 2010-01-01 Robot ASIA 2.92e3 6381. 0 13 225 9.66e-1
## 14 2010-01-01 Robotics ASIA 1.21e2 558. 0 0 0 4.01e-2
## 15 2010-02-01 Agriculture ASIA 1 e1 174 0 0 0 3.31e-3
## 16 2010-02-01 Artificial… ASIA 2.36e3 46617. 0 866 10380 7.80e-1
## # … with 4 more variables: norm_Paper_Citations <dbl>, norm_Articles <dbl>,
## # norm_Patents <dbl>, norm_Citations <dbl>, and abbreviated variable names
## # ¹Paper_Citations, ²Articles, ³Citations, ⁴norm_Papers
# Stack the five regional tables (EU, US, JUS, ANZ, ASIA) into one table each
# for the overall and the per-topic indicators, then export both as CSV
# files without row names.
regional_tables <- list(
  merged_EU, merged_US, merged_JUS, merged_ANZ, merged_ASIA
)
regional_topic_tables <- list(
  merged_topic_EU, merged_topic_US, merged_topic_JUS,
  merged_topic_ANZ, merged_topic_ASIA
)
total <- do.call(rbind, regional_tables)
total_topic <- do.call(rbind, regional_topic_tables)
write.csv(total, "TotalAggregated.csv", row.names = FALSE)
write.csv(total_topic, "TotalAggregatedTopic.csv", row.names = FALSE)