total <- read.csv("TotalAggregatedSLO.csv", header=TRUE, sep=",", dec=".")
total$Year <- lubridate::year(total$Date)
summed_data <- total %>%
dplyr::group_by(Country, Year) %>%
dplyr::summarize(across(where(is.numeric), sum))
## `summarise()` has grouped output by 'Country'. You can override using the
## `.groups` argument.
summed_data <- summed_data[!(summed_data$Country == "HRV"),]
total_b <- summed_data[summed_data$Year == 2014, c(11, 13, 16, 18)]
total_bb <- summed_data[summed_data$Year == 2014, c(1, 2, 11, 13, 16, 18)]
total_ee <- summed_data[summed_data$Year == 2021, c(1, 2, 11, 13, 16, 18)]
total_e <- summed_data[summed_data$Year == 2021, c(11, 13, 16, 18)]
round(stat.desc(total_b, basic = FALSE), 2)
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## median 186.01 17.28 8.43 1.986106e+05
## mean 194.52 15.22 12.18 2.773213e+05
## SE.mean 24.91 3.39 3.54 1.062001e+05
## CI.mean.0.95 56.34 7.66 8.02 2.402414e+05
## var 6202.66 114.60 125.56 1.127847e+11
## std.dev 78.76 10.70 11.21 3.358343e+05
## coef.var 0.40 0.70 0.92 1.210000e+00
R <- cor(total_b) # At least 0.3
round(R, 3)
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## norm_Papers 1.000 0.240 0.960 0.914
## norm_Articles 0.240 1.000 0.337 0.328
## norm_AI_Projects 0.960 0.337 1.000 0.937
## norm_Funding 0.914 0.328 0.937 1.000
library(psych)
##
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
##
## describe
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
cortest.bartlett(R, n = nrow(total_b))
## $chisq
## [1] 33.47428
##
## $p.value
## [1] 8.497053e-06
##
## $df
## [1] 6
det(R)
## [1] 0.007456458
library(psych)
KMO(R)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R)
## Overall MSA = 0.74
## MSA for each item =
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## 0.72 0.58 0.69 0.88
library(FactoMineR)
components <- PCA(total_b,
scale.unit = TRUE,
graph = FALSE)
library(factoextra)
get_eigenvalue(components)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.01013193 75.2532982 75.25330
## Dim.2 0.87074822 21.7687055 97.02200
## Dim.3 0.08606590 2.1516476 99.17365
## Dim.4 0.03305395 0.8263486 100.00000
library(factoextra)
fviz_eig(components,
choice = "eigenvalue",
main = "Scree plot",
ylab = "Eigenvalue",
xlab = "Principal component",
addlabels = TRUE)

library(psych)
fa.parallel(total_b,
sim = F,
fa = "pc")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.

## Parallel analysis suggests that the number of factors = NA and the number of components = 1
library(FactoMineR)
components <- PCA(total_b,
scale.unit = TRUE,
graph = FALSE,
ncp = 1)
components
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 4 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
print(components$var$cor)
## Dim.1
## norm_Papers -0.9602830
## norm_Articles -0.4369144
## norm_AI_Projects -0.9819344
## norm_Funding -0.9658670
print(components$var$contrib)
## Dim.1
## norm_Papers 30.634653
## norm_Articles 6.341722
## norm_AI_Projects 32.031661
## norm_Funding 30.991965
round(stat.desc(total_e, basic = FALSE), 2)
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## median 235.56 370.28 64.42 1.905343e+06
## mean 273.96 511.72 77.27 2.123846e+06
## SE.mean 40.99 132.62 18.21 6.310766e+05
## CI.mean.0.95 92.73 300.00 41.20 1.427594e+06
## var 16802.02 175872.67 3317.80 3.982576e+12
## std.dev 129.62 419.37 57.60 1.995639e+06
## coef.var 0.47 0.82 0.75 9.400000e-01
R_e <- cor(total_e) # At least 0.3
round(R_e, 3)
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## norm_Papers 1.000 0.860 0.912 0.919
## norm_Articles 0.860 1.000 0.806 0.680
## norm_AI_Projects 0.912 0.806 1.000 0.882
## norm_Funding 0.919 0.680 0.882 1.000
library(psych)
cortest.bartlett(R_e, n = nrow(total_b))
## $chisq
## [1] 37.74658
##
## $p.value
## [1] 1.259047e-06
##
## $df
## [1] 6
det(R_e)
## [1] 0.003990296
library(psych)
KMO(R_e)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = R_e)
## Overall MSA = 0.71
## MSA for each item =
## norm_Papers norm_Articles norm_AI_Projects norm_Funding
## 0.68 0.65 0.89 0.66
library(FactoMineR)
components_e <- PCA(total_e,
scale.unit = TRUE,
graph = FALSE)
library(factoextra)
get_eigenvalue(components_e)
## eigenvalue variance.percent cumulative.variance.percent
## Dim.1 3.53428331 88.3570829 88.35708
## Dim.2 0.32821872 8.2054680 96.56255
## Dim.3 0.10461771 2.6154427 99.17799
## Dim.4 0.03288026 0.8220064 100.00000
library(factoextra)
fviz_eig(components_e,
choice = "eigenvalue",
main = "Scree plot",
ylab = "Eigenvalue",
xlab = "Principal component",
addlabels = TRUE)

library(psych)
fa.parallel(total_e,
sim = F,
fa = "pc")
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully

## Parallel analysis suggests that the number of factors = NA and the number of components = 1
library(FactoMineR)
components_e <- PCA(total_e,
scale.unit = TRUE,
graph = FALSE,
ncp = 1)
components_e
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 4 variables
## *The results are available in the following objects:
##
## name description
## 1 "$eig" "eigenvalues"
## 2 "$var" "results for the variables"
## 3 "$var$coord" "coord. for the variables"
## 4 "$var$cor" "correlations variables - dimensions"
## 5 "$var$cos2" "cos2 for the variables"
## 6 "$var$contrib" "contributions of the variables"
## 7 "$ind" "results for the individuals"
## 8 "$ind$coord" "coord. for the individuals"
## 9 "$ind$cos2" "cos2 for the individuals"
## 10 "$ind$contrib" "contributions of the individuals"
## 11 "$call" "summary statistics"
## 12 "$call$centre" "mean of the variables"
## 13 "$call$ecart.type" "standard error of the variables"
## 14 "$call$row.w" "weights for the individuals"
## 15 "$call$col.w" "weights for the variables"
print(components_e$var$cor)
## Dim.1
## norm_Papers -0.9827915
## norm_Articles -0.8873341
## norm_AI_Projects -0.9589102
## norm_Funding -0.9281882
print(components_e$var$contrib)
## Dim.1
## norm_Papers 27.32886
## norm_Articles 22.27784
## norm_AI_Projects 26.01684
## norm_Funding 24.37647
total_bb$Indicators_2014 <- (components$ind$coord[ , 1])*-1
head(total_bb, 3)
## # A tibble: 3 × 7
## # Groups: Country [3]
## Country Year norm_Papers norm_Articles norm_AI_Projects norm_Funding Indica…¹
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUT 2014 223. 29.6 14.0 494341. 1.05
## 2 CHE 2014 372. 20.1 40.2 1112193. 4.39
## 3 CZE 2014 191. 20.5 9.41 33320. -0.466
## # … with abbreviated variable name ¹​Indicators_2014
library(ggplot2)
ggplot(total_bb, aes(y=Indicators_2014, x=Country)) +
theme_linedraw() +
geom_bar(stat="identity") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

total_bb$Indicators_2021 <- (components_e$ind$coord[ , 1])*-1
total_ee$PCA2 <- (components_e$ind$coord[ , 1])*-1
head(total_ee, 3)
## # A tibble: 3 × 7
## # Groups: Country [3]
## Country Year norm_Papers norm_Articles norm_AI_Projects norm_Funding PCA2
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 AUT 2021 334. 344. 75.9 4473403. 0.656
## 2 CHE 2021 583. 1446. 226. 6539250. 4.97
## 3 CZE 2021 217. 292. 47.4 602850. -1.18
ggplot(total_ee, aes(y=PCA2, x=Country)) +
theme_linedraw() +
geom_bar(stat="identity") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

ggplot(total_bb,
aes(x = Indicators_2014, y = Indicators_2021, label = Country)) +
geom_point(shape = 16, size = 4, color = "black") +
geom_segment(aes(xend = Indicators_2014, yend = Indicators_2021), color = "grey80") +
geom_text_repel(nudge_x = 0, nudge_y = 0) +
labs(title = "PCA scatterplot",
x = "2014", y = "2021") +
theme_minimal() +
theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5, lineheight = 1.2),
axis.text = element_text(size = 12),
axis.title = element_text(size = 14),
legend.title = element_text(size = 12, face = "bold"),
legend.text = element_text(size = 12)) +
scale_y_continuous(labels = scales::comma, limits = c(-6, 6)) +
scale_x_continuous(labels = scales::comma, limits = c(-6, 6)) +
geom_vline(xintercept = 0, color = "black") +
geom_hline(yintercept = 0, color = "black")

total_bb[total_bb$Country == "CHE", 1] <- "CH"
total_long <- tidyr::gather(total_bb, "Variable", "Value", Indicators_2014, Indicators_2021)
# Define the custom order of countries
custom_order <- c("CH", "AUT", "DEU", "FRA", "ITA", "SVN", "CZE", "HUN", "SVK", "POL") # Replace with your desired order
# Reorder the "Country" variable as a factor with the custom order
total_long$Country <- factor(total_long$Country, levels = custom_order)
ggplot(total_long, aes(x = Country, y = Value, fill = Variable)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = c("Indicators_2014" = "gray30", "Indicators_2021" = "gray70")) +
labs(fill = "Variable") + # Rename the legend
ylab("Combined AI Indicators") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
theme_linedraw() +
geom_vline(xintercept = which(total_long$Country == "HUN") - 0.5, linetype = "dashed") + # Vertical line before SVN
geom_vline(xintercept = which(total_long$Country == "HUN") + 0.5, linetype = "dashed") # Vertical line after SVN
