This is document reproduces the figures produced in the study ‘A value creation model for science-society interconnections: Archetypal analysis combining publications, survey and altmetric data’ authored by I. Ramos-Vielba, N. Robinson-Garcia /& R. Woolley. Data is available at doi:10.5281/zenodo.6393226. Code scripts are available in a (GitHub repository)[https://github.com/elrobin/value-components].
Data is imported from Zenodo. It includes 9,190 observations and 12 variables.
df <- read.csv(
"https://zenodo.org/record/6393226/files/dataset_archetypes_v3.csv?download=1", sep = ";"
)
str(df)
## 'data.frame': 9190 obs. of 12 variables:
## $ ï..id : int 2 4 6 7 8 10 11 12 13 14 ...
## $ field : chr "STEM" "STEM" "SSH" "SSH" ...
## $ production : int 0 0 0 0 0 0 3 0 2 0 ...
## $ engagement : int 0 0 0 0 2 1 2 0 1 0 ...
## $ translation : int 0 0 0 0 0 1 1 0 2 0 ...
## $ dissemination: int 2 1 1 1 3 2 1 2 4 1 ...
## $ transmission : int 2 3 1 1 4 3 4 4 4 1 ...
## $ PUBS : int 6 8 3 1 9 27 4 8 21 23 ...
## $ OA : int 4 1 1 1 7 20 1 6 7 17 ...
## $ p_twitter : int 1 2 1 0 4 15 0 4 16 17 ...
## $ p_news : int 1 0 0 0 1 1 0 0 2 1 ...
## $ p_policy : int 0 0 0 0 0 1 0 0 2 0 ...
We create non-size dependent variables based on publications by converting these into proportions and we then rename them to match the naming of the value creation model.
df$pp_news <- df$p_news / df$PUBS
df$pp_policy <- df$p_policy / df$PUBS
df$pp_oa <- df$OA / df$PUBS
df$pp_twitter <- df$p_twitter / df$PUBS
df <- df %>% # Select and rename variables
select(
ï..id,
translation,
dissemination,
engagement,
production,
pp_news,
pp_oa,
pp_policy,
pp_twitter,
transmission,
field
)
names(df) <- c(
"ID_FINAL",
"Commercialisation",
"Dissemination",
"Engagement",
"Joint research",
"Media promotion",
"Openness",
"Public Policy",
"Social visibility",
"Transmission",
"field"
)
The following table corresponds to Table 5 in the paper, and includes basic descriptives per field:
df.vars <- df %>%
tidyr::pivot_longer(!field & !ID_FINAL,
names_to = "variable",
values_to = "value")
df.vars %>%
group_by(field, variable) %>%
summarise(n = n(), mean = mean(value), sd = sd(value))
## `summarise()` has grouped output by 'field'. You can override using the `.groups` argument.
## # A tibble: 27 x 5
## # Groups: field [3]
## field variable n mean sd
## <chr> <chr> <int> <dbl> <dbl>
## 1 BIOMED Commercialisation 2130 0.516 1.02
## 2 BIOMED Dissemination 2130 1.53 0.657
## 3 BIOMED Engagement 2130 1.18 1.31
## 4 BIOMED Joint research 2130 0.843 1.15
## 5 BIOMED Media promotion 2130 0.0454 0.107
## 6 BIOMED Openness 2130 0.452 0.328
## 7 BIOMED Public Policy 2130 0.00876 0.0564
## 8 BIOMED Social visibility 2130 0.468 0.307
## 9 BIOMED Transmission 2130 2.64 0.966
## 10 SSH Commercialisation 1950 0.204 0.569
## # ... with 17 more rows
Next we create a correlation matrix including also the distribution of each variable. This figure corresponds to Figure 2.
ggpairs(df, columns=2:10, aes(colour = field)) +
theme_bw() + scale_color_brewer(palette = "Dark2")
Following we include the archetypal analyses constructed for each field and overall.
The archetypal analysis for all cases is displayed in the manuscript in Figure 3.
We first identify the ideal number of archetypes needed. We search for the best model looking at up to 5 archetypes and 5 repetitions.
all <- df %>%
select(!field & !ID_FINAL)
set.seed(1986)
arc.k <- stepArchetypes(data = all,
k = 1:5,
verbose =F,
nrep = 5)
The optimal number of archetypes is selected based on the Residual Sum of Squares (RSS) for which we apply an elbow rule.
screeplot(arc.k) # 3 archetypes
These archetypes have the following parameters
arc.model <- bestModel(arc.k[[3]])
t(parameters(arc.model))
## [,1] [,2] [,3]
## Commercialisation -7.757858e-05 2.161637740 -1.001852e-05
## Dissemination 1.059851e+00 2.569063945 1.636289e+00
## Engagement -1.887014e-04 4.720686563 1.612700e-01
## Joint research -1.217999e-04 3.682235114 -1.572927e-05
## Media promotion -4.329948e-06 0.002978228 1.604448e-01
## Openness 2.671435e-01 0.293303339 8.886213e-01
## Public Policy -9.299296e-07 0.002915692 3.286608e-02
## Social visibility -4.577995e-05 0.211122389 1.000014e+00
## Transmission 2.149042e+00 4.484772213 2.249856e+00
Which can be better interpreted when plotted in percentiles:
barplot(arc.model, all, percentiles = TRUE)
In the manuscript these percentiles are shown using a radar chart for which we first have to recalculate those percentiles as computed in the archetypes package.
.perc <- function(x, data, digits = 2) { # Calculates percentiles
Fn <- ecdf(data)
round(Fn(x) * 1, digits = digits)
}
param <- parameters(arc.model)
atypes <- param # extract parameters
rownames(atypes) <- sprintf('Archetype %s',
seq(length = nrow(atypes)))
atypes <- sapply(seq(length = ncol(all)), # Identify percentile for param
function(i) # based on a data.frame
.perc(atypes[, i], all[, i]))
colnames(atypes) <- colnames(all)
atypes <- as.data.frame(atypes)
atypes$archetype <- c(1,2,3)
We can then produce the radar charts after converting from wide to long format
params <-
tidyr::pivot_longer(atypes,
!c("archetype"),
names_to = "variable",
values_to = "value")
a1 <-
ggplot(subset(params, archetype == 1)) +
geom_col(aes(x = variable, y = value), fill = "#F25200") +
ylim(0, 1) +
labs(title = "Archetype 1",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a2 <-
ggplot(subset(params, archetype == 2)) +
geom_col(aes(x = variable, y = value), fill = "#F25200") +
ylim(0, 1) +
labs(title = "Archetype 2",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a3 <-
ggplot(subset(params, archetype == 3)) +
geom_col(aes(x = variable, y = value), fill = "#F25200") +
ylim(0, 1) +
labs(title = "Archetype 3",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
par(mfrow = c(3, 1))
a1
a2
a3
Co-occurrence term maps overlaying alpha scores for researchers’ scientific output is available online here: (https://sl.ugr.es/allarchetypes)[https://sl.ugr.es/allarchetypes]. Below find the code to extract the alpha score of each observation for the three archetypes:
# alphas <- arc.model[["alphas"]]
#
# alphas <- cbind.data.frame(df$ID_FINAL, alphas)
# write.csv(alphas, file = "alpha-all-respondents.csv", quote = F,
# row.names = F,)
The archetypal analysis for STEM cases is displayed in the manuscript in Figure 4.
We first identify the ideal number of archetypes needed. We search for the best model looking at up to 5 archetypes and 5 repetitions.
stem <- df %>%
filter(field=="STEM") %>%
select(!field & !ID_FINAL)
set.seed(1986)
arc.k <- stepArchetypes(data = stem,
k = 1:5,
verbose =F,
nrep = 5)
The optimal number of archetypes is selected based on the Residual Sum of Squares (RSS) for which we apply an elbow rule.
screeplot(arc.k) # 3 archetypes
These archetypes have the following parameters
arc.model <- bestModel(arc.k[[3]])
t(parameters(arc.model))
## [,1] [,2] [,3]
## Commercialisation 0.01604539 3.768879e+00 -2.583331e-05
## Dissemination 2.81910316 2.256802e+00 9.999774e-01
## Engagement 0.60094009 6.174208e+00 2.661862e-02
## Joint research 0.08654360 4.996007e+00 -3.404266e-05
## Media promotion 0.15813300 9.279789e-03 -9.314300e-07
## Openness 0.95195089 1.965541e-01 3.342719e-01
## Public Policy 0.02884634 -5.550278e-07 -1.570726e-07
## Social visibility 0.99619348 1.074052e-01 3.384643e-02
## Transmission 3.52332288 4.527561e+00 1.998948e+00
Which can be better interpreted when plotted in percentiles:
barplot(arc.model, stem, percentiles = TRUE)
In the manuscript these percentiles are shown using a radar chart for which we first have to recalculate those percentiles as computed in the archetypes package.
stemtypes <- parameters(arc.model) # extract parameters
rownames(stemtypes) <- sprintf('Archetype %s',
seq(length = nrow(stemtypes)))
stemtypes <-
sapply(seq(length = ncol(stem)), # Identify percentile for param
function(i)
# based on a data.frame
.perc(stemtypes[, i], stem[, i]))
colnames(stemtypes) <- colnames(stem)
stemtypes <- as.data.frame(stemtypes)
stemtypes$archetype <- c(1, 2, 3)
We can then produce the radar charts after converting from wide to long format
params <-
tidyr::pivot_longer(stemtypes,
!c("archetype"),
names_to = "variable",
values_to = "value")
a1 <-
ggplot(subset(params, archetype == 1)) +
geom_col(aes(x = variable, y = value), fill = "#00BAB4") +
ylim(0, 1) +
labs(title = "Archetype STEM1",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a2 <-
ggplot(subset(params, archetype == 2)) +
geom_col(aes(x = variable, y = value), fill = "#00BAB4") +
ylim(0, 1) +
labs(title = "Archetype STEM2",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a3 <-
ggplot(subset(params, archetype == 3)) +
geom_col(aes(x = variable, y = value), fill = "#00BAB4") +
ylim(0, 1) +
labs(title = "Archetype STEM3",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
par(mfrow = c(3, 1))
a1
a2
a3
Co-occurrence term maps overlaying alpha scores for researchers’ scientific output is available online here: (https://sl.ugr.es/stemarchetypes)[https://sl.ugr.es/stemarchetypes]. Below find the code to extract the alpha score of each observation for the three archetypes:
alphas <- arc.model[["alphas"]]
df.stem <- subset (df,
field=="STEM",
select =
c("ID_FINAL"))
alphas <- cbind.data.frame(df.stem, alphas)
write.csv(alphas, file = "alpha-stem-respondents.csv", quote = F,
row.names = F)
The archetypal analysis for all cases is displayed in the manuscript in Figure 5.
We first identify the ideal number of archetypes needed. We search for the best model looking at up to 5 archetypes and 5 repetitions.
ssh <- df %>%
filter(field=="SSH") %>%
select(!field & !ID_FINAL)
set.seed(1986)
arc.k <- stepArchetypes(data = ssh,
k = 1:5,
verbose =F,
nrep = 5)
## Warning in method(..., k = k[i]): k=5: Error in qr.solve(alphas %*% t(alphas)): singular matrix 'a' in solve
The optimal number of archetypes is selected based on the Residual Sum of Squares (RSS) for which we apply an elbow rule.
screeplot(arc.k) # 3 archetypes
These archetypes have the following parameters
arc.model <- bestModel(arc.k[[3]])
t(parameters(arc.model))
## [,1] [,2] [,3]
## Commercialisation -3.908118e-05 -9.167116e-05 1.332944e+00
## Dissemination 1.836354e+00 1.228259e+00 3.354170e+00
## Engagement 4.353993e-01 1.203056e-02 5.540208e+00
## Joint research -1.305979e-04 -3.063383e-04 4.553506e+00
## Media promotion 2.683900e-01 -1.093877e-05 -5.440266e-07
## Openness 8.491478e-01 2.276740e-01 3.456086e-01
## Public Policy 1.067694e-01 -4.008930e-06 -1.993792e-07
## Social visibility 1.000144e+00 6.682204e-03 2.387494e-01
## Transmission 2.986753e+00 2.186739e+00 4.708219e+00
Which can be better interpreted when plotted in percentiles:
barplot(arc.model, stem, percentiles = TRUE)
In the manuscript these percentiles are shown using a radar chart for which we first have to recalculate those percentiles as computed in the archetypes package.
sshtypes <- parameters(arc.model) # extract parameters
rownames(sshtypes) <- sprintf('Archetype %s',
seq(length = nrow(sshtypes)))
sshtypes <-
sapply(seq(length = ncol(ssh)), # Identify percentile for param
function(i)
# based on a data.frame
.perc(sshtypes[, i], ssh[, i]))
colnames(sshtypes) <- colnames(ssh)
sshtypes <- as.data.frame(sshtypes)
sshtypes$archetype <- c(1, 2, 3)
We can then produce the radar charts after converting from wide to long format
params <-
tidyr::pivot_longer(sshtypes,
!c("archetype"),
names_to = "variable",
values_to = "value")
a1 <-
ggplot(subset(params, archetype == 1)) +
geom_col(aes(x = variable, y = value), fill = "#304160") +
ylim(0, 1) +
labs(title = "Archetype SSH1",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a2 <-
ggplot(subset(params, archetype == 2)) +
geom_col(aes(x = variable, y = value), fill = "#304160") +
ylim(0, 1) +
labs(title = "Archetype SSH2",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a3 <-
ggplot(subset(params, archetype == 3)) +
geom_col(aes(x = variable, y = value), fill = "#304160") +
ylim(0, 1) +
labs(title = "Archetype SSH3",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
par(mfrow = c(3, 1))
a1
a2
a3
Co-occurrence term maps overlaying alpha scores for researchers’ scientific output is available online here: (https://sl.ugr.es/ssharchetypes)[https://sl.ugr.es/ssharchetypes].
Below find the code to extract the alpha score of each observation for the three archetypes:
alphas <- arc.model[["alphas"]]
df.ssh <- subset (df,
field=="SSH",
select =
c("ID_FINAL"))
alphas <- cbind.data.frame(df.ssh, alphas)
write.csv(alphas, file = "alpha-ssh-respondents.csv", quote = F,
row.names = F)
The archetypal analysis for BIOMED cases is displayed in the manuscript in Figure 6.
We first identify the ideal number of archetypes needed. We search for the best model looking at up to 5 archetypes and 5 repetitions.
biomed <- df %>%
filter(field=="BIOMED") %>%
select(!field & !ID_FINAL)
set.seed(1986)
arc.k <- stepArchetypes(data = biomed,
k = 1:5,
verbose =F,
nrep = 5)
The optimal number of archetypes is selected based on the Residual Sum of Squares (RSS) for which we apply an elbow rule.
screeplot(arc.k) # 3 archetypes
These archetypes have the following parameters
arc.model <- bestModel(arc.k[[3]])
t(parameters(arc.model))
## [,1] [,2] [,3]
## Commercialisation 3.07979927 -1.767756e-04 0.0843188406
## Dissemination 2.81549356 9.998176e-01 1.5483899379
## Engagement 6.25715223 1.420711e-02 0.0425973732
## Joint research 5.28038138 -2.884656e-04 -0.0001042772
## Media promotion 0.03903413 -1.553638e-05 0.1766949736
## Openness 0.36402621 3.680102e-03 1.0000678203
## Public Policy 0.02486743 -2.998000e-06 0.0188674871
## Social visibility 0.42116353 2.958519e-02 1.0000658582
## Transmission 4.71199755 2.004295e+00 2.3233634364
Which can be better interpreted when plotted in percentiles:
barplot(arc.model, biomed, percentiles = TRUE)
In the manuscript these percentiles are shown using a radar chart for which we first have to recalculate those percentiles as computed in the archetypes package.
biomedtypes <- parameters(arc.model) # extract parameters
rownames(biomedtypes) <- sprintf('Archetype %s',
seq(length = nrow(biomedtypes)))
biomedtypes <-
sapply(seq(length = ncol(biomed)), # Identify percentile for param
function(i)
# based on a data.frame
.perc(biomedtypes[, i], biomed[, i]))
colnames(biomedtypes) <- colnames(biomed)
biomedtypes <- as.data.frame(biomedtypes)
biomedtypes$archetype <- c(1, 2, 3)
We can then produce the radar charts after converting from wide to long format
params <-
tidyr::pivot_longer(biomedtypes,
!c("archetype"),
names_to = "variable",
values_to = "value")
a1 <-
ggplot(subset(params, archetype == 1)) +
geom_col(aes(x = variable, y = value), fill = "#ADA16B") +
ylim(0, 1) +
labs(title = "Archetype BIOMED1",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a2 <-
ggplot(subset(params, archetype == 2)) +
geom_col(aes(x = variable, y = value), fill = "#ADA16B") +
ylim(0, 1) +
labs(title = "Archetype BIOMED2",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
a3 <-
ggplot(subset(params, archetype == 3)) +
geom_col(aes(x = variable, y = value), fill = "#ADA16B") +
ylim(0, 1) +
labs(title = "Archetype BIOMED3",
x = element_blank(),
y = element_blank()) +
coord_polar() +
theme_minimal()
par(mfrow = c(3, 1))
a1
a2
a3
Co-occurrence term maps overlaying alpha scores for researchers’ scientific output is available online here: (https://sl.ugr.es/biomedarchetypes)[https://sl.ugr.es/biomedarchetypes]. Below find the code to extract the alpha score of each observation for the three archetypes:
alphas <- arc.model[["alphas"]]
df.biomed <- subset (df,
field=="BIOMED",
select =
c("ID_FINAL"))
alphas <- cbind.data.frame(df.biomed, alphas)
write.csv(alphas, file = "alpha-biomed-respondents.csv", quote = F,
row.names = F)