Here we look at the difference in the distributions of low and high scoring esssays along two measures, mean and variance. We do two versions of this analysis: (A) Pooling across all languages, and (B) By language. For each, we downsample so that there are the same number in each lang x prompt x score bin.
SUMMARY: Lower scoring languages are closer to the mean (both pooling across languages and by language). Lower scoring essays have more variance.
Parameters
CUTOFF <- 4
N_PER_L_SCORE_PROMPT <- 10
MIN_SCORE <- 2
Read in doctag indices, docvecs, and metadata
doctag_indices <- read_feather("../../../data/processed/models/all_model/doctag_indices_all_model.feather")
docvecs <- read_feather("../../../data/processed/models/all_model/docvecs_all_model.feather") %>%
as.data.frame() %>%
mutate(offset = 0:(n()-1)) %>%
select(offset, everything())
metadata <- read_csv("../../../data/raw/merged_metadata.csv")
metadata_clean <- metadata %>%
mutate_if(is.character, as.factor) %>%
mutate(essay_id = as.character(essay_id))
n_scores_not_na <- metadata_clean %>%
filter(!is.na(score)) %>%
nrow()
Of all essays, we have scores for 100% of essays. Note that unlike Study 1, we only have whole scores (5 values).
Merge all data sources together
d <- doctag_indices %>%
left_join(metadata_clean) %>%
left_join(docvecs) %>%
nest(-1:-10, .key = "doc_vector") %>%
select(-doc_count, -offset, -test_center_country_code)
Get TSNE coordinates. These are the same for both the all-languages and by-language analyses.
#mats <- d %>%
# select(doc_vector) %>%
# unnest() %>%
# as.matrix()
#tsne_out = Rtsne::Rtsne(mats)
#tsne_dims <- tsne_out$Y %>%
# as.data.frame() %>%
# rename(tsne_X = V1,
# tsne_Y = V2) %>%
# bind_cols(d %>% select(essay_id, score, age, gender, L1_code, prompt_id)) %>%
# select(everything(), tsne_X, tsne_Y)
# write_csv(tsne_dims, "../all_data/tsne_dims_cached.csv")
tsne_dims <- read_csv("../../../data/processed/tsne/tsne_dims_cached_all.csv")
tsne_dims %>%
ggplot(aes(x = score)) +
geom_histogram(binwidth = 1) +
geom_vline(aes(xintercept = mean(tsne_dims$score)), color = "red") +
ggtitle("overall score distribution") +
theme_bw()
tsne_dims %>%
ggplot(aes(x = score, fill = L1_code)) +
geom_histogram(binwidth = 1) +
facet_wrap(~L1_code) +
#geom_density() +
ggtitle("Score distribution by language") +
theme_bw() +
theme(legend.position = "none")
tsne_dims %>%
count(score, L1_code, prompt_id) %>%
ggplot(aes(x = n)) +
geom_histogram(binwidth = 1) +
ggtitle("Number of essays per language, score, prompt bin, including score = 1") +
theme_bw()
tsne_dims %>%
filter(score >= MIN_SCORE) %>%
count(score, L1_code, prompt_id) %>%
ggplot(aes(x = n)) +
geom_histogram(binwidth = 1) +
ggtitle("Number of essays per language, score, prompt bin, excluding score = 1") +
theme_bw()
MEAN_N_PER_L_SCORE_PROMPT <- tsne_dims %>%
filter(score >= MIN_SCORE) %>%
count(score, L1_code, prompt_id) %>%
summarize(n = mean(n))
Scores are unevenly distributed across languages; need to downsample. Here were doing 10 per language, score_bin and prompt (mean = 10.1295406)
Downsample essays.
tsne_dims_downsampled <- tsne_dims %>%
nest(1:2, .key = "doc_vector") %>%
filter(score >= MIN_SCORE) %>%
group_by(L1_code, score, prompt_id) %>%
sample_n(N_PER_L_SCORE_PROMPT, replace = T) %>%
mutate(score_bin = ifelse(score < CUTOFF, "low", "high"),
score_prompt = paste(score_bin, prompt_id, sep = "_"),
L1_score_prompt = paste(L1_code, score_bin, prompt_id, sep = "_"),
L1_prompt = paste(L1_code, prompt_id, sep = "_")) %>%
ungroup()
The bins:
tsne_dims_downsampled %>%
group_by(score_bin) %>%
distinct(score)
## # A tibble: 4 x 2
## # Groups: score_bin [2]
## score score_bin
## <int> <chr>
## 1 2 low
## 2 3 low
## 3 4 high
## 4 5 high
Centroid = grand center
get_group_centroid <- function(doc_vecs_df, group_name){
centroid <- doc_vecs_df %>%
select(doc_vector) %>%
unnest(doc_vector) %>%
colMeans()
this_group_name <- doc_vecs_df %>%
select_(group_name) %>%
slice(1) %>%
unlist()
df <- data.frame(t(centroid)) %>%
mutate(x = this_group_name) %>%
select(x, everything())
names(df)[1] = c(as.name(group_name))
df
}
# L1_score_prompt centers
tsne_centroids_BL_score_prompt <- tsne_dims_downsampled %>%
split(.$L1_score_prompt) %>%
map_df(get_group_centroid, "L1_score_prompt") %>%
separate(L1_score_prompt, c("L1_code", "score_bin", "prompt_id"), "_") %>%
nest(4:5, .key = "doc_vector")
grand_centroids <- tsne_centroids_BL_score_prompt %>% # SPL
mutate(L1_prompt = paste0(L1_code, "_", prompt_id )) %>%
split(.$L1_prompt) %>% #
map_df(get_group_centroid, "L1_prompt") %>% # PL
separate(L1_prompt, c("L1_code", "prompt_id"), "_") %>%
group_by(prompt_id) %>%
summarize(tsne_X = mean(tsne_X), #P
tsne_Y = mean(tsne_Y)) %>%
mutate(group = "all") %>%
group_by(group) %>%
summarize(center_tsne_X = mean(tsne_X), # 1
center_tsne_Y = mean(tsne_Y))
group_centroids <- tsne_centroids_BL_score_prompt %>% # SPL
mutate(prompt_score = paste0(prompt_id, "_" , score_bin)) %>%
split(.$prompt_score) %>%
map_df(get_group_centroid, "prompt_score") %>% # SP
mutate(group = "all") %>%
separate(prompt_score, c("prompt_id", "score_bin"), "_")
distance_measures <- group_centroids %>%
left_join(grand_centroids) %>%
rowwise() %>%
mutate(dist_from_grand_mean = dist(rbind(c(center_tsne_X,
center_tsne_Y),
c(tsne_X, tsne_Y)), method = "euclidean")[1])
bin_dists <- distance_measures %>%
group_by(score_bin) %>%
multi_boot_standard(col = "dist_from_grand_mean")
kable(bin_dists)
score_bin | ci_lower | ci_upper | mean |
---|---|---|---|
high | 14.9029 | 20.63314 | 17.76401 |
low | 13.6166 | 19.07722 | 16.35866 |
ggplot(bin_dists, aes(x = score_bin,
y = mean,
fill = score_bin)) +
geom_bar(stat = "identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
ggtitle("Distance from grand centroid (bootstrapping across prompts)") +
ylab("distance from grand centroid") +
theme_minimal()
Model
model_data <- tsne_dims_downsampled %>%
mutate(group = "all") %>%
left_join(grand_centroids) %>%
unnest() %>%
rowwise() %>%
mutate(dist_from_grand_mean =
dist(rbind(c(center_tsne_X, center_tsne_Y),
c(tsne_X, tsne_Y)), method = "euclidean")[1])
# these don't converge
#model <- lmer(dist_from_grand_mean ~ score_bin + (score_bin|prompt_id) + (prompt_id|L1_code) + (score_bin|L1_code), data= model_data, control=lmerControl(optimizer = "bobyqa"))
#model <- lmer(dist_from_grand_mean ~ score_bin + (score_bin|prompt_id) + (prompt_id|L1_code) , data= model_data, control=lmerControl(optimizer = "bobyqa"))
#model <- lmer(dist_from_grand_mean ~ score_bin + (prompt_id|L1_code) , data= model_data, control=lmerControl(optimizer = "bobyqa"))
model <- lmer(dist_from_grand_mean ~ score_bin + (1|L1_code) + (1|prompt_id) , data= model_data, control=lmerControl(optimizer = "bobyqa"))
summary(model)
## Linear mixed model fit by REML ['lmerMod']
## Formula:
## dist_from_grand_mean ~ score_bin + (1 | L1_code) + (1 | prompt_id)
## Data: model_data
## Control: lmerControl(optimizer = "bobyqa")
##
## REML criterion at convergence: 196041.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -8.1975 -0.3182 0.0811 0.4600 7.2130
##
## Random effects:
## Groups Name Variance Std.Dev.
## L1_code (Intercept) 0.1221 0.3494
## prompt_id (Intercept) 52.4049 7.2391
## Residual 10.9067 3.3025
## Number of obs: 37440, groups: L1_code, 35; prompt_id, 28
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 18.28218 1.36955 13.35
## score_binlow -1.25799 0.03418 -36.81
##
## Correlation of Fixed Effects:
## (Intr)
## score_binlw -0.012
Lowering scoring languages are farther away from the overall group mean.
Here are scatter and 2D density plots for each prompt and score bin.
ggplot(tsne_dims_downsampled %>% unnest(),
aes(color = score_bin, x = tsne_X, y = tsne_Y)) +
facet_wrap(~prompt_id, nrow = 4) +
geom_point(size = .2, alpha = .3) +
theme_minimal()
Here is diameter as the average distance to all other points:
tsne_diameters_LA_mean <- tsne_dims_downsampled %>%
split(.$score_prompt) %>%
map_df(get_group_diameter, "score_prompt", "mean_dist") %>%
separate(score_prompt, c("score_bin","prompt_id"), sep = "_")
tsne_diameters_LA_mean %>%
group_by(score_bin) %>%
multi_boot_standard(col = "diameter") %>%
ggplot(aes(x = score_bin,
y = mean,
fill = score_bin)) +
geom_bar(stat = "identity") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
ggtitle("Diameter, averaging across prompts") +
ylab("mean diameter (average dist to all within-group points)") +
theme_minimal()
With this measure, low scoring essays have more variance.
low = filter(tsne_diameters_LA_mean, score_bin == "low")
high = filter(tsne_diameters_LA_mean, score_bin == "high")
t.test(low$diameter, high$diameter, paired = TRUE)
##
## Paired t-test
##
## data: low$diameter and high$diameter
## t = 4.2199, df = 27, p-value = 0.0002468
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.4745154 1.3726586
## sample estimates:
## mean of the differences
## 0.923587
By languages - Language centroid
grand_centroids <- tsne_centroids_BL_score_prompt %>% # SPL
mutate(L1_prompt = paste0(L1_code, "_", prompt_id )) %>%
split(.$L1_prompt) %>%
map_df(get_group_centroid, "L1_prompt") %>% # PL
separate(L1_prompt, c("L1_code", "prompt_id"), "_") %>%
nest(3:4, .key = "doc_vector") %>%
split(.$L1_code) %>%
map_df(get_group_centroid, "L1_code") %>%
rename(center_tsne_X = tsne_X,
center_tsne_Y = tsne_Y)
distance_measures <- tsne_centroids_BL_score_prompt %>%
unnest() %>%
left_join(grand_centroids) %>%
rowwise () %>%
mutate(dist_from_grand_mean =
dist(rbind(c(center_tsne_X, center_tsne_Y),
c(tsne_X, tsne_Y)), method = "euclidean")[1])
bin_dists <- distance_measures %>%
group_by(L1_code, score_bin) %>%
multi_boot_standard(col = "dist_from_grand_mean")
ggplot(bin_dists, aes(x = L1_code, y = mean, group = score_bin, fill = score_bin)) +
geom_bar(stat = "identity", position = position_dodge() ) +
xlab("Native language") +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper), position = position_dodge(.9)) +
ylab("Euclidean distance from language centroid") +
theme_minimal() +
theme(text = element_text(size = 15),
axis.text.x = element_text(angle = 90, hjust = 1)) +
scale_fill_brewer(palette = "Set1", name = "Score")
kable(bin_dists)
L1_code | score_bin | ci_lower | ci_upper | mean |
---|---|---|---|---|
ARA | high | 14.50854 | 20.31290 | 17.35065 |
ARA | low | 13.42778 | 18.60312 | 15.92734 |
BEN | high | 16.05779 | 21.90380 | 18.89651 |
BEN | low | 13.78415 | 19.82368 | 16.86073 |
BUL | high | 14.24274 | 19.81432 | 17.21946 |
BUL | low | 13.05452 | 19.16474 | 16.12481 |
CHI | high | 15.38462 | 21.06080 | 18.21525 |
CHI | low | 13.39906 | 18.50594 | 16.04284 |
DUT | high | 14.96819 | 20.59136 | 17.85774 |
DUT | low | 12.82454 | 18.80713 | 15.88590 |
ENG | high | 15.35845 | 21.17618 | 18.16705 |
ENG | low | 14.13516 | 19.40165 | 16.76981 |
FAS | high | 14.14614 | 20.07018 | 17.29799 |
FAS | low | 12.82309 | 18.41262 | 15.52350 |
FRE | high | 14.61921 | 20.40549 | 17.65681 |
FRE | low | 13.23382 | 18.72757 | 15.97212 |
GER | high | 14.97270 | 20.68973 | 17.82031 |
GER | low | 13.62390 | 19.17379 | 16.39672 |
GRE | high | 14.45674 | 20.36418 | 17.56723 |
GRE | low | 12.71918 | 18.38236 | 15.73442 |
GUJ | high | 14.36836 | 20.57892 | 17.63308 |
GUJ | low | 14.19541 | 19.59639 | 16.82177 |
HIN | high | 15.41619 | 21.09405 | 18.33358 |
HIN | low | 14.59187 | 20.14962 | 17.35096 |
IBO | high | 15.07161 | 21.07239 | 18.06111 |
IBO | low | 13.64845 | 18.74564 | 16.27427 |
IND | high | 13.95436 | 20.12811 | 17.13321 |
IND | low | 13.07838 | 18.61289 | 15.93580 |
ITA | high | 14.40358 | 20.30740 | 17.37258 |
ITA | low | 13.40478 | 18.74980 | 16.14432 |
JPN | high | 14.33399 | 20.21688 | 17.25782 |
JPN | low | 13.47457 | 19.01434 | 16.17036 |
KAN | high | 15.60864 | 21.75202 | 18.74677 |
KAN | low | 13.54344 | 19.61928 | 16.61888 |
KOR | high | 15.36967 | 20.57560 | 17.91790 |
KOR | low | 13.56424 | 18.81132 | 16.32295 |
MAL | high | 14.89661 | 20.97647 | 18.02225 |
MAL | low | 14.38193 | 20.32834 | 17.22895 |
MAR | high | 15.17457 | 21.11376 | 18.06121 |
MAR | low | 14.38692 | 20.03161 | 17.23579 |
NEP | high | 15.03110 | 21.51021 | 18.32608 |
NEP | low | 14.85312 | 20.22407 | 17.50173 |
PAN | high | 14.79346 | 20.67256 | 17.98117 |
PAN | low | 14.38446 | 19.61306 | 17.18925 |
POL | high | 14.55938 | 20.46271 | 17.58511 |
POL | low | 13.63650 | 18.82258 | 16.33064 |
POR | high | 14.66998 | 20.24575 | 17.65120 |
POR | low | 13.03345 | 18.45268 | 15.69171 |
RUM | high | 14.69363 | 20.56084 | 17.67140 |
RUM | low | 12.95566 | 18.20951 | 15.76591 |
RUS | high | 15.07623 | 20.55559 | 17.84764 |
RUS | low | 14.01565 | 19.44334 | 16.70459 |
SPA | high | 14.78474 | 20.35636 | 17.62049 |
SPA | low | 12.85185 | 18.51199 | 15.65550 |
TAM | high | 15.09655 | 21.02593 | 18.22260 |
TAM | low | 14.46361 | 19.94942 | 17.12117 |
TEL | high | 15.01908 | 20.73831 | 17.98919 |
TEL | low | 14.13862 | 19.65750 | 16.88112 |
TGL | high | 14.65664 | 20.06857 | 17.51809 |
TGL | low | 14.23585 | 19.65155 | 16.85779 |
THA | high | 15.28873 | 20.75167 | 17.81894 |
THA | low | 13.81847 | 19.33315 | 16.60962 |
TUR | high | 14.37129 | 20.26415 | 17.40229 |
TUR | low | 14.12079 | 19.27540 | 16.55622 |
URD | high | 15.20931 | 21.26715 | 18.21548 |
URD | low | 14.28098 | 19.57639 | 17.00679 |
VIE | high | 14.82609 | 20.32882 | 17.67133 |
VIE | low | 12.98558 | 18.37910 | 15.69448 |
YOR | high | 15.40881 | 21.29999 | 18.31916 |
YOR | low | 13.54515 | 19.39071 | 16.53499 |
Model
model_data <- tsne_dims_downsampled %>%
left_join(grand_centroids) %>%
unnest() %>%
rowwise() %>%
mutate(dist_from_grand_mean =
dist(rbind(c(center_tsne_X, center_tsne_Y),
c(tsne_X, tsne_Y)), method = "euclidean")[1])
model_data %>%
group_by(L1_code) %>%
do(tidy(lmer(dist_from_grand_mean ~ score_bin + (score_bin|prompt_id), data=.,control=lmerControl(optimizer = "bobyqa")))) %>%
filter(term == "score_binlow") %>%
arrange(statistic) %>%
kable()
L1_code | term | estimate | std.error | statistic | group |
---|---|---|---|---|---|
BEN | score_binlow | -2.0482763 | 0.3133251 | -6.5372246 | fixed |
JPN | score_binlow | -1.3008434 | 0.2276088 | -5.7152607 | fixed |
POR | score_binlow | -1.9791854 | 0.3535199 | -5.5985123 | fixed |
KOR | score_binlow | -1.1535076 | 0.2146429 | -5.3740769 | fixed |
FRE | score_binlow | -1.0834534 | 0.2105391 | -5.1460904 | fixed |
SPA | score_binlow | -1.3773475 | 0.2989218 | -4.6077185 | fixed |
RUM | score_binlow | -1.8094388 | 0.4096531 | -4.4170021 | fixed |
ENG | score_binlow | -1.8065191 | 0.4389514 | -4.1155333 | fixed |
DUT | score_binlow | -1.3683001 | 0.3400837 | -4.0234216 | fixed |
CHI | score_binlow | -1.6130749 | 0.4252086 | -3.7936086 | fixed |
VIE | score_binlow | -1.5968721 | 0.4245418 | -3.7614013 | fixed |
YOR | score_binlow | -1.7928780 | 0.4818305 | -3.7209723 | fixed |
BUL | score_binlow | -1.4105576 | 0.3802423 | -3.7096282 | fixed |
TAM | score_binlow | -1.3301323 | 0.3595957 | -3.6989661 | fixed |
URD | score_binlow | -1.1841224 | 0.3247621 | -3.6461227 | fixed |
GER | score_binlow | -1.2124774 | 0.3427081 | -3.5379300 | fixed |
ARA | score_binlow | -1.4748536 | 0.4258047 | -3.4636853 | fixed |
POL | score_binlow | -1.1744431 | 0.3450250 | -3.4039360 | fixed |
GRE | score_binlow | -1.6208330 | 0.4967683 | -3.2627547 | fixed |
TEL | score_binlow | -1.1784583 | 0.3646043 | -3.2321566 | fixed |
FAS | score_binlow | -1.4194862 | 0.4558191 | -3.1141435 | fixed |
IBO | score_binlow | -1.3715324 | 0.4635671 | -2.9586492 | fixed |
THA | score_binlow | -1.0404292 | 0.3799435 | -2.7383789 | fixed |
KAN | score_binlow | -1.5238304 | 0.5570942 | -2.7353189 | fixed |
HIN | score_binlow | -0.9098248 | 0.3566241 | -2.5512152 | fixed |
TUR | score_binlow | -0.7708002 | 0.3064908 | -2.5149213 | fixed |
ITA | score_binlow | -0.9183046 | 0.3806489 | -2.4124713 | fixed |
TGL | score_binlow | -0.6183860 | 0.2580783 | -2.3961182 | fixed |
NEP | score_binlow | -0.8070802 | 0.3420564 | -2.3594948 | fixed |
GUJ | score_binlow | -1.0060198 | 0.4411161 | -2.2806237 | fixed |
MAL | score_binlow | -0.8865862 | 0.4395371 | -2.0170907 | fixed |
RUS | score_binlow | -0.7835864 | 0.3926972 | -1.9953957 | fixed |
IND | score_binlow | -0.9541267 | 0.5379901 | -1.7735024 | fixed |
MAR | score_binlow | -0.6941345 | 0.4249078 | -1.6336121 | fixed |
PAN | score_binlow | -0.2638990 | 0.3569011 | -0.7394177 | fixed |
Across all languages, lower scoring essays are closer to group mean.
bin_dists_low <- distance_measures %>%
group_by(L1_code, score_bin) %>%
summarize(sd = sd(dist_from_grand_mean),
mean = mean(dist_from_grand_mean),
n = n()) %>%
filter(score_bin == "low") %>%
rename(low_sd = sd,
low_mean = mean,
low_n = n)
bin_dists_high <- distance_measures %>%
group_by(L1_code, score_bin) %>%
summarize(sd = sd(dist_from_grand_mean),
mean = mean(dist_from_grand_mean),
n = n()) %>%
filter(score_bin == "high") %>%
rename(high_sd = sd,
high_mean = mean,
high_n = n)
all_dists = inner_join(bin_dists_low, bin_dists_high, by = "L1_code")
effect_sizes = all_dists %>%
rowwise() %>%
mutate(d = compute.es::mes( high_mean, low_mean, high_sd, low_sd, high_n * 10, low_n * 10, verbose = FALSE)$d)
effect_sizes %>%
select(L1_code, d) %>%
arrange(-d) %>%
kable()
L1_code | d |
---|---|
CHI | 0.29 |
KAN | 0.26 |
VIE | 0.26 |
BEN | 0.25 |
DUT | 0.25 |
POR | 0.25 |
RUM | 0.25 |
SPA | 0.25 |
GRE | 0.24 |
FAS | 0.23 |
YOR | 0.23 |
IBO | 0.22 |
FRE | 0.21 |
KOR | 0.21 |
ENG | 0.19 |
ARA | 0.18 |
GER | 0.18 |
ITA | 0.16 |
POL | 0.16 |
RUS | 0.16 |
THA | 0.16 |
IND | 0.15 |
URD | 0.15 |
JPN | 0.14 |
TAM | 0.14 |
TEL | 0.14 |
BUL | 0.13 |
HIN | 0.12 |
TUR | 0.11 |
GUJ | 0.10 |
MAL | 0.10 |
MAR | 0.10 |
NEP | 0.10 |
PAN | 0.10 |
TGL | 0.09 |
Diameter as the average distance to all other points:
tsne_diameters_LB_mean <- tsne_dims_downsampled %>%
split(.$L1_score_prompt) %>%
map_df(get_group_diameter, "L1_score_prompt", "mean_dist") %>%
separate(L1_score_prompt, c("L1_code" ,"score_bin", "prompt_id"), sep = "_")
tsne_diameters_LB_mean %>%
group_by(score_bin, L1_code) %>%
multi_boot_standard(col = "diameter", na.rm = T) %>%
ggplot(aes(x = score_bin,
y = mean,
fill = score_bin)) +
geom_bar(stat = "identity") +
facet_wrap(~L1_code, nrow = 5) +
geom_linerange(aes(ymin = ci_lower, ymax = ci_upper)) +
ggtitle("Diameter, averaging across prompts") +
ylab("mean diameter (average dist to all within-group points)") +
theme_minimal()
get_uneven_tscore <- function(current_lang, tsne_diameters_LB_mean){
m <- tsne_diameters_LB_mean %>%
filter(L1_code == current_lang)
good_ids <- m %>%
count(prompt_id) %>%
as.data.frame() %>%
filter(n == 2)
k <- m %>%
filter(prompt_id %in% good_ids$prompt_id)
t.value <- t.test(k$diameter ~ k$score_bin,
paired = TRUE)$statistic
data.frame(t.value, L1_code = current_lang)
}
map_df(unique(tsne_diameters_LB_mean$L1_code), get_uneven_tscore,tsne_diameters_LB_mean) %>%
gather("L1_code", "t.value") %>%
arrange(t.value) %>%
kable()
t.value | L1_code |
---|---|
-4.3482652 | CHI |
-2.7806569 | IBO |
-2.4520653 | ITA |
-2.4110134 | FAS |
-1.9970025 | BEN |
-1.9844677 | URD |
-1.9796668 | RUM |
-1.9622424 | VIE |
-1.9094973 | FRE |
-1.8241657 | KAN |
-1.7478173 | SPA |
-1.6417110 | YOR |
-1.4032851 | GRE |
-1.3187022 | RUS |
-1.2189869 | PAN |
-1.1763872 | MAR |
-1.1432129 | KOR |
-0.9390124 | POL |
-0.8545246 | ARA |
-0.8229936 | GUJ |
-0.7858954 | HIN |
-0.6728167 | IND |
-0.6575782 | TAM |
-0.5593251 | TUR |
-0.5472320 | TEL |
-0.5067167 | NEP |
-0.4851766 | JPN |
-0.2190358 | THA |
-0.1851714 | GER |
-0.0495870 | MAL |
0.0962739 | ENG |
0.1134695 | POR |
0.4225591 | TGL |
0.5786750 | DUT |
2.5375746 | BUL |
There’s no effect here, except for a few languages.