library(data.table)
library(ggplot2)
library(purrr)
##
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
##
## transpose
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(shiny)
## Warning: package 'shiny' was built under R version 4.2.3
library(shinydashboard)
## Warning: package 'shinydashboard' was built under R version 4.2.3
##
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
##
## box
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(htmlwidgets)
library(IRdisplay)
## Warning: package 'IRdisplay' was built under R version 4.2.3
library(corrplot)
## corrplot 0.92 loaded
library(googlesheets4)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Load the coded search results from the working directory.
df <- read.csv("SciSearch.csv")

# Both date columns arrive as day.month.year strings; parse them into Date.
df$Collection.date <- as.Date(df$Collection.date, format = "%d.%m.%Y")
df$coding.completion.date <- as.Date(df$coding.completion.date, format = "%d.%m.%Y")

# Every row collected in March has its year overwritten with 2023 (month and
# day are kept).
# NOTE(review): the original comments spoke of "year greater than 2023" and
# "update to 2022", which does not match this logic - confirm the intent.
# Missing dates are excluded from the mask because an NA inside a logical
# subscript can make the assignment below fail.
matching_rows <- !is.na(df$Collection.date) & month(df$Collection.date) == 3

# Skip the rewrite when nothing matches: paste0("2023", character(0)) yields
# the single string "2023", which as.Date() cannot parse.
if (any(matching_rows)) {
  df$Collection.date[matching_rows] <- as.Date(
    paste0("2023", format(df$Collection.date[matching_rows], "-%m-%d"))
  )
}

# Switch to data.table by reference for the grouped summaries that follow.
setDT(df)
# Count the rows for every (Collection.date, coding.completion.date) pair and
# list the pairs by coding completion date first, collection date second.
check_times <- df[, .N, by = .(Collection.date, coding.completion.date)]
# na.last = TRUE keeps pairs with a missing date at the bottom of the table.
setorder(check_times, coding.completion.date, Collection.date, na.last = TRUE)
check_times
## Collection.date coding.completion.date N
## 1: 2022-07-28 2022-08-27 59
## 2: 2022-08-08 2022-08-28 47
## 3: 2022-07-28 2022-08-30 58
## 4: 2022-07-28 2022-08-31 55
## 5: 2022-07-28 2022-09-01 51
## 6: 2022-08-08 2022-09-02 52
## 7: 2022-06-28 2022-09-09 53
## 8: 2022-06-28 2022-09-20 57
## 9: 2022-08-18 2022-09-23 55
## 10: 2022-08-18 2022-10-14 50
## 11: 2022-08-18 2022-11-03 42
## 12: 2022-07-28 2022-11-21 148
## 13: 2022-08-18 2022-11-28 89
## 14: 2022-07-28 2022-12-10 147
## 15: 2022-07-28 2022-12-19 137
## 16: 2022-12-04 2022-12-25 72
## 17: 2022-12-04 2022-12-27 71
## 18: 2022-07-28 2023-01-11 140
## 19: 2022-08-08 2023-01-11 296
## 20: 2022-12-04 2023-02-08 63
## 21: 2022-12-04 2023-02-12 156
## 22: 2022-12-04 2023-02-24 249
## 23: 2022-12-04 2023-03-20 155
## 24: 2022-12-14 2023-03-20 23
## 25: 2023-03-14 2023-03-30 176
## 26: 2023-03-14 2023-04-01 185
## 27: 2022-12-04 2023-04-08 228
## 28: 2022-08-18 <NA> 1
## Collection.date coding.completion.date N
# Add 'coding_month' to df by reference: the month number of each row's
# coding.completion.date. Only the month is kept; the year is dropped.
set(df, j = "coding_month", value = month(df[["coding.completion.date"]]))
Accessibility
# Accessibility working table: pick the identifying columns plus the
# accessibility-related scores from df (the raw headers are kept exactly as
# they appear in the data).
Accessibility_quality <- df[
  ,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number",
    "תקינות.הלינק..1..תקין..2..לא.זמין..3..בשפה.אחרת..4..לא.רלוונטי.מבחינה.מדעית",
    "Site.type",
    "How.recent.is.the.information..פרופורציה",
    "What.is.the.author.background..פרופורציה",
    "Are.there.major.scientific.errors.in.the.link.פרופורציה.טעויות.מדעיות.",
    "Accessibility.mean.פרופורציה",
    "coding_month"
  ),
  with = FALSE
]

# Replace the raw headers with short names, position by position.
setnames(
  Accessibility_quality,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number", "validity", "Site.type",
    "recent.information", "author.background", "scientific.errors",
    "Accessibility.mean", "coding_month"
  )
)

# Keep only the rows that actually have an accessibility score.
Accessibility_quality <- Accessibility_quality[!is.na(Accessibility.mean), ]
Mean accessibility for each month, country, and language
# Average accessibility score within every language / country / coding-month
# group, listed by language, then month, then country.
Mean_Accessibility_month_country_langs <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("langs", "country", "coding_month")
]
setorder(
  Mean_Accessibility_month_country_langs,
  langs, coding_month, country,
  na.last = TRUE
)
Mean_Accessibility_month_country_langs
## langs country coding_month
## 1: Chinese Bangkok,Bangkok,Thailand 8
## 2: Chinese Washington,District of Columbia,United States 8
## 3: Chinese Washington,District of Columbia,United States 12
## 4: English Washington,District of Columbia,United States 4
## 5: English Abuja,Federal Capital Territory,Nigeria 9
## 6: English Washington,District of Columbia,United States 9
## 7: German Berlin,Berlin,Germany 9
## 8: German Berlin,Berlin,Germany 11
## 9: German Berlin,Berlin,Germany 12
## 10: Hebrew Jerusalem District,Israel 3
## 11: Hebrew Jerusalem District,Israel 9
## 12: Hindi New Delhi,Delhi,India 3
## 13: Hindi New Delhi,Delhi,India 11
## 14: Indonesian Central Jakarta,Jakarta,Indonesia 1
## 15: Indonesian Central Jakarta,Jakarta,Indonesia 9
## 16: Italian Rome,Lazio,Italy 2
## 17: Italian Rome,Lazio,Italy 10
## 18: Japanese Tokyo,Tokyo,Japan 8
## 19: Japanese Tokyo,Tokyo,Japan 12
## 20: Korean Seoul,South Korea 1
## 21: Korean Seoul,South Korea 9
## 22: Portuguese Brasilia,Federal District,Brazil 2
## 23: Portuguese Lisbon,Lisbon,Portugal 2
## 24: Portuguese Brasilia,Federal District,Brazil 12
## 25: Portuguese Lisbon,Lisbon,Portugal 12
## 26: Spanish Madrid,Community of Madrid,Spain 2
## 27: Spanish Mexico City,Mexico City,Mexico 2
## 28: Spanish Madrid,Community of Madrid,Spain 4
## 29: Spanish Mexico City,Mexico City,Mexico 4
## 30: Taiwanese Taipei City,Taiwan 8
## 31: Taiwanese Taipei City,Taiwan 11
## 32: Vietnamese Hanoi,Hanoi,Vietnam 1
## 33: Vietnamese Hanoi,Hanoi,Vietnam 8
## langs country coding_month
## mean_Accessibility
## 1: 0.7500000
## 2: 0.6941489
## 3: 0.6776336
## 4: 0.6775000
## 5: 0.7083333
## 6: 0.6450321
## 7: 0.6209936
## 8: 0.7537453
## 9: 0.6751543
## 10: 0.6217857
## 11: 0.6915064
## 12: 0.6426523
## 13: 0.6114583
## 14: 0.6994891
## 15: 0.6848958
## 16: 0.6027422
## 17: 0.6791667
## 18: 0.5882768
## 19: 0.6823704
## 20: 0.6483226
## 21: 0.6633987
## 22: 0.6237693
## 23: 0.7144703
## 24: 0.7107843
## 25: 0.7426471
## 26: 0.6968085
## 27: 0.7339744
## 28: 0.7657233
## 29: 0.7416143
## 30: 0.6352041
## 31: 0.5999306
## 32: 0.6827338
## 33: 0.6204710
## mean_Accessibility
# Combine language and country into a single label used on the x axis.
Mean_Accessibility_month_country_langs[, language_country := paste(langs, country, sep = "_")]

# Build one bar chart per coding month, in ascending month order.
# - The lambda argument is called month_value so it does not shadow month().
# - Each chart is returned directly: wrapping a single chart in subplot()
#   dropped the x/y axis titles set in layout() (subplot's titleX/titleY
#   default to FALSE).
subplots <- lapply(
  sort(unique(Mean_Accessibility_month_country_langs$coding_month)),
  function(month_value) {
    data_month <- Mean_Accessibility_month_country_langs[coding_month == month_value]
    plot_ly(
      data = data_month,
      x = ~language_country,
      y = ~mean_Accessibility,
      color = ~language_country,
      # Custom color palette
      colors = c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
                 "#0072B2", "#D55E00", "#CC79A7", "#999999"),
      type = "bar"
    ) %>%
      layout(
        xaxis = list(title = "Language and Country"),
        yaxis = list(title = "Average Accessibility"),
        title = paste("Month:", month_value)
      )
  }
)

# Display: printing a bare list of widgets shows only empty "[[i]]" entries in
# a knitted document, so wrap the charts in a tag list (re-exported by shiny)
# to have them rendered one after another.
tagList(subplots)
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
##
## [[6]]
##
## [[7]]
##
## [[8]]
##
## [[9]]
Mean Accessibility for each month and language
# Average accessibility score per language and coding month, listed by
# language and then month.
Mean_Accessibility_month_language <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("langs", "coding_month")
]
setorder(Mean_Accessibility_month_language, langs, coding_month, na.last = TRUE)
Mean_Accessibility_month_language
## langs coding_month mean_Accessibility
## 1: Chinese 8 0.7013889
## 2: Chinese 12 0.6776336
## 3: English 4 0.6775000
## 4: English 9 0.6484848
## 5: German 9 0.6209936
## 6: German 11 0.7537453
## 7: German 12 0.6751543
## 8: Hebrew 3 0.6217857
## 9: Hebrew 9 0.6915064
## 10: Hindi 3 0.6426523
## 11: Hindi 11 0.6114583
## 12: Indonesian 1 0.6994891
## 13: Indonesian 9 0.6848958
## 14: Italian 2 0.6027422
## 15: Italian 10 0.6791667
## 16: Japanese 8 0.5882768
## 17: Japanese 12 0.6823704
## 18: Korean 1 0.6483226
## 19: Korean 9 0.6633987
## 20: Portuguese 2 0.6557377
## 21: Portuguese 12 0.7187500
## 22: Spanish 2 0.7048611
## 23: Spanish 4 0.7596960
## 24: Taiwanese 8 0.6352041
## 25: Taiwanese 11 0.5999306
## 26: Vietnamese 1 0.6827338
## 27: Vietnamese 8 0.6204710
## langs coding_month mean_Accessibility
# Mean accessibility per coding month, drawn as one panel per language.
# NOTE(review): coding_month carries no year. According to the check_times
# printout, months 1-4 come from 2023 and months 8-12 from 2022, so the line
# within a panel does not run in chronological order - confirm this is the
# intended reading.
ggplot(
  Mean_Accessibility_month_language,
  aes(x = coding_month, y = mean_Accessibility, group = langs, color = langs)
) +
  geom_point(size = 3) +
  geom_line(size = 1, alpha = 0.6) +
  facet_wrap(~langs) +
  # Months are whole numbers: fix the axis to 1-12 with a tick on every month
  # instead of letting the default break algorithm choose fractional ticks.
  scale_x_continuous(limits = c(1, 12), breaks = 1:12, minor_breaks = NULL) +
  # Clamp the y axis to the observed range of the monthly means.
  ylim(range(Mean_Accessibility_month_language$mean_Accessibility)) +
  labs(x = "Month", y = "Mean Accessibility") +
  theme_bw() +
  theme(legend.position = "top")

Mean Accessibility for each month and country
# Average accessibility score per country and coding month, listed by country
# and then month.
Mean_Accessibility_month_country <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("country", "coding_month")
]
setorder(Mean_Accessibility_month_country, country, coding_month, na.last = TRUE)
Mean_Accessibility_month_country
## country coding_month
## 1: Abuja,Federal Capital Territory,Nigeria 9
## 2: Bangkok,Bangkok,Thailand 8
## 3: Berlin,Berlin,Germany 9
## 4: Berlin,Berlin,Germany 11
## 5: Berlin,Berlin,Germany 12
## 6: Brasilia,Federal District,Brazil 2
## 7: Brasilia,Federal District,Brazil 12
## 8: Central Jakarta,Jakarta,Indonesia 1
## 9: Central Jakarta,Jakarta,Indonesia 9
## 10: Hanoi,Hanoi,Vietnam 1
## 11: Hanoi,Hanoi,Vietnam 8
## 12: Jerusalem District,Israel 3
## 13: Jerusalem District,Israel 9
## 14: Lisbon,Lisbon,Portugal 2
## 15: Lisbon,Lisbon,Portugal 12
## 16: Madrid,Community of Madrid,Spain 2
## 17: Madrid,Community of Madrid,Spain 4
## 18: Mexico City,Mexico City,Mexico 2
## 19: Mexico City,Mexico City,Mexico 4
## 20: New Delhi,Delhi,India 3
## 21: New Delhi,Delhi,India 11
## 22: Rome,Lazio,Italy 2
## 23: Rome,Lazio,Italy 10
## 24: Seoul,South Korea 1
## 25: Seoul,South Korea 9
## 26: Taipei City,Taiwan 8
## 27: Taipei City,Taiwan 11
## 28: Tokyo,Tokyo,Japan 8
## 29: Tokyo,Tokyo,Japan 12
## 30: Washington,District of Columbia,United States 4
## 31: Washington,District of Columbia,United States 8
## 32: Washington,District of Columbia,United States 9
## 33: Washington,District of Columbia,United States 12
## country coding_month
## mean_Accessibility
## 1: 0.7083333
## 2: 0.7500000
## 3: 0.6209936
## 4: 0.7537453
## 5: 0.6751543
## 6: 0.6237693
## 7: 0.7107843
## 8: 0.6994891
## 9: 0.6848958
## 10: 0.6827338
## 11: 0.6204710
## 12: 0.6217857
## 13: 0.6915064
## 14: 0.7144703
## 15: 0.7426471
## 16: 0.6968085
## 17: 0.7657233
## 18: 0.7339744
## 19: 0.7416143
## 20: 0.6426523
## 21: 0.6114583
## 22: 0.6027422
## 23: 0.6791667
## 24: 0.6483226
## 25: 0.6633987
## 26: 0.6352041
## 27: 0.5999306
## 28: 0.5882768
## 29: 0.6823704
## 30: 0.6775000
## 31: 0.6941489
## 32: 0.6450321
## 33: 0.6776336
## mean_Accessibility
Scientific quality
# scientific quality
# Scientific-quality working table: pick the identifying columns plus the
# quality-related scores from df (the raw headers are kept exactly as they
# appear in the data).
scientific_quality <- df[
  ,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number",
    "תקינות.הלינק..1..תקין..2..לא.זמין..3..בשפה.אחרת..4..לא.רלוונטי.מבחינה.מדעית",
    "Site.type",
    "How.recent.is.the.information..פרופורציה",
    "What.is.the.author.background..פרופורציה",
    "Are.there.major.scientific.errors.in.the.link.פרופורציה.טעויות.מדעיות.",
    "How.accurate.is.the.scientific.content.presented.in.the.link..פרופורציה",
    "Quality.component.score.פרופורציה",
    "Quality.mean.פרופורציה_new",
    "coding_month"
  ),
  with = FALSE
]

# Replace the raw headers with short names, position by position.
setnames(
  scientific_quality,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number", "validity", "Site.type",
    "recent.information", "author.background", "scientific.errors",
    "accurate.content", "Quality.component.score", "Quality.mean",
    "coding_month"
  )
)

# Keep only the rows that actually have a quality score.
scientific_quality <- scientific_quality[!is.na(Quality.mean), ]
Mean quality for each month, country, and language
# Average quality score within every language / country / coding-month group,
# listed by language, then month, then country.
Mean_quality_month_country_langs <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("langs", "country", "coding_month")
]
setorder(
  Mean_quality_month_country_langs,
  langs, coding_month, country,
  na.last = TRUE
)
Mean_quality_month_country_langs
## langs country coding_month
## 1: Chinese Bangkok,Bangkok,Thailand 8
## 2: Chinese Washington,District of Columbia,United States 8
## 3: Chinese Washington,District of Columbia,United States 12
## 4: English Washington,District of Columbia,United States 4
## 5: English Abuja,Federal Capital Territory,Nigeria 9
## 6: English Washington,District of Columbia,United States 9
## 7: German Berlin,Berlin,Germany 9
## 8: German Berlin,Berlin,Germany 11
## 9: German Berlin,Berlin,Germany 12
## 10: Hebrew Jerusalem District,Israel 3
## 11: Hebrew Jerusalem District,Israel 9
## 12: Hindi New Delhi,Delhi,India 3
## 13: Hindi New Delhi,Delhi,India 11
## 14: Indonesian Central Jakarta,Jakarta,Indonesia 1
## 15: Indonesian Central Jakarta,Jakarta,Indonesia 9
## 16: Italian Rome,Lazio,Italy 2
## 17: Italian Rome,Lazio,Italy 10
## 18: Japanese Tokyo,Tokyo,Japan 8
## 19: Japanese Tokyo,Tokyo,Japan 12
## 20: Korean Seoul,South Korea 1
## 21: Korean Seoul,South Korea 9
## 22: Portuguese Brasilia,Federal District,Brazil 2
## 23: Portuguese Lisbon,Lisbon,Portugal 2
## 24: Portuguese Brasilia,Federal District,Brazil 12
## 25: Portuguese Lisbon,Lisbon,Portugal 12
## 26: Spanish Madrid,Community of Madrid,Spain 2
## 27: Spanish Mexico City,Mexico City,Mexico 2
## 28: Spanish Madrid,Community of Madrid,Spain 4
## 29: Spanish Mexico City,Mexico City,Mexico 4
## 30: Taiwanese Taipei City,Taiwan 8
## 31: Taiwanese Taipei City,Taiwan 11
## 32: Vietnamese Hanoi,Hanoi,Vietnam 1
## 33: Vietnamese Hanoi,Hanoi,Vietnam 8
## langs country coding_month
## mean_quality
## 1: 0.7678571
## 2: 0.6808511
## 3: 0.7416031
## 4: 0.6845455
## 5: 0.8472222
## 6: 0.7584135
## 7: 0.7095353
## 8: 0.6211610
## 9: 0.6326389
## 10: 0.6319643
## 11: 0.7207532
## 12: 0.6162366
## 13: 0.6927083
## 14: 0.7140146
## 15: 0.6705729
## 16: 0.6135684
## 17: 0.7454167
## 18: 0.6903249
## 19: 0.5830370
## 20: 0.6685161
## 21: 0.7189542
## 22: 0.6707806
## 23: 0.6635659
## 24: 0.6580882
## 25: 0.7352941
## 26: 0.7318262
## 27: 0.7403846
## 28: 0.6654088
## 29: 0.7572327
## 30: 0.7062075
## 31: 0.6441667
## 32: 0.6903597
## 33: 0.7653986
## mean_quality
Mean quality for each month and language
# Average quality score per language and coding month, listed by language and
# then month.
Mean_quality_month_language <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("langs", "coding_month")
]
setorder(Mean_quality_month_language, langs, coding_month, na.last = TRUE)
Mean_quality_month_language
## langs coding_month mean_quality
## 1: Chinese 8 0.6921296
## 2: Chinese 12 0.7416031
## 3: English 4 0.6845455
## 4: English 9 0.7632576
## 5: German 9 0.7095353
## 6: German 11 0.6211610
## 7: German 12 0.6326389
## 8: Hebrew 3 0.6319643
## 9: Hebrew 9 0.7207532
## 10: Hindi 3 0.6162366
## 11: Hindi 11 0.6927083
## 12: Indonesian 1 0.7140146
## 13: Indonesian 9 0.6705729
## 14: Italian 2 0.6135684
## 15: Italian 10 0.7454167
## 16: Japanese 8 0.6903249
## 17: Japanese 12 0.5830370
## 18: Korean 1 0.6685161
## 19: Korean 9 0.7189542
## 20: Portuguese 2 0.6682377
## 21: Portuguese 12 0.6773897
## 22: Spanish 2 0.7336806
## 23: Spanish 4 0.6883648
## 24: Taiwanese 8 0.7062075
## 25: Taiwanese 11 0.6441667
## 26: Vietnamese 1 0.6903597
## 27: Vietnamese 8 0.7653986
## langs coding_month mean_quality
Mean quality for each month and country
# Average quality score per country and coding month, listed by country and
# then month.
Mean_quality_month_country <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("country", "coding_month")
]
setorder(Mean_quality_month_country, country, coding_month, na.last = TRUE)
Mean_quality_month_country
## country coding_month mean_quality
## 1: Abuja,Federal Capital Territory,Nigeria 9 0.8472222
## 2: Bangkok,Bangkok,Thailand 8 0.7678571
## 3: Berlin,Berlin,Germany 9 0.7095353
## 4: Berlin,Berlin,Germany 11 0.6211610
## 5: Berlin,Berlin,Germany 12 0.6326389
## 6: Brasilia,Federal District,Brazil 2 0.6707806
## 7: Brasilia,Federal District,Brazil 12 0.6580882
## 8: Central Jakarta,Jakarta,Indonesia 1 0.7140146
## 9: Central Jakarta,Jakarta,Indonesia 9 0.6705729
## 10: Hanoi,Hanoi,Vietnam 1 0.6903597
## 11: Hanoi,Hanoi,Vietnam 8 0.7653986
## 12: Jerusalem District,Israel 3 0.6319643
## 13: Jerusalem District,Israel 9 0.7207532
## 14: Lisbon,Lisbon,Portugal 2 0.6635659
## 15: Lisbon,Lisbon,Portugal 12 0.7352941
## 16: Madrid,Community of Madrid,Spain 2 0.7318262
## 17: Madrid,Community of Madrid,Spain 4 0.6654088
## 18: Mexico City,Mexico City,Mexico 2 0.7403846
## 19: Mexico City,Mexico City,Mexico 4 0.7572327
## 20: New Delhi,Delhi,India 3 0.6162366
## 21: New Delhi,Delhi,India 11 0.6927083
## 22: Rome,Lazio,Italy 2 0.6135684
## 23: Rome,Lazio,Italy 10 0.7454167
## 24: Seoul,South Korea 1 0.6685161
## 25: Seoul,South Korea 9 0.7189542
## 26: Taipei City,Taiwan 8 0.7062075
## 27: Taipei City,Taiwan 11 0.6441667
## 28: Tokyo,Tokyo,Japan 8 0.6903249
## 29: Tokyo,Tokyo,Japan 12 0.5830370
## 30: Washington,District of Columbia,United States 4 0.6845455
## 31: Washington,District of Columbia,United States 8 0.6808511
## 32: Washington,District of Columbia,United States 9 0.7584135
## 33: Washington,District of Columbia,United States 12 0.7416031
## country coding_month mean_quality