library(data.table)
library(ggplot2)
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:data.table':
## 
##     transpose
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(shiny)
## Warning: package 'shiny' was built under R version 4.2.3
library(shinydashboard)
## Warning: package 'shinydashboard' was built under R version 4.2.3
## 
## Attaching package: 'shinydashboard'
## The following object is masked from 'package:graphics':
## 
##     box
library(plotly)
## Warning: package 'plotly' was built under R version 4.2.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(htmlwidgets)
library(IRdisplay)
## Warning: package 'IRdisplay' was built under R version 4.2.3
library(corrplot)
## corrplot 0.92 loaded
library(googlesheets4)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:data.table':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Load the raw coding sheet.
df <- read.csv("SciSearch.csv")

# Parse both date columns from their "dd.mm.yyyy" text form.
df$Collection.date <- as.Date(df$Collection.date, format = "%d.%m.%Y")
df$coding.completion.date <- as.Date(
  df$coding.completion.date,
  format = "%d.%m.%Y"
)

# Flag rows collected in March. Rows whose collection date is missing or did
# not parse are flagged FALSE rather than NA, because an NA in a logical
# subscript makes the assignment below fail.
matching_rows <- !is.na(df$Collection.date) & month(df$Collection.date) == 3

# Force the flagged rows to the year 2023, keeping month and day.
# NOTE(review): the earlier comments spoke of "year greater than 2023" and
# "update to 2022", but the code has always selected March and written 2023;
# confirm which rule is intended.
# The guard skips the rewrite when nothing is flagged: paste0() on an empty
# vector would still return the single string "2023", which is not a date.
if (any(matching_rows)) {
  df$Collection.date[matching_rows] <- as.Date(
    paste0("2023", format(df$Collection.date[matching_rows], "-%m-%d")),
    format = "%Y-%m-%d"
  )
}

# Convert to a data.table in place for the grouped summaries.
setDT(df)
# Count rows per (collection date, coding completion date) pair, then sort the
# pairs by coding completion date and, within ties, by collection date.
check_times <- df[
  ,
  list(N = .N),
  by = list(Collection.date, coding.completion.date)
]
check_times <- check_times[order(coding.completion.date, Collection.date)]

check_times
##     Collection.date coding.completion.date   N
##  1:      2022-07-28             2022-08-27  59
##  2:      2022-08-08             2022-08-28  47
##  3:      2022-07-28             2022-08-30  58
##  4:      2022-07-28             2022-08-31  55
##  5:      2022-07-28             2022-09-01  51
##  6:      2022-08-08             2022-09-02  52
##  7:      2022-06-28             2022-09-09  53
##  8:      2022-06-28             2022-09-20  57
##  9:      2022-08-18             2022-09-23  55
## 10:      2022-08-18             2022-10-14  50
## 11:      2022-08-18             2022-11-03  42
## 12:      2022-07-28             2022-11-21 148
## 13:      2022-08-18             2022-11-28  89
## 14:      2022-07-28             2022-12-10 147
## 15:      2022-07-28             2022-12-19 137
## 16:      2022-12-04             2022-12-25  72
## 17:      2022-12-04             2022-12-27  71
## 18:      2022-07-28             2023-01-11 140
## 19:      2022-08-08             2023-01-11 296
## 20:      2022-12-04             2023-02-08  63
## 21:      2022-12-04             2023-02-12 156
## 22:      2022-12-04             2023-02-24 249
## 23:      2022-12-04             2023-03-20 155
## 24:      2022-12-14             2023-03-20  23
## 25:      2023-03-14             2023-03-30 176
## 26:      2023-03-14             2023-04-01 185
## 27:      2022-12-04             2023-04-08 228
## 28:      2022-08-18                   <NA>   1
##     Collection.date coding.completion.date   N
# Add 'coding_month': the month number taken from 'coding.completion.date'.
# The column is added to df by reference.
df[, `:=`(coding_month = month(coding.completion.date))]

Accessibility

# Pull the columns used for the accessibility analysis out of df.
Accessibility_quality <- df[
  ,
  .SD,
  .SDcols = c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number",
    "תקינות.הלינק..1..תקין..2..לא.זמין..3..בשפה.אחרת..4..לא.רלוונטי.מבחינה.מדעית",
    "Site.type",
    "How.recent.is.the.information..פרופורציה",
    "What.is.the.author.background..פרופורציה",
    "Are.there.major.scientific.errors.in.the.link.פרופורציה.טעויות.מדעיות.",
    "Accessibility.mean.פרופורציה",
    "coding_month"
  )
]

# Replace the long survey headers with short names (same column order).
setnames(
  Accessibility_quality,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number", "validity", "Site.type",
    "recent.information", "author.background", "scientific.errors",
    "Accessibility.mean", "coding_month"
  )
)

# Keep only rows that have an accessibility score.
Accessibility_quality <- Accessibility_quality[!is.na(Accessibility.mean), ]

Mean accessibility for each month, country, and language

# Average accessibility score per language, country and coding month, sorted
# by language, then month, then country.
Mean_Accessibility_month_country_langs <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("langs", "country", "coding_month")
]
Mean_Accessibility_month_country_langs <-
  Mean_Accessibility_month_country_langs[order(langs, coding_month, country)]
Mean_Accessibility_month_country_langs
##          langs                                       country coding_month
##  1:    Chinese                      Bangkok,Bangkok,Thailand            8
##  2:    Chinese Washington,District of Columbia,United States            8
##  3:    Chinese Washington,District of Columbia,United States           12
##  4:    English Washington,District of Columbia,United States            4
##  5:    English       Abuja,Federal Capital Territory,Nigeria            9
##  6:    English Washington,District of Columbia,United States            9
##  7:     German                         Berlin,Berlin,Germany            9
##  8:     German                         Berlin,Berlin,Germany           11
##  9:     German                         Berlin,Berlin,Germany           12
## 10:     Hebrew                     Jerusalem District,Israel            3
## 11:     Hebrew                     Jerusalem District,Israel            9
## 12:      Hindi                         New Delhi,Delhi,India            3
## 13:      Hindi                         New Delhi,Delhi,India           11
## 14: Indonesian             Central Jakarta,Jakarta,Indonesia            1
## 15: Indonesian             Central Jakarta,Jakarta,Indonesia            9
## 16:    Italian                              Rome,Lazio,Italy            2
## 17:    Italian                              Rome,Lazio,Italy           10
## 18:   Japanese                             Tokyo,Tokyo,Japan            8
## 19:   Japanese                             Tokyo,Tokyo,Japan           12
## 20:     Korean                             Seoul,South Korea            1
## 21:     Korean                             Seoul,South Korea            9
## 22: Portuguese              Brasilia,Federal District,Brazil            2
## 23: Portuguese                        Lisbon,Lisbon,Portugal            2
## 24: Portuguese              Brasilia,Federal District,Brazil           12
## 25: Portuguese                        Lisbon,Lisbon,Portugal           12
## 26:    Spanish              Madrid,Community of Madrid,Spain            2
## 27:    Spanish                Mexico City,Mexico City,Mexico            2
## 28:    Spanish              Madrid,Community of Madrid,Spain            4
## 29:    Spanish                Mexico City,Mexico City,Mexico            4
## 30:  Taiwanese                            Taipei City,Taiwan            8
## 31:  Taiwanese                            Taipei City,Taiwan           11
## 32: Vietnamese                           Hanoi,Hanoi,Vietnam            1
## 33: Vietnamese                           Hanoi,Hanoi,Vietnam            8
##          langs                                       country coding_month
##     mean_Accessibility
##  1:          0.7500000
##  2:          0.6941489
##  3:          0.6776336
##  4:          0.6775000
##  5:          0.7083333
##  6:          0.6450321
##  7:          0.6209936
##  8:          0.7537453
##  9:          0.6751543
## 10:          0.6217857
## 11:          0.6915064
## 12:          0.6426523
## 13:          0.6114583
## 14:          0.6994891
## 15:          0.6848958
## 16:          0.6027422
## 17:          0.6791667
## 18:          0.5882768
## 19:          0.6823704
## 20:          0.6483226
## 21:          0.6633987
## 22:          0.6237693
## 23:          0.7144703
## 24:          0.7107843
## 25:          0.7426471
## 26:          0.6968085
## 27:          0.7339744
## 28:          0.7657233
## 29:          0.7416143
## 30:          0.6352041
## 31:          0.5999306
## 32:          0.6827338
## 33:          0.6204710
##     mean_Accessibility
# Build one label per bar by joining language and country with "_".
Mean_Accessibility_month_country_langs[
  ,
  language_country := paste0(langs, "_", country)
]

# One bar chart per distinct coding month, in order of first appearance.
subplots <- lapply(
  unique(Mean_Accessibility_month_country_langs[["coding_month"]]),
  function(month_value) {
    # Rows belonging to the month being drawn.
    month_rows <-
      Mean_Accessibility_month_country_langs[coding_month == month_value]

    # Bars coloured by language/country pair using a custom palette.
    bars <- plot_ly(
      data = month_rows,
      x = ~language_country,
      y = ~mean_Accessibility,
      color = ~language_country,
      colors = c(
        "#E69F00", "#56B4E9", "#009E73", "#F0E442",
        "#0072B2", "#D55E00", "#CC79A7", "#999999"
      ),
      type = "bar"
    )

    # Axis titles plus a title naming the month.
    bars <- layout(
      bars,
      xaxis = list(title = "Language and Country"),
      yaxis = list(title = "Average Accessibility"),
      title = paste("Month:", month_value)
    )

    subplot(bars)
  }
)


# Display
subplots
## [[1]]
## 
## [[2]]
## 
## [[3]]
## 
## [[4]]
## 
## [[5]]
## 
## [[6]]
## 
## [[7]]
## 
## [[8]]
## 
## [[9]]

Mean Accessibility for each month and language

# Average accessibility score per language and coding month, sorted by
# language and then month.
Mean_Accessibility_month_language <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("langs", "coding_month")
]
Mean_Accessibility_month_language <-
  Mean_Accessibility_month_language[order(langs, coding_month)]
Mean_Accessibility_month_language
##          langs coding_month mean_Accessibility
##  1:    Chinese            8          0.7013889
##  2:    Chinese           12          0.6776336
##  3:    English            4          0.6775000
##  4:    English            9          0.6484848
##  5:     German            9          0.6209936
##  6:     German           11          0.7537453
##  7:     German           12          0.6751543
##  8:     Hebrew            3          0.6217857
##  9:     Hebrew            9          0.6915064
## 10:      Hindi            3          0.6426523
## 11:      Hindi           11          0.6114583
## 12: Indonesian            1          0.6994891
## 13: Indonesian            9          0.6848958
## 14:    Italian            2          0.6027422
## 15:    Italian           10          0.6791667
## 16:   Japanese            8          0.5882768
## 17:   Japanese           12          0.6823704
## 18:     Korean            1          0.6483226
## 19:     Korean            9          0.6633987
## 20: Portuguese            2          0.6557377
## 21: Portuguese           12          0.7187500
## 22:    Spanish            2          0.7048611
## 23:    Spanish            4          0.7596960
## 24:  Taiwanese            8          0.6352041
## 25:  Taiwanese           11          0.5999306
## 26: Vietnamese            1          0.6827338
## 27: Vietnamese            8          0.6204710
##          langs coding_month mean_Accessibility
# Monthly mean accessibility, one facet per language. The x axis is fixed to
# months 1-12 and the y axis to the observed minimum and maximum of the means.
ggplot(
  data = Mean_Accessibility_month_language,
  mapping = aes(
    x = coding_month,
    y = mean_Accessibility,
    group = langs,
    color = langs
  )
) +
  geom_point(size = 3) +
  geom_line(size = 1, alpha = 0.6) +
  facet_wrap(~langs) +
  xlim(1, 12) +
  with(
    Mean_Accessibility_month_language,
    ylim(min(mean_Accessibility), max(mean_Accessibility))
  ) +
  labs(x = "Month", y = "Mean Accessibility") +
  theme_bw() +
  theme(legend.position = "top")

Mean Accessibility for each month and country

# Average accessibility score per country and coding month, sorted by country
# and then month.
Mean_Accessibility_month_country <- Accessibility_quality[
  ,
  list(mean_Accessibility = mean(Accessibility.mean)),
  by = c("country", "coding_month")
]
Mean_Accessibility_month_country <-
  Mean_Accessibility_month_country[order(country, coding_month)]
Mean_Accessibility_month_country
##                                           country coding_month
##  1:       Abuja,Federal Capital Territory,Nigeria            9
##  2:                      Bangkok,Bangkok,Thailand            8
##  3:                         Berlin,Berlin,Germany            9
##  4:                         Berlin,Berlin,Germany           11
##  5:                         Berlin,Berlin,Germany           12
##  6:              Brasilia,Federal District,Brazil            2
##  7:              Brasilia,Federal District,Brazil           12
##  8:             Central Jakarta,Jakarta,Indonesia            1
##  9:             Central Jakarta,Jakarta,Indonesia            9
## 10:                           Hanoi,Hanoi,Vietnam            1
## 11:                           Hanoi,Hanoi,Vietnam            8
## 12:                     Jerusalem District,Israel            3
## 13:                     Jerusalem District,Israel            9
## 14:                        Lisbon,Lisbon,Portugal            2
## 15:                        Lisbon,Lisbon,Portugal           12
## 16:              Madrid,Community of Madrid,Spain            2
## 17:              Madrid,Community of Madrid,Spain            4
## 18:                Mexico City,Mexico City,Mexico            2
## 19:                Mexico City,Mexico City,Mexico            4
## 20:                         New Delhi,Delhi,India            3
## 21:                         New Delhi,Delhi,India           11
## 22:                              Rome,Lazio,Italy            2
## 23:                              Rome,Lazio,Italy           10
## 24:                             Seoul,South Korea            1
## 25:                             Seoul,South Korea            9
## 26:                            Taipei City,Taiwan            8
## 27:                            Taipei City,Taiwan           11
## 28:                             Tokyo,Tokyo,Japan            8
## 29:                             Tokyo,Tokyo,Japan           12
## 30: Washington,District of Columbia,United States            4
## 31: Washington,District of Columbia,United States            8
## 32: Washington,District of Columbia,United States            9
## 33: Washington,District of Columbia,United States           12
##                                           country coding_month
##     mean_Accessibility
##  1:          0.7083333
##  2:          0.7500000
##  3:          0.6209936
##  4:          0.7537453
##  5:          0.6751543
##  6:          0.6237693
##  7:          0.7107843
##  8:          0.6994891
##  9:          0.6848958
## 10:          0.6827338
## 11:          0.6204710
## 12:          0.6217857
## 13:          0.6915064
## 14:          0.7144703
## 15:          0.7426471
## 16:          0.6968085
## 17:          0.7657233
## 18:          0.7339744
## 19:          0.7416143
## 20:          0.6426523
## 21:          0.6114583
## 22:          0.6027422
## 23:          0.6791667
## 24:          0.6483226
## 25:          0.6633987
## 26:          0.6352041
## 27:          0.5999306
## 28:          0.5882768
## 29:          0.6823704
## 30:          0.6775000
## 31:          0.6941489
## 32:          0.6450321
## 33:          0.6776336
##     mean_Accessibility

scientific_quality

# Scientific quality: pull the columns used for the quality analysis out of df.
scientific_quality <- df[
  ,
  .SD,
  .SDcols = c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number",
    "תקינות.הלינק..1..תקין..2..לא.זמין..3..בשפה.אחרת..4..לא.רלוונטי.מבחינה.מדעית",
    "Site.type",
    "How.recent.is.the.information..פרופורציה",
    "What.is.the.author.background..פרופורציה",
    "Are.there.major.scientific.errors.in.the.link.פרופורציה.טעויות.מדעיות.",
    "How.accurate.is.the.scientific.content.presented.in.the.link..פרופורציה",
    "Quality.component.score.פרופורציה",
    "Quality.mean.פרופורציה_new",
    "coding_month"
  )
]

# Replace the long survey headers with short names (same column order).
setnames(
  scientific_quality,
  c(
    "term_id", "langs", "country", "term", "type", "code_lang", "hl", "gl",
    "Collection.date", "Result.number", "validity", "Site.type",
    "recent.information", "author.background", "scientific.errors",
    "accurate.content", "Quality.component.score", "Quality.mean",
    "coding_month"
  )
)

# Keep only rows that have a quality score.
scientific_quality <- scientific_quality[!is.na(Quality.mean), ]

Mean quality for each month, country, and language

# Average quality score per language, country and coding month, sorted by
# language, then month, then country.
Mean_quality_month_country_langs <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("langs", "country", "coding_month")
]
Mean_quality_month_country_langs <-
  Mean_quality_month_country_langs[order(langs, coding_month, country)]
Mean_quality_month_country_langs
##          langs                                       country coding_month
##  1:    Chinese                      Bangkok,Bangkok,Thailand            8
##  2:    Chinese Washington,District of Columbia,United States            8
##  3:    Chinese Washington,District of Columbia,United States           12
##  4:    English Washington,District of Columbia,United States            4
##  5:    English       Abuja,Federal Capital Territory,Nigeria            9
##  6:    English Washington,District of Columbia,United States            9
##  7:     German                         Berlin,Berlin,Germany            9
##  8:     German                         Berlin,Berlin,Germany           11
##  9:     German                         Berlin,Berlin,Germany           12
## 10:     Hebrew                     Jerusalem District,Israel            3
## 11:     Hebrew                     Jerusalem District,Israel            9
## 12:      Hindi                         New Delhi,Delhi,India            3
## 13:      Hindi                         New Delhi,Delhi,India           11
## 14: Indonesian             Central Jakarta,Jakarta,Indonesia            1
## 15: Indonesian             Central Jakarta,Jakarta,Indonesia            9
## 16:    Italian                              Rome,Lazio,Italy            2
## 17:    Italian                              Rome,Lazio,Italy           10
## 18:   Japanese                             Tokyo,Tokyo,Japan            8
## 19:   Japanese                             Tokyo,Tokyo,Japan           12
## 20:     Korean                             Seoul,South Korea            1
## 21:     Korean                             Seoul,South Korea            9
## 22: Portuguese              Brasilia,Federal District,Brazil            2
## 23: Portuguese                        Lisbon,Lisbon,Portugal            2
## 24: Portuguese              Brasilia,Federal District,Brazil           12
## 25: Portuguese                        Lisbon,Lisbon,Portugal           12
## 26:    Spanish              Madrid,Community of Madrid,Spain            2
## 27:    Spanish                Mexico City,Mexico City,Mexico            2
## 28:    Spanish              Madrid,Community of Madrid,Spain            4
## 29:    Spanish                Mexico City,Mexico City,Mexico            4
## 30:  Taiwanese                            Taipei City,Taiwan            8
## 31:  Taiwanese                            Taipei City,Taiwan           11
## 32: Vietnamese                           Hanoi,Hanoi,Vietnam            1
## 33: Vietnamese                           Hanoi,Hanoi,Vietnam            8
##          langs                                       country coding_month
##     mean_quality
##  1:    0.7678571
##  2:    0.6808511
##  3:    0.7416031
##  4:    0.6845455
##  5:    0.8472222
##  6:    0.7584135
##  7:    0.7095353
##  8:    0.6211610
##  9:    0.6326389
## 10:    0.6319643
## 11:    0.7207532
## 12:    0.6162366
## 13:    0.6927083
## 14:    0.7140146
## 15:    0.6705729
## 16:    0.6135684
## 17:    0.7454167
## 18:    0.6903249
## 19:    0.5830370
## 20:    0.6685161
## 21:    0.7189542
## 22:    0.6707806
## 23:    0.6635659
## 24:    0.6580882
## 25:    0.7352941
## 26:    0.7318262
## 27:    0.7403846
## 28:    0.6654088
## 29:    0.7572327
## 30:    0.7062075
## 31:    0.6441667
## 32:    0.6903597
## 33:    0.7653986
##     mean_quality

Mean quality for each month and language

# Average quality score per language and coding month, sorted by language and
# then month.
Mean_quality_month_language <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("langs", "coding_month")
]
Mean_quality_month_language <-
  Mean_quality_month_language[order(langs, coding_month)]
Mean_quality_month_language
##          langs coding_month mean_quality
##  1:    Chinese            8    0.6921296
##  2:    Chinese           12    0.7416031
##  3:    English            4    0.6845455
##  4:    English            9    0.7632576
##  5:     German            9    0.7095353
##  6:     German           11    0.6211610
##  7:     German           12    0.6326389
##  8:     Hebrew            3    0.6319643
##  9:     Hebrew            9    0.7207532
## 10:      Hindi            3    0.6162366
## 11:      Hindi           11    0.6927083
## 12: Indonesian            1    0.7140146
## 13: Indonesian            9    0.6705729
## 14:    Italian            2    0.6135684
## 15:    Italian           10    0.7454167
## 16:   Japanese            8    0.6903249
## 17:   Japanese           12    0.5830370
## 18:     Korean            1    0.6685161
## 19:     Korean            9    0.7189542
## 20: Portuguese            2    0.6682377
## 21: Portuguese           12    0.6773897
## 22:    Spanish            2    0.7336806
## 23:    Spanish            4    0.6883648
## 24:  Taiwanese            8    0.7062075
## 25:  Taiwanese           11    0.6441667
## 26: Vietnamese            1    0.6903597
## 27: Vietnamese            8    0.7653986
##          langs coding_month mean_quality

Mean quality for each month and country

# Average quality score per country and coding month, sorted by country and
# then month.
Mean_quality_month_country <- scientific_quality[
  ,
  list(mean_quality = mean(Quality.mean)),
  by = c("country", "coding_month")
]
Mean_quality_month_country <-
  Mean_quality_month_country[order(country, coding_month)]
Mean_quality_month_country
##                                           country coding_month mean_quality
##  1:       Abuja,Federal Capital Territory,Nigeria            9    0.8472222
##  2:                      Bangkok,Bangkok,Thailand            8    0.7678571
##  3:                         Berlin,Berlin,Germany            9    0.7095353
##  4:                         Berlin,Berlin,Germany           11    0.6211610
##  5:                         Berlin,Berlin,Germany           12    0.6326389
##  6:              Brasilia,Federal District,Brazil            2    0.6707806
##  7:              Brasilia,Federal District,Brazil           12    0.6580882
##  8:             Central Jakarta,Jakarta,Indonesia            1    0.7140146
##  9:             Central Jakarta,Jakarta,Indonesia            9    0.6705729
## 10:                           Hanoi,Hanoi,Vietnam            1    0.6903597
## 11:                           Hanoi,Hanoi,Vietnam            8    0.7653986
## 12:                     Jerusalem District,Israel            3    0.6319643
## 13:                     Jerusalem District,Israel            9    0.7207532
## 14:                        Lisbon,Lisbon,Portugal            2    0.6635659
## 15:                        Lisbon,Lisbon,Portugal           12    0.7352941
## 16:              Madrid,Community of Madrid,Spain            2    0.7318262
## 17:              Madrid,Community of Madrid,Spain            4    0.6654088
## 18:                Mexico City,Mexico City,Mexico            2    0.7403846
## 19:                Mexico City,Mexico City,Mexico            4    0.7572327
## 20:                         New Delhi,Delhi,India            3    0.6162366
## 21:                         New Delhi,Delhi,India           11    0.6927083
## 22:                              Rome,Lazio,Italy            2    0.6135684
## 23:                              Rome,Lazio,Italy           10    0.7454167
## 24:                             Seoul,South Korea            1    0.6685161
## 25:                             Seoul,South Korea            9    0.7189542
## 26:                            Taipei City,Taiwan            8    0.7062075
## 27:                            Taipei City,Taiwan           11    0.6441667
## 28:                             Tokyo,Tokyo,Japan            8    0.6903249
## 29:                             Tokyo,Tokyo,Japan           12    0.5830370
## 30: Washington,District of Columbia,United States            4    0.6845455
## 31: Washington,District of Columbia,United States            8    0.6808511
## 32: Washington,District of Columbia,United States            9    0.7584135
## 33: Washington,District of Columbia,United States           12    0.7416031
##                                           country coding_month mean_quality