#Init

#Print numbers with 2 significant digits from here on.
options(digits = 2)

#Attach pacman and let p_load bring in the packages used below.
library(pacman)
p_load(kirkegaard, googlesheets, dplyr)

#Authorize against Google, then register the spreadsheet from its browser URL.
#NOTE(review): googlesheets has been superseded by googlesheets4 - confirm this still runs.
googlesheets::gs_auth()
gs <- googlesheets::gs_url(
  "https://docs.google.com/spreadsheets/d/1wlF3Xzco5yCqXsf8SJvFIusOAFrzio13XWQ5bIUDJVc/edit#gid=146436350"
)
## Sheet-identifying info appears to be a browser URL.
## googlesheets will attempt to extract sheet key from the URL.
## Putative key: 1wlF3Xzco5yCqXsf8SJvFIusOAFrzio13XWQ5bIUDJVc
## Sheet successfully identified: "Personality, temperament, interests and other non-cognitive (PTINC) data"
#Read the registered sheet into d; no worksheet is named, so gs_read uses its default one.
d <- googlesheets::gs_read(gs)
## Accessing worksheet titled 'Data'.
## Parsed with column specification:
## cols(
##   .default = col_integer(),
##   Rater = col_character(),
##   Rated = col_character(),
##   Date = col_date(format = ""),
##   `002a_Honesty-Humility` = col_double(),
##   `002b_H_Sincerity` = col_double(),
##   `002b_H_Fairness` = col_double(),
##   `002b_H_Greed-Avoidance` = col_double(),
##   `002b_H_Modesty` = col_double(),
##   `002a_Emotionality` = col_double(),
##   `002b_E_Fearfulness` = col_double(),
##   `002b_E_Anxiety` = col_double(),
##   `002b_E_Dependence` = col_double(),
##   `002b_E_Sentimentality` = col_double(),
##   `002a_eXtraversion` = col_double(),
##   `002b_X_Social Self-Esteem` = col_double(),
##   `002b_X_Social Boldness` = col_double(),
##   `002b_X_Sociability` = col_double(),
##   `002b_X_Liveliness` = col_double(),
##   `002a_Agreeableness` = col_double(),
##   `002b_A_Forgivingness` = col_double()
##   # ... with 22 more columns
## )
## See spec(...) for full column specifications.

#Compare self-rated vs. Twitter-estimated personality, based on this tool (NOTE: the link to the tool is missing here).

#get self-rated data
#Rows with Rater "Emil" and no Rated person are used as the self-ratings.
#Keep the columns whose names start with "001b", drop rows containing NA,
#transpose so that traits become rows, and move the row names into a "trait" column.
emil_self_rate <- d %>%
  filter(Rater == "Emil", is.na(Rated)) %>%
  df_subset_by_pattern("^001b") %>%
  na.omit() %>%
  df_t() %>%
  rownames_to_column(var = "trait")
emil_self_rate
##                          trait  1  2  3
## 1          001b_E_Friendliness 28 53 53
## 2        001b_E_Gregariousness 74 93 84
## 3         001b_E_Assertiveness 98 99 98
## 4        001b_E_Activity Level 92 95 75
## 5    001b_E_Excitement-Seeking 60 55 88
## 6          001b_E_Cheerfulness 37 63 88
## 7                 001b_A_Trust 56 74 74
## 8              001b_A_Morality 17 22 17
## 9              001b_A_Altruism 12 20 20
## 10          001b_A_Cooperation 17  1 41
## 11              001b_A_Modesty  8  0  1
## 12             001b_A_Sympathy 54 38 33
## 13        001b_C_Self-Efficacy 93 96 85
## 14          001b_C_Orderliness 36 27 20
## 15          001b_C_Dutifulness 39 51 22
## 16 001b_C_Achievement-Striving 64 93 79
## 17      001b_C_Self-Discipline 69 69 69
## 18         001b_C_Cautiousness 90 95 38
## 19              001b_N_Anxiety 18 22 15
## 20                001b_N_Anger 49 46 18
## 21           001b_N_Depression 23  9 20
## 22   001b_N_Self-Consciousness 28  7 28
## 23         001b_N_Immoderation  7 10 44
## 24        001b_N_Vulnerability 24  9  9
## 25          001b_O_Imagination 94 91 70
## 26   001b_O_Artistic Interests 19  0  1
## 27         001b_O_Emotionality 42 22  4
## 28      001b_O_Adventurousness 76 45 66
## 29            001b_O_Intellect 91 91 88
## 30           001b_O_Liberalism 79 89 63
#average
#Apply df_rowFunc across the columns whose names contain a digit (the per-occasion
#columns "1", "2", "3") and store the result as a new column.
#NOTE(review): relies on df_rowFunc's default function being the mean - confirm.
emil_self_rate$average <- emil_self_rate %>%
  df_subset_by_pattern(pattern = "\\d") %>%
  df_rowFunc(progress = "none")

#twitter
#Same pipeline as for the self-ratings, but on the columns whose names start with "010b".
#Afterwards only the rows indexed by seq_along_rows(emil_self_rate) are kept.
#NOTE(review): rows are matched to the self-ratings by position only; this assumes the
#010b columns come in the same trait order as the 001b ones - confirm.
emil_twitter <- d %>%
  filter(Rater == "Emil", is.na(Rated)) %>%
  df_subset_by_pattern("^010b") %>%
  na.omit() %>%
  df_t() %>%
  rownames_to_column(var = "trait") %>%
  .[seq_along_rows(emil_self_rate), ]

#merge
#Bind the self-ratings, the first Twitter column and the two other-rater vectors side by
#side into one data frame. Everything is combined by row position, not by trait name.
emil_pers <- cbind(
  emil_self_rate,
  twitter = emil_twitter$`1`,
  #ratings of Emil given by rater "Arhøj"
  jens_rated = d %>%
    filter(Rater == "Arhøj", Rated == "Emil") %>%
    df_subset_by_pattern("^001b") %>%
    na.omit() %>%
    df_t() %>%
    unlist(),
  #ratings of Emil given by rater "'Ayn'"
  ayn_rated = d %>%
    filter(Rater == "'Ayn'", Rated == "Emil") %>%
    df_subset_by_pattern("^001b") %>%
    na.omit() %>%
    df_t() %>%
    unlist()
)

#clean names
#Strip the first "<digits>b_<one word character>_" match (e.g. "001b_E_") from each trait label.
emil_pers$trait <- str_replace(
  emil_pers$trait,
  pattern = "\\d+b_\\w_",
  replacement = ""
)

#plots
#Three pairwise scatter plots of the rating sources in emil_pers, using the "trait"
#column for the case names. FALSE is spelled out rather than F, because F is an
#ordinary variable that can be reassigned.

#self-rated average vs. Twitter-based estimate
GG_scatter(
  emil_pers, "average", "twitter",
  case_names = "trait", check_overlap = FALSE
) +
  xlab("Average of 3 self-assessments (across years)") +
  ylab("Estimate based on Twitter activity")

#self-rated average vs. friend's rating
GG_scatter(
  emil_pers, "average", "jens_rated",
  case_names = "trait", check_overlap = FALSE
) +
  xlab("Average of 3 self-assessments (across years)") +
  ylab("Rated by long-time friend (does not use Twitter)")

#friend's rating vs. Twitter-based estimate
GG_scatter(
  emil_pers, "jens_rated", "twitter",
  case_names = "trait", check_overlap = FALSE
) +
  xlab("Rated by long-time friend (does not use Twitter)") +
  ylab("Estimate based on Twitter activity")

#Ayn guesses

#Emil self vs. Ayn rated
#TRUE is spelled out rather than T, because T is an ordinary variable that can be reassigned.
GG_scatter(
  emil_pers, "average", "ayn_rated",
  case_names = "trait", repel_names = TRUE
)

#Jens vs. Ayn ratings
GG_scatter(
  emil_pers, "jens_rated", "ayn_rated",
  case_names = "trait", repel_names = TRUE
)