######### This script contains checks of the updated scaling for the summaries after the irrelevant sections were removed from them. Checks carried out by EA on 05 May 2021.

## Step 1: load the new file

#######################
scale_new=read_csv("C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/210512_lr_proposal-to-act_scaling.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   COD = col_character(),
##   LR_Proposal_Prediction = col_double(),
##   LR_Proposal_Probability = col_double(),
##   LR_Final_Act_Prediction = col_double(),
##   LR_Same_Class = col_logical(),
##   LR_Prob_dif = col_double()
## )
#colnames(scale_new) 

# There are errors in the variable names ==> the LR "prediction" columns actually hold probabilities
# LR_Same_Class should be redone, as it is all set to FALSE by default
# To do that, the prediction dummy should be regenerated using a strict rule: IFF LR_prob > 0.5 ==> LR_prediction = 1

scale_new = scale_new %>% 
  rename(cod = COD,
         LR_final_probability = LR_Final_Act_Prediction)

scale_new = scale_new %>% 
  mutate(LR_Final_prediction = ifelse(LR_final_probability > 0.5, 1, 0)) %>%
  mutate(LR_Same_Class = ifelse(LR_Final_prediction == LR_Proposal_Prediction, 'TRUE', 'FALSE'))
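
A quick sanity check (a sketch using only the columns created above) that LR_Same_Class is no longer uniformly FALSE after regenerating the prediction dummy:

# Sanity check (sketch): LR_Same_Class should now contain both TRUE and FALSE
table(scale_new$LR_Same_Class, useNA = "ifany")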

## Step 2: load the file with all info on the procedures

##################################



full_file=read.csv('C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/EurLex_CAP_Lobby_Scaling_main_03052021.csv')

# Keep only the relevant variables ==> CAP, capcode, CELEX, committee, LR scaling (positive), match_year = indicator of the year the proposal was submitted
#colnames(full_file)

full_trim = full_file %>% 
  select("cod", "CELEXnumber", "Final_CAP1", "capcode",
         "LR_Predictions_posit", "LR_Probabilities_posit", "SIF_LR_Predictions_posit",
         "SIF_LR_Probs_posit", "SIF_Pos_Similarity_posit", "SIF_Neg_Similarity_posit",
         "match_year")

## Step 3: merge the old scaling with the new one

##################

data = left_join(full_trim, scale_new, by = "cod")
data = data %>% rename(OLD_LR_Predict_proposal = LR_Predictions_posit,
                       OLD_LR_Probab_proposal = LR_Probabilities_posit)
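
Before moving on, a quick merge diagnostic (a sketch; anti_join is standard dplyr) to see how many procedures in the trimmed file have no match in the new scaling file:

# Sketch: procedures in full_trim without a match in scale_new
nrow(anti_join(full_trim, scale_new, by = "cod"))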


# Import just the summary scaling

proposal=read.csv('C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/210512_legis_sum_scaling.csv')

#colnames(proposal)
proposal$Unnamed..0_x=NULL   # drop the index column carried over from the CSV export
proposal=proposal %>% 
  rename(cod=COD) 


data=left_join(data, proposal, by="cod")
#colnames(data)

# Drop the SIF variables for now - kind of useless for us anyway
data_all= data


data$SIF_LR_Predictions_posit=NULL
data$SIF_LR_Probs_posit=NULL
data$SIF_LR_Predictions=NULL
data$SIF_LR_Probs=NULL
data$SIF_Pos_Similarity=NULL
data$SIF_Pos_Similarity_posit=NULL
data$SIF_Neg_Similarity_posit=NULL
data$SIF_Neg_Similarity=NULL
data$LR_SIF_Agree=NULL
data$Unnamed..0_y=NULL

#colnames(data)

#check distributions for the proposal scaling

hist(data$OLD_LR_Probab_proposal)

hist(data$LR_Proposal_Probability)

hist(data$LR_Probabilities_wo.bp)

# Replace missing proposal probabilities in LR_Proposal_Probability with the values from LR_Probabilities_wo.bp
data = data %>% mutate(LR_Proposal_Probability = ifelse(is.na(LR_Proposal_Probability), LR_Probabilities_wo.bp, LR_Proposal_Probability)) %>% 
               mutate(LR_Proposal_Prediction = ifelse(LR_Proposal_Probability > 0.5, 1, 0))
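
A quick check (sketch) that the replacement filled the gaps where a wo.bp probability was available:

# Sketch: count the NAs remaining in the patched probability column
sum(is.na(data$LR_Proposal_Probability))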

# Let's overlay the distributions

#transform into long
data_long=gather(data,  type, probability, LR_Proposal_Probability, LR_final_probability, OLD_LR_Probab_proposal)
#colnames(data_long)
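
Note: gather() is superseded in newer tidyr; an equivalent reshape with pivot_longer() (a sketch producing the same long format, up to row order) would be:

# Equivalent reshape with pivot_longer() (same columns: type, probability)
# data_long = pivot_longer(data,
#                          cols = c(LR_Proposal_Probability, LR_final_probability, OLD_LR_Probab_proposal),
#                          names_to = "type", values_to = "probability")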

# Colour palette ('pudra' = powder)
lipstick='#FF99CC'
roses='#FFCCFF'
seawater='#009999' 
font="#606060"
berries='#CC99FF'



#LR_Proposal_Probability, LR_final_probability, OLD_LR_Probab_proposal

ggplot(data_long, aes(x=probability))+
  geom_histogram(data = subset(data_long, type=="OLD_LR_Probab_proposal"), aes(fill=type), alpha=1)+
  geom_histogram(data = subset(data_long, type=="LR_Proposal_Probability"), aes(fill=type), alpha=0.3) +
  #geom_histogram(data = subset(data_long, type=="LR_final_probability"), aes(fill=type), alpha=0.5) +
  scale_fill_manual(name="type", values=c(seawater, berries),
                    labels=c("New scaling", "Old scaling"))+
  theme_minimal()+   # base theme first, so the theme() tweaks below are not overwritten
  theme(legend.title = element_text(color=font, size=12), 
        plot.title = element_text(color=font, size=12, face="italic", hjust=0.5),
        plot.subtitle = element_text(color=font, size=12, face="italic"), 
        axis.title.y = element_blank(), axis.title.x = element_text(color=font, hjust=0))+
  labs(
    title = "Figure 1. Distribution of Pr: OLD vs Adjusted scaling",
    x = "LR Probability",
    y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).

#New proposal and Final summary scaling

ggplot(data_long, aes(x=probability))+
  geom_histogram(data = subset(data_long, type=="LR_Proposal_Probability"), aes(fill=type), alpha=0.3) +
  geom_histogram(data = subset(data_long, type=="LR_final_probability"), aes(fill=type), alpha=0.5) +
  scale_fill_manual(name="type", values=c(lipstick, seawater),
                    labels=c("Final", "New scaling"))+
  theme_minimal()+   # base theme first, so the theme() tweaks below are not overwritten
  theme(legend.title = element_text(color=font, size=12), 
        plot.title = element_text(color=font, size=12, face="italic", hjust=0.5),
        plot.subtitle = element_text(color=font, size=12, face="italic"), 
        axis.title.y = element_blank(), axis.title.x = element_text(color=font, hjust=0))+
  labs(
    title = "Figure 2. Scaling for Proposal summaries and Final summaries",
    x = "LR Probability",
    y = "Count")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing non-finite values (stat_bin).
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 26 rows containing non-finite values (stat_bin).

## Overlay as ridges for clarity. There are some changes in the scaling of the proposal summaries: a slightly stronger tilt towards the SQ.

d=ggplot(data_long, aes(x=probability, y=type,  fill=type,  color=type)) +
  geom_density_ridges(scale = 4) + 
  scale_fill_cyclical(values = c(col1a, col2a, col3)) +
  scale_color_cyclical(values = c(col1,col2, col3)) +
  scale_x_continuous(limits=c(min(data_long$probability ,  na.rm=T),max(data_long$probability,  na.rm=T)), expand = c(0.01, 0)) +
  scale_y_discrete(expand = c(0.001, 0.1)) + labs(title = 'Figure 3. Scaling: OLD, New, and Final summaries', x='LR models')+
  theme_ridges(font_size = 12, grid = TRUE) + theme(plot.margin = unit(c(1,1,1,1), "lines"), 
                                                    
                                                    plot.title = element_text(color=dark.color, size=14, face="italic", hjust=0),
                                                    
                                                    plot.subtitle = element_text(color=dark.color, size=12, face="italic"),
                                                    axis.title.y = element_blank(), axis.title.x = element_text(color=dark.color, hjust=0.5))+
  coord_cartesian(ylim = c(1, 10), clip = 'off') 

print(d)
## Picking joint bandwidth of 0.0515
## Warning: Removed 30 rows containing non-finite values (stat_density_ridges).

#calculate the (average) difference between old and new proposal summary scaling

data= data %>% 
            mutate(proposal_differnece=(LR_Proposal_Probability - OLD_LR_Probab_proposal))

hist(data$proposal_differnece)   # Negative values indicate that the new scaling assigns a lower value to the proposal ==> the old one over-estimates

                                 # Positive values indicate that the new scaling assigns a higher value ==> the old one under-estimates

summary(data$proposal_differnece)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max.      NA's 
## -0.440000 -0.050000  0.010000  0.002991  0.070000  0.410000         3
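
As a quick check on the direction of the shift (a sketch using the difference computed above):

# Sketch: share of cases where the new scaling is lower / higher than the old one
data %>% filter(!is.na(proposal_differnece)) %>%
  summarise(share_lower  = mean(proposal_differnece < 0),   # old scaling over-estimates
            share_higher = mean(proposal_differnece > 0))   # old scaling under-estimates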



diff=ggplot(data, aes(x=proposal_differnece, y=Final_CAP1,  group=Final_CAP1)) +
  geom_point(alpha=0.5, aes(color =Final_CAP1))+
  
   theme_minimal()+
  theme( plot.title = element_text(color=dark.color, size=9, face="italic", hjust=0.5),
         plot.subtitle = element_text(color=dark.color, size=9), 
         axis.title.y = element_text(size = 9),
         axis.title.x = element_text(color=dark.color, hjust=0.5, size=9))+
  labs(
    title = "Figure 4. Differences in scaling between OLD and updated scaling by CAP",
    x = "Predicted probability: IF (-) ==> old scaling over-estimates",
    y = "CAP category"
  ) 




diff
## Warning: Removed 3 rows containing missing values (geom_point).

Overall, the results suggest that the majority of the cases whose scaling changed shifted their location along the dimension only to a limited extent ==> within [-0.2, 0.2]. However, there are also quite a few proposal summaries whose scaling changed more substantially.

Those should be checked by CAP; they are possible cases for re-validation of the proposal summary scaling.
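
Before breaking this down by CAP, a quick overall tally (sketch):

# Sketch: total number of procedures with an absolute change of at least 0.2
sum(abs(data$proposal_differnece) >= 0.2, na.rm = TRUE)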

# Let's check how many procedures, and within which CAP, changed their scaling by 0.2 or more

large_diff = data %>% filter(abs(proposal_differnece) >= 0.2) %>%
  group_by(Final_CAP1) %>%
  summarise(n = n())


p=ggplot(large_diff, aes(x=n, y=Final_CAP1,  group=Final_CAP1, size=n)) +
          geom_point(aes(size=n, color=Final_CAP1), alpha=0.5 )+
          theme_minimal()+
                    theme(legend.title = element_text(color=dark.color, size=10.5), 
                         plot.title = element_text(color=dark.color, size=10, face="italic", hjust=0.5),
                         plot.subtitle = element_text(color=dark.color, size=10, face="italic"), 
                          axis.title.y = element_text(size = 8, face='italic'),
                          axis.title.x = element_text(color=dark.color, hjust=0.5, size=8, face='italic'), 
                          legend.text=element_text(size=8), legend.key.size = unit(0.4, 'cm'))+
                     labs(
                    title = "Figure 5. N of procedures with large change (>=0.2) in scaling estimates",
                    x = "Count of procedures",
                    y = "CAP category")

               
p + guides(color = guide_legend(title = "CAP", reverse = TRUE))

Draw the samples for validation ==> grab the extreme cases in the outer 10% tails of the distribution (probability below 0.1 or above 0.9) that were not part of the training data; the borderline cases within 0.025 points of 0.5; and the cases where the scaling shifted drastically (change >= |0.4|).

extremes = data %>% 
  filter(LR_Proposal_Probability < 0.1 | LR_Proposal_Probability > 0.90) %>% 
  filter(OLD_LR_Probab_proposal > 0 & OLD_LR_Probab_proposal < 1)  # exclude proposals that were in the training set before ==> 23 in total


#borderline cases

borderline=data%>% 
  filter(LR_Proposal_Probability <0.525 & LR_Proposal_Probability >0.475)  #67 procedures



# Bind the validation set

validation_set=bind_rows(extremes, borderline)  # 86 observations


# Cases with an extreme magnitude of change ==> I really think these should be checked - they cannot just jump so much for no reason at all
change=data%>% filter(proposal_differnece >=0.4 | proposal_differnece<=-0.4)


validation_set_full=bind_rows(validation_set, change)
# Drop variables irrelevant for validation


#validation_set_full = validation_set_full %>%
#  select("cod", "CELEXnumber", "Final_CAP1", "LR_Proposal_Prediction")   ## 93 proposals to review ==> slightly above 10% of the population

#write.csv(validation_set_full, 'C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/Validation/20210512_Validation_set.csv')

Scaling vs Validated Results: First Look

Here we compare the newly obtained results from the scaling vis-à-vis the results of the validation carried out by EA in May 2021.

#remove all unnecessary dfs 
#rm(borderline, change, data_long, extremes, full_trim, large_diff, scale_new)
#rm(proposal, validation_set)

library(readxl)

validated=read_excel("C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/Validation/20210512_Validation_set_EA_finished_20210518.xlsx")
## New names:
## * `` -> ...1
## * `` -> ...4
## * `` -> ...5
validated=validated %>% select(cod, CELEXnumber, Coder1_EA , Notes_EA, Justification_EA) %>%
                                  rename(celex=CELEXnumber)


# Transform the validation notation into a dummy where '(+)' == 1 and 0 otherwise

validated=validated %>% mutate(code_ea=ifelse(Coder1_EA=="(+)", 1 , 0)) 
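
A quick cross-check (sketch) that the raw notation maps onto the dummy as intended:

# Sketch: cross-tabulate the raw EA notation against the dummy
table(validated$Coder1_EA, validated$code_ea, useNA = "ifany")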



validation_set_full = validation_set_full %>%
  select("cod", "CELEXnumber", "OLD_LR_Predict_proposal",
         "OLD_LR_Probab_proposal", "LR_Proposal_Prediction", "LR_Proposal_Probability",
         "LR_final_probability", "LR_Same_Class", "LR_Prob_dif",
         "LR_Final_prediction", "LR_Predictions_wo.bp", "LR_Probabilities_wo.bp", "proposal_differnece") %>%
  rename("celex" = "CELEXnumber")
                              

table(validated$code_ea)
## 
##  0  1 
## 51 42
#merge the validated data and the scaling results

validation_merged=left_join(validation_set_full, validated)
## Joining, by = c("cod", "celex")
# Re-order in a clearer way
colnames(validation_merged)
##  [1] "cod"                     "celex"                  
##  [3] "OLD_LR_Predict_proposal" "OLD_LR_Probab_proposal" 
##  [5] "LR_Proposal_Prediction"  "LR_Proposal_Probability"
##  [7] "LR_final_probability"    "LR_Same_Class"          
##  [9] "LR_Prob_dif"             "LR_Final_prediction"    
## [11] "LR_Predictions_wo.bp"    "LR_Probabilities_wo.bp" 
## [13] "proposal_differnece"     "Coder1_EA"              
## [15] "Notes_EA"                "Justification_EA"       
## [17] "code_ea"
validation_merged = validation_merged %>%
  relocate(LR_Final_prediction, .after = LR_Proposal_Probability) %>%
  relocate(code_ea, .after = LR_final_probability) %>%
  mutate(Correctly_scaled = ifelse(code_ea == LR_Proposal_Prediction, "TRUE", "FALSE"))

library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
NEW=tabyl(validation_merged$Correctly_scaled)   # KKT!  

# Let's check how this compares with the previous scaling
validation_merged =validation_merged %>% 
                    mutate(old_vs_validation=ifelse(code_ea == OLD_LR_Predict_proposal, "TRUE", "FALSE"))

OLD=tabyl(validation_merged$old_vs_validation)   # So the new scaling actually seems to perform somewhat better

Let's print the results of (1) validation vs the NEW scaling and (2) validation vs the OLD scaling.

NEW
##  validation_merged$Correctly_scaled  n   percent
##                               FALSE 21 0.2258065
##                                TRUE 72 0.7741935
OLD
##  validation_merged$old_vs_validation  n   percent
##                                FALSE 27 0.2903226
##                                 TRUE 66 0.7096774

OK, so it seems the new scaling categorises slightly more proposals correctly when we look at the dummy classification. Let's check how many of the falsely categorised cases are VERY close to the borderline value of 0.5, and how they are distributed across CAP.

#sample the misclassified cases and their continuous indicators

false_categ=validation_merged %>% filter(Correctly_scaled=='FALSE') 


# Grab the new CAP category from the EuroVoc file and merge
cap_match= read.csv('C:/Users/nasta/Dropbox/____Nordface_POst_doc/data/EurLex/Scaled summaries/_________________Adjustment_of_the_scaling_12052021/210512_scaling/210512/Validation/cap_match.csv')
cap=cap_match %>% select(cod, celex, comm, Final_CAP1, capcode)

false_categ=left_join(false_categ, cap)
## Joining, by = c("cod", "celex")
# Tabulate the % of falsely classified cases by CAP

tabyl(false_categ$Final_CAP1)
##  false_categ$Final_CAP1 n    percent
##             Agriculture 1 0.04761905
##             Environment 3 0.14285714
##            EUgovernance 1 0.04761905
##            ForeignTrade 2 0.09523810
##             Immigration 1 0.04761905
##  InternationalRelations 1 0.04761905
##                  Labour 1 0.04761905
##                LawCrime 2 0.09523810
##          Macroeconomics 1 0.04761905
##                  Market 4 0.19047619
##          RegionalPolicy 1 0.04761905
##              Technology 1 0.04761905
##          Transportation 2 0.09523810

Let's plot these to see the pattern more clearly.

false_categ1=false_categ %>% group_by(Final_CAP1, LR_Proposal_Probability ) %>%
                          summarize(n=n()) %>%
                            ungroup()
## `summarise()` has grouped output by 'Final_CAP1'. You can override using the `.groups` argument.
  p=ggplot(false_categ1, aes( x=LR_Proposal_Probability, y=Final_CAP1))+
   geom_point (aes(size=factor(n),  color=Final_CAP1), alpha=0.8 )+
            theme_minimal()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1, size=8))+
                theme( plot.title = element_text(color=font, size=10,  hjust=0.5),
                        axis.title.y = element_text(size = 8),
                       axis.title.x =element_text(size=8))+
   guides(col=guide_legend("CAP"),
           size=guide_legend("N of COD")) +
                       labs(
                      title = "Figure 6. Misslcassified proposals over the predicted probabilities",
                      y = "CAP",
                      x = "Scaled probabilities")
  p
## Warning: Using size for a discrete variable is not advised.

So it looks like most of the misclassified cases are quite borderline, which makes them challenging for both HUMANS and the MODEL to categorize. However, 4 somewhat 'extreme' cases were misclassified as well ==> in MARKET, LABOUR, IMMIGR, and LAW. In addition, one case from TECH, sitting close to a middling position, seems to be misplaced as well.

Finalizing Validation

AK has completed the validation as a second coder. She reviewed every 5th case, as well as the procedures which EA marked as problematic to categorize. In total, AK second-coded 29 procedures. Of those, the coders disagreed on 6 cases. After carrying out additional checks and conciliation, EA agrees that procedure '2011/0435(COD)' should be coded as 'expanding the EU authority'. The rest of the disagreements concern borderline cases, which are challenging to definitively allocate into a dichotomous categorization. Overall, the coders agree that the challenging procedures should be scaled somewhere close to the middle of the distribution, considering the conceptual definition of the extremes along the dimension of interest. The differences in the dummy coding do not undermine this point.

Below, I summarize the results of the validation and quickly plot the ‘non-agreement’ procedures along the scaled dimension.

ea_ak= read_excel("Validation/20210512_Validation_set_EA_finished_20210518_AK20210519.xlsx")
## New names:
## * `` -> ...1
## * `` -> ...4
## * `` -> ...5
colnames(ea_ak) 
##  [1] "...1"             "cod"              "CELEXnumber"      "...4"            
##  [5] "...5"             "Coder1_EA"        "Notes_EA"         "Justification_EA"
##  [9] "AK"               "Justification AK"
# Do quick transformations of the coding for both the EA and AK entries
ea_ak=ea_ak %>% mutate(code_ea=ifelse(Coder1_EA=="(+)", 1 , 0)) %>%
                              mutate(code_ak=ifelse(AK==1, 1, ifelse(is.na(AK), NA , 0 ))) %>%
                              select( "cod", "CELEXnumber", "code_ea" ,"code_ak", "Notes_EA") %>%
                              mutate(code_ea=ifelse(cod=='2011/0435(COD)', 1 , code_ea)) %>%  # introduce the agreed alteration
                              rename(celex=CELEXnumber )
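
A quick check (sketch) against the numbers reported above (29 double-coded procedures; note the conciliation fix above may alter the raw disagreement count):

# Sketch: double-coded cases and raw EA/AK disagreements
ea_ak %>% filter(!is.na(code_ak)) %>%
  summarise(n_double_coded = n(),
            n_disagree = sum(code_ea != code_ak, na.rm = TRUE))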


#merge with the validation set containing the scaling results

final_results=left_join(ea_ak, validation_set_full)
## Joining, by = c("cod", "celex")
#add cap_categories

final_results=left_join(final_results, cap_match)
## Joining, by = c("cod", "celex")
# Filter out variables not needed for validation

final_results = final_results %>% select("cod", "celex", "code_ea", "code_ak", "Notes_EA",
                                         "OLD_LR_Predict_proposal", "OLD_LR_Probab_proposal",
                                         "LR_Proposal_Prediction", "LR_Proposal_Probability",
                                         "Final_CAP1", "capcode")


final_results = final_results %>%
  mutate(coder_agree = ifelse(code_ea == code_ak, 'TRUE', 'FALSE')) %>%  # generating var for coder agreement
  mutate(c1c2_scale_agree = ifelse(LR_Proposal_Prediction == code_ak & code_ak == code_ea & !is.na(code_ak), 'TRUE', 'FALSE')) %>%
  mutate(c1c2_scale_agree = ifelse(code_ea == LR_Proposal_Prediction & is.na(code_ak), 'TRUE', c1c2_scale_agree))
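
The combined rule is easiest to see on a few hypothetical rows (a sketch with made-up codes; `demo` is not part of the data):

# Sketch: the c1c2_scale_agree logic on hypothetical codes
demo = tibble(model = c(1, 1, 1, 1),
              ea    = c(1, 1, 1, 0),
              ak    = c(1, NA, 0, NA))
demo %>%
  mutate(agree = ifelse(model == ak & ak == ea & !is.na(ak), 'TRUE', 'FALSE')) %>%
  mutate(agree = ifelse(ea == model & is.na(ak), 'TRUE', agree))
# expected agree: TRUE, TRUE, FALSE, FALSE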

tabyl(final_results$c1c2_scale_agree)  #not too shabby
##  final_results$c1c2_scale_agree  n   percent
##                           FALSE 24 0.2580645
##                            TRUE 69 0.7419355
#checking coder reliability more formally
library(irr)
## Loading required package: lpSolve
final_krip = final_results %>% select("cod", "code_ea", "code_ak") %>%  # filter(!is.na(code_ak)) can be added to test only double-coded cases // results do not change
  pivot_longer(!cod, names_to = 'coders', values_to = 'code') %>%
  pivot_wider(id_cols = coders, names_from = cod, values_from = code) %>%
  select(-coders) %>%
  as.matrix() %>%
  kripp.alpha(method = "interval")

final_krip  
##  Krippendorff's alpha
## 
##  Subjects = 93 
##    Raters = 2 
##     alpha = 0.649
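
For reference, the raw percent agreement between the coders (no chance correction, unlike alpha) can be read off the coder_agree flag created above; NA marks the cases AK did not code:

# Sketch: raw coder agreement (complement to Krippendorff's alpha)
tabyl(final_results$coder_agree)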

Let's plot the cases on which EA and AK disagree.

coders_false_categ1=final_results %>% filter(coder_agree=="FALSE")  %>%
                       group_by(Final_CAP1, LR_Proposal_Probability) %>%
                          summarize(n=n()) %>%
                            ungroup()
## `summarise()` has grouped output by 'Final_CAP1'. You can override using the `.groups` argument.
ggplot(coders_false_categ1, aes( x=LR_Proposal_Probability, y=Final_CAP1))+
   geom_point (aes(size=factor(n),  color=Final_CAP1), alpha=0.8 )+
    xlim(0.25, 0.75)+
            theme_minimal()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1, size=8))+
                theme( plot.title = element_text(color=font, size=10,  hjust=0.5),
                        axis.title.y = element_text(size = 8),
                       axis.title.x =element_text(size=8))+
   guides(col=guide_legend("CAP"),
           size=guide_legend("N of COD")) +
                       labs(
                      title = "Figure 7. Proposals on which EA and AK disagree over the predicted probabilities",
                      y = "CAP",
                      x = "Scaled probabilities")
## Warning: Using size for a discrete variable is not advised.

# Well, if these are not borderline and challenging cases, I don't know which ones are!
# Tabulate cases where EA and AK agree against the scaling results
tabyl(final_results$c1c2_scale_agree)
##  final_results$c1c2_scale_agree  n   percent
##                           FALSE 24 0.2580645
##                            TRUE 69 0.7419355
# Gen a var that is TRUE if the scaling agrees with EA (and with AK where coded); otherwise flag which coder disagrees

final_results=final_results %>%
                  mutate(agreement=ifelse(code_ea!=LR_Proposal_Prediction, 'False_EA', 'TRUE')) %>%
                  mutate(agreement=ifelse(LR_Proposal_Prediction!=code_ak  & !is.na(code_ak), 'False_AK', agreement))
 tabyl(final_results$agreement)
##  final_results$agreement  n   percent
##                 False_AK 10 0.1075269
##                 False_EA 14 0.1505376
##                     TRUE 69 0.7419355

Different categorisation by coder and model

Here we can see the location of all disagreements between each coder's categorization and the model's categorization over the probabilities. False_EA = cases where the scaling and EA disagree, while False_AK = cases where AK and the model disagree. I plot them over CAP.

full_agreement=final_results %>% filter(agreement=="False_AK" | agreement== "False_EA") %>%
                          group_by(agreement, LR_Proposal_Probability, Final_CAP1 ) %>%
                          summarize(n=n()) %>%
                            ungroup()
## `summarise()` has grouped output by 'agreement', 'LR_Proposal_Probability'. You can override using the `.groups` argument.
ggplot(full_agreement, aes( x=LR_Proposal_Probability, y=Final_CAP1))+
   geom_point (aes(size=factor(n),  color=agreement), alpha=0.5,  position=position_jitter(h=0.15,w=0.015) )+
    xlim(0, 1)+
            theme_minimal()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1, size=8))+
                theme( plot.title = element_text(color=font, size=10,  hjust=0.5),
                        axis.title.y = element_text(size = 8),
                       axis.title.x =element_text(size=8))+
   guides(col=guide_legend("H0: The model is correct"),
           size=guide_legend("N of COD")) +
                       labs(
                      title = "Figure 8. Proposals where coders and model disagree over the predicted probabilities",
                      y = "CAP",
                      x = "Scaled probabilities")
## Warning: Using size for a discrete variable is not advised.

Last plot:

The variable is TRUE only if the model prediction matches both the EA and AK codings (where AK coded the case), and otherwise only if the model matches the EA coding.

 strct_agreement= final_results %>% 
                          group_by(c1c2_scale_agree, LR_Proposal_Probability, Final_CAP1 ) %>%
                          summarize(n=n()) %>%
                            ungroup()
## `summarise()` has grouped output by 'c1c2_scale_agree', 'LR_Proposal_Probability'. You can override using the `.groups` argument.
ggplot( strct_agreement, aes( x=LR_Proposal_Probability, y=Final_CAP1))+
   geom_point (aes(size=factor(n),  color=c1c2_scale_agree), alpha=0.5,  position=position_jitter(h=0.15,w=0.015) )+
    xlim(0, 1)+
            theme_minimal()+
            theme(axis.text.x = element_text(angle = 45, hjust = 1, size=8))+
                theme( plot.title = element_text(color=font, size=10,  hjust=0.5),
                        axis.title.y = element_text(size = 8),
                       axis.title.x =element_text(size=8))+
   guides(col=guide_legend("H0: The model is correct"),
           size=guide_legend("N of COD")) +
                       labs(
                      title = "Figure 9. All proposals: Agreement and disagreement with coders",
                      y = "CAP ",
                      x = "Scaled probabilities")
## Warning: Using size for a discrete variable is not advised.