This document contains the analysis for the paper “The minimum information for the reporting of an ontology (MIRO) guidelines”, submitted to the Journal of Biomedical Semantics (JBMS) in March 2017.

Demographics

# Base font size passed to element_text() in the demographic charts.
fontsize <- 16

## Role Analysis
# One frequency table per role flag column. The outer parentheses make the
# result print. In the recorded output the blank value of x is presumably
# "role not selected" -- confirm against the survey export.
(count(df_results[["role_user"]]))
##   x freq
## 1     18
## 2 U   92
(count(df_results[["role_developer"]]))
##   x freq
## 1     29
## 2 D   81
(count(df_results[["role_review"]]))
##   x freq
## 1     48
## 2 W   62
(count(df_results[["role_reader"]]))
##   x freq
## 1     17
## 2 R   93
(count(df_results[["role_author"]]))
##   x freq
## 1     35
## 2 A   75
# Column sums of the r_* role columns, relabelled with display names and
# reshaped into a (value, id) data frame for plotting.
colctrole <- melt(
  setNames(
    colSums(df_results[c("r_user", "r_developer", "r_review", "r_reader", "r_author")]),
    c("User", "Developer", "Reviewer", "Reader", "Author")
  )
)
colctrole[["id"]] <- rownames(colctrole)

# Horizontal bar chart of the per-role totals.
ggplot(colctrole, aes(id, value)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("") +
  theme(text = element_text(size = fontsize)) +
  coord_flip() +
  theme(plot.margin = unit(c(0, 0, -0.5, -0.5), "cm"))

# ggsave() writes the plot displayed last.
ggsave(paste0(chartdir, "role.pdf"), width = 3, height = 2.5)

### Cross-correlation matrix for role
# NOTE(review): the labels are assigned by position; df_role is built outside
# this section, so confirm its column order really is developer, user,
# reader, author, reviewer.
names(df_role) <- c("Developer", "User", "Reader", "Author", "Reviewer")

# Pairwise correlations between the role columns (cor() default method).
corMat <- cor(x = df_role)
kable(corMat)
Developer User Reader Author Reviewer
Developer 1.0000000 0.2930597 0.3149787 0.4329147 0.2223921
User 0.2930597 1.0000000 0.6266743 0.2781847 0.3045090
Reader 0.3149787 0.6266743 1.0000000 0.4098792 0.4352007
Author 0.4329147 0.2781847 0.4098792 1.0000000 0.4615392
Reviewer 0.2223921 0.3045090 0.4352007 0.4615392 1.0000000
# Long form of the correlation matrix: one row per (Var1, Var2, value) cell.
melted_cormat <- melt(corMat)

# Greyscale heat map: white = lowest correlation, black = highest.
ggplot(data = melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  xlab("") +
  ylab("") +
  scale_fill_gradient(low = "white", high = "black") +
  theme(text = element_text(size = fontsize)) +
  theme(plot.margin = unit(c(0, 0, -0.5, 0), "cm"))

ggsave(paste0(chartdir, "role_cormat.pdf"), width = 7, height = 2.5)

### Combinations of ontology roles
# Frequency of each distinct value of the `role` column, plus its share of
# all respondents in percent (two decimals).
ct_r <- count(df_results[["role"]])
ct_r[["pc"]] <- round(ct_r$freq / nrow(df_results) * 100, digits = 2)

# Most frequent combination first.
kable(ct_r[order(-ct_r$freq), ])
x freq pc
11 DURAW 48 43.64
10 DURA 13 11.82
9 DUR 9 8.18
1 5 4.55
17 UR 5 4.55
20 URW 5 4.55
19 URAW 4 3.64
3 D 3 2.73
16 U 3 2.73
2 A 2 1.82
5 DRA 2 1.82
15 RAW 2 1.82
18 URA 2 1.82
4 DA 1 0.91
6 DRW 1 0.91
7 DU 1 0.91
8 DUA 1 0.91
12 DURW 1 0.91
13 DW 1 0.91
14 R 1 0.91
##TDL Analysis
# Frequency of each value of the tdl column, most frequent first.
ct_tdl <- count(df_results[["tdl"]])
ct_tdl <- ct_tdl[order(-ct_tdl$freq), ]

# Bars are ordered by frequency through reorder().
ggplot(ct_tdl, aes(reorder(x, freq), freq)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("") +
  theme(text = element_text(size = fontsize)) +
  theme(plot.margin = unit(c(0, 0, -0.5, 0), "cm"))

ggsave(paste0(chartdir, "tdl.pdf"), width = 6, height = 2.5)

##Job analysis
# Frequency of each value of the job column, drawn as horizontal bars.
ct <- count(df_results[["job"]])
ggplot(ct, aes(x = reorder(x, freq), y = freq)) +
  geom_bar(stat = "identity") +
  ylab("") +
  xlab("") +
  coord_flip() +
  theme(text = element_text(size = fontsize)) +
  theme(plot.margin = unit(c(0, 0, -0.5, -0.5), "cm"))

ggsave(paste0(chartdir, "job.pdf"), width = 4, height = 2.5)


# Export the role correlation matrix computed earlier in this section.
write.csv(corMat, file = "correlation_matrix_usergroups.csv")

Overall ranking

##Create ranking by mean
# Keep the rating columns (names starting with "o_") plus the respondent id.
df <- df_results[, startsWith(names(df_results), "o_")]
df[["id"]] <- df_results[["id"]]

# One row per (respondent, rating column), with that respondent's role columns.
df_long <- melt(df, id.vars = "id")
df_long <- merge(
  df_long,
  df_results[, c("id", "r_developer", "r_user", "r_author", "r_review", "r_reader", "role")],
  by = "id"
)

# Attach the display label of each rating column from the lookup table dfl.
df_long <- merge(df_long, dfl[c("col", "label")], by.x = "variable", by.y = "col")
df_long[["label"]] <- as.factor(df_long[["label"]])

# Mean rating per label (missing ratings ignored), highest mean first; the
# label factor levels are then put in that order so plots follow the ranking.
df_long_agg <- aggregate(df_long$value, by = list(df_long$label), FUN = mean, na.rm = TRUE)
df_long_agg <- df_long_agg[order(-df_long_agg$x), ]
df_long[["label"]] <- factor(df_long[["label"]], levels = df_long_agg$Group.1)

#head(df_long[c("variable","value")])

###Ordered mean rating of information items across all participants
# Bar per information item showing its mean rating, with reference lines at
# 3.0, 3.5 and 5. "SHOULD" is written between the 3.0 and 3.5 lines and
# "MUST" above 3.5.
#
# The band labels are drawn with annotate() rather than
# geom_text(aes(<constants>)): a geom_text layer inherits df_long and would
# draw every label once per data row, stacking thousands of identical glyphs.
# x = 29.81 places the labels near the 30th (last) item on the discrete axis.
#
# NOTE(review): fun.y is kept for compatibility with the ggplot2 version that
# produced this report; ggplot2 >= 3.3.0 names the argument `fun`.
# NOTE(review): the white "OPTIONAL" label at y = 2.5 lies below the visible
# range (ylim starts at 2.6) -- presumably a deliberate spacing hack; confirm.
ggplot(df_long, aes(x = label, y = value)) +
  stat_summary(fun.y = mean, geom = "bar") +
  geom_hline(yintercept = 5) +
  ylab("Mean Rating") +
  xlab("") +
  geom_hline(yintercept = 3.0) +
  geom_hline(yintercept = 3.5) +
  coord_flip(ylim = c(2.6, 5)) +
  theme(text = element_text(size = 10)) +
  theme(plot.margin = unit(c(0, 0, -0.5, -0.5), "cm")) +
  annotate("text", x = 29.81, y = 3.1, label = "SHOULD",
           hjust = 0, vjust = 0, size = 2.8) +
  annotate("text", x = 29.81, y = 2.5, label = "OPTIONAL",
           hjust = 0, vjust = 0, size = 2.8, colour = "white") +
  annotate("text", x = 29.81, y = 4.1, label = "MUST",
           hjust = 0, vjust = 0, size = 2.8)
## Warning: Removed 38 rows containing non-finite values (stat_summary).

ggsave(paste(chartdir,"overall_mean_results.pdf",sep = ""), width = 6, height = 6)
## Warning: Removed 38 rows containing non-finite values (stat_summary).
## Rankings across roles
# aggregate_by_role() is defined outside this section; judging by the columns
# used below it returns one row per ontology feature with overall and
# per-role statistics -- confirm against its definition.
df_long_agg <- aggregate_by_role(df_long)
write.csv(x = df_long_agg, file = "priority_by_group.csv")

# Discourage scientific notation in the printed tables (session-wide option).
options(scipen = 100)

# Items with the smallest spread of ratings first.
df_long_agg <- df_long_agg[order(df_long_agg[["sd.all"]]), ]
kable(
  df_long_agg[c("ontology_feature", "All", "mean.all", "median.all", "sd.all")]
)
ontology_feature All mean.all median.all sd.all
1 Basics: Ontology URL 1 4.715596 5 0.6816611
2 Basics: Ontology name 2 4.706422 5 0.6979169
4 Basics: Ontology license 4 4.504587 5 0.7890906
6 SRD: Scope and coverage 6 4.148148 4 0.8407253
25 SRD: Development community 25 3.768518 4 0.8604116
3 Basics: Ontology owner 3 4.527778 5 0.8696152
7 Content: Ontology relationships 7 4.128440 4 0.8829760
9 Content: Incorporation of other ontologies 9 4.091743 4 0.9481101
13 Motivation: Target audience 13 3.944954 4 0.9606601
24 Content: Axiom patterns 24 3.796296 4 0.9644064
5 QA: Examples of use 5 4.192661 5 0.9857979
14 KA: Knowledge acquisition methodology 14 3.926605 4 0.9879498
16 Content: Entity metadata policy 16 3.889908 4 1.0214319
8 Content: KR language 8 4.110092 4 1.0304570
17 Content: Upper ontology 17 3.880734 4 1.0339136
22 Change: Versioning policy 23 3.798165 4 1.0344064
18 QA: Testing 18 3.871560 4 1.0372767
28 KA: Content selection 28 3.379630 4 1.0386691
26 Content: Entity naming convention 26 3.743119 4 1.0399757
10 Basics: Ontology repository 10 4.009174 4 1.0407922
21 Change: Entity deprecation strategy 21 3.834862 4 1.0673068
12 Motivation: Competition 12 3.963303 4 1.0708823
20 Motivation: Need 20 3.851852 4 1.0835630
19 Content: Identifier generation policy 19 3.862385 4 1.0841270
11 QA: Evaluation 11 3.990826 4 1.0843620
23 SRD: Communication 22 3.798165 4 1.0867878
15 Change: Sustainability plan 15 3.889908 4 1.0915454
29 KA: Source knowledge location 29 3.357798 3 1.0931007
27 Content: Ontology metrics 27 3.422018 3 1.1808156
30 Content: Development environment 30 2.878505 3 1.3010176
#print(xtable(df_long_agg[c("ontology_feature","All","mean.all","median.all","sd.all")],digits=c(0,0,0,2,0,2)),include.rownames=FALSE)

### Correlation of standard deviation and mean
# 2 x 2 correlation matrix between the per-item standard deviation and mean.
kable(
  cor(
    df_long_agg[c("sd.all", "mean.all")]
  )
)
sd.all mean.all
sd.all 1.0000000 -0.8458595
mean.all -0.8458595 1.0000000
### Ranking table
# Overall and per-role columns of every information item, sorted by the
# overall ("All") column.
dfl_i <- df_long_agg[
  c("ontology_feature", "All", "Author", "Developer", "Reviewer", "User", "Reader")
]
dfl_i <- dfl_i[order(dfl_i[["All"]]), ]
kable(dfl_i)
ontology_feature All Author Developer Reviewer User Reader
1 Basics: Ontology URL 1 2 2 2 2 2
2 Basics: Ontology name 2 1 1 1 1 1
3 Basics: Ontology owner 3 3 3 3 3 3
4 Basics: Ontology license 4 4 4 4 4 4
5 QA: Examples of use 5 5 8 5 5 5
6 SRD: Scope and coverage 6 8 6 6 8 6
7 Content: Ontology relationships 7 7 7 7 7 7
8 Content: KR language 8 6 5 9 9 8
9 Content: Incorporation of other ontologies 9 10 9 8 6 9
10 Basics: Ontology repository 10 14 12 10 10 11
11 QA: Evaluation 11 9 14 11 11 12
12 Motivation: Competition 12 12 11 13 13 14
13 Motivation: Target audience 13 15 13 12 15 13
14 KA: Knowledge acquisition methodology 14 21 16 20 17 20
15 Change: Sustainability plan 15 13 10 16 12 10
16 Content: Entity metadata policy 16 17 17 17 18 21
17 Content: Upper ontology 17 11 20 18 21 15
18 QA: Testing 18 16 24 14 16 19
19 Content: Identifier generation policy 19 25 18 19 19 17
20 Motivation: Need 20 20 22 15 22 18
21 Change: Entity deprecation strategy 21 18 15 23 14 16
23 SRD: Communication 22 22 19 22 23 24
22 Change: Versioning policy 23 24 21 24 20 22
24 Content: Axiom patterns 24 23 25 21 24 23
25 SRD: Development community 25 19 23 25 25 25
26 Content: Entity naming convention 26 26 26 26 26 26
27 Content: Ontology metrics 27 28 28 27 27 27
28 KA: Content selection 28 29 27 28 28 28
29 KA: Source knowledge location 29 27 29 29 29 29
30 Content: Development environment 30 30 30 30 30 30
# Threshold used only by the commented-out "less"/"more" labelling below.
thresh <- 4
dfl_i_dev <- dfl_i
#dfl_i_dev$author_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Author)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Author)<=-thresh,"more",""))
#dfl_i_dev$developer_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Developer)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Developer)<=-thresh,"more",""))
#dfl_i_dev$reviewer_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Reviewer)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Reviewer)<=-thresh,"more",""))
#dfl_i_dev$user_d<-ifelse((dfl_i_dev$All-dfl_i_dev$User)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$User)<=-thresh,"more",""))
#dfl_i_dev$reader_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Reader)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Reader)<=-thresh,"more",""))

# Signed difference between the overall column and each role's column,
# stored as <role>_d (author_d, developer_d, reviewer_d, user_d, reader_d).
for (role_name in c("Author", "Developer", "Reviewer", "User", "Reader")) {
  dfl_i_dev[[paste0(tolower(role_name), "_d")]] <-
    dfl_i_dev[["All"]] - dfl_i_dev[[role_name]]
}
kable(dfl_i_dev)
ontology_feature All Author Developer Reviewer User Reader author_d developer_d reviewer_d user_d reader_d
1 Basics: Ontology URL 1 2 2 2 2 2 -1 -1 -1 -1 -1
2 Basics: Ontology name 2 1 1 1 1 1 1 1 1 1 1
3 Basics: Ontology owner 3 3 3 3 3 3 0 0 0 0 0
4 Basics: Ontology license 4 4 4 4 4 4 0 0 0 0 0
5 QA: Examples of use 5 5 8 5 5 5 0 -3 0 0 0
6 SRD: Scope and coverage 6 8 6 6 8 6 -2 0 0 -2 0
7 Content: Ontology relationships 7 7 7 7 7 7 0 0 0 0 0
8 Content: KR language 8 6 5 9 9 8 2 3 -1 -1 0
9 Content: Incorporation of other ontologies 9 10 9 8 6 9 -1 0 1 3 0
10 Basics: Ontology repository 10 14 12 10 10 11 -4 -2 0 0 -1
11 QA: Evaluation 11 9 14 11 11 12 2 -3 0 0 -1
12 Motivation: Competition 12 12 11 13 13 14 0 1 -1 -1 -2
13 Motivation: Target audience 13 15 13 12 15 13 -2 0 1 -2 0
14 KA: Knowledge acquisition methodology 14 21 16 20 17 20 -7 -2 -6 -3 -6
15 Change: Sustainability plan 15 13 10 16 12 10 2 5 -1 3 5
16 Content: Entity metadata policy 16 17 17 17 18 21 -1 -1 -1 -2 -5
17 Content: Upper ontology 17 11 20 18 21 15 6 -3 -1 -4 2
18 QA: Testing 18 16 24 14 16 19 2 -6 4 2 -1
19 Content: Identifier generation policy 19 25 18 19 19 17 -6 1 0 0 2
20 Motivation: Need 20 20 22 15 22 18 0 -2 5 -2 2
21 Change: Entity deprecation strategy 21 18 15 23 14 16 3 6 -2 7 5
23 SRD: Communication 22 22 19 22 23 24 0 3 0 -1 -2
22 Change: Versioning policy 23 24 21 24 20 22 -1 2 -1 3 1
24 Content: Axiom patterns 24 23 25 21 24 23 1 -1 3 0 1
25 SRD: Development community 25 19 23 25 25 25 6 2 0 0 0
26 Content: Entity naming convention 26 26 26 26 26 26 0 0 0 0 0
27 Content: Ontology metrics 27 28 28 27 27 27 -1 -1 0 0 0
28 KA: Content selection 28 29 27 28 28 28 -1 1 0 0 0
29 KA: Source knowledge location 29 27 29 29 29 29 2 0 0 0 0
30 Content: Development environment 30 30 30 30 30 30 0 0 0 0 0
### Ranking of information items broken down by roles
# Long form: one row per (information item, ranking column).
df_long_agg_long <- melt(dfl_i, id.vars = "ontology_feature")

# Keep the items in the row order of dfl_i on the axis.
df_long_agg_long[["ontology_feature"]] <- factor(
  df_long_agg_long[["ontology_feature"]],
  levels = dfl_i[["ontology_feature"]]
)

# Dodged horizontal bars, one bar per ranking column for each item.
ggplot(
  df_long_agg_long,
  aes(x = ontology_feature, y = value, group = variable, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()

ggsave(paste0(chartdir, "overall_ranking_by_role.pdf"), width = 4, height = 2.5)

Comment Analysis

# Comment-code columns: names ending in "_code" or "_code2" .. "_code6".
# Note that this reuses (overwrites) the name `df`.
df <- df_comments[, grepl("_code[2-6]?$", names(df_comments))]
#df$id<-df_comments$id

# Distinct codes of every row, concatenated over all rows. unique() is applied
# per row, so a code is counted at most once per row. Empty cells are kept
# here, which is why the blank keyword shows up in the table.
# seq_len() makes this a no-op for an empty table (1:nrow(df) would visit the
# non-existent rows 1 and 0), and lapply()/unlist() avoids growing the vector
# one row at a time.
kw <- unlist(
  lapply(seq_len(nrow(df)), function(i) unique(as.character(df[i, ])))
)

# Keyword frequencies, most frequent first.
ct_key <- count(kw)
kable(ct_key[order(-ct_key$freq), ])
x freq
1 110
27 coverage 17
154 use_case_suitability 17
51 example_use 10
148 unclassified 10
2 active_development 8
156 user_community 6
14 compatibility_other_ontologies 5
16 competition 5
50 evidence_use 5
68 interoperability 4
119 quality 4
134 scope 4
42 domain 3
69 issue_tracking 3
86 logically_sound 3
92 motivated_editors 3
161 version_number 3
9 citation 2
20 consistency 2
24 content 2
33 definitions 2
35 dereferenceble_uris 2
40 documentation_quality 2
54 expressivity 2
64 inference 2
81 language 2
93 name 2
97 obo_principles 2
106 open_source 2
110 owl_profile 2
117 qa_tools 2
118 qa_toomuch_inbeginning 2
127 representational_adequacy 2
132 requirements_explicit 2
138 surveytool_problem 2
139 sustainability 2
149 update_request_process 2
153 use_case_adjustability 2
164 visualisation 2
167 we_need_to_talk 2
3 all_new_ontologies_at_icbo 1
4 all_requirements_toomuch 1
5 availability 1
6 available_formats 1
7 available_imports 1
8 change_on_demand 1
10 classification 1
11 coherency 1
12 commit_count 1
13 compatibility_ontologies 1
15 competency_questions 1
17 comprehensibility 1
18 configuration_optionals 1
19 connect_database 1
21 consistent 1
22 consistent_content 1
23 construct_frequency 1
25 content_maybe_toomuch 1
26 content_self_descriptive 1
28 cqs 1
29 data_capture_support 1
30 data_migration_support 1
31 dataset_alignment 1
32 defined_dependencies 1
34 deprecation_management 1
36 dereferencibility 1
37 development_priority_management 1
38 difficulty_use 1
39 documentation 1
41 doi 1
43 editor_info 1
44 email_list 1
45 evaluation_crucial 1
46 everything_important 1
47 evidence_added_value 1
48 evidence_application 1
49 evidence_interoperability 1
52 examples_more_important 1
53 explore_full_ontology 1
55 feature_scope_depends_audience 1
56 features_depend_on_usergroup 1
57 governance_process_scope_requirements_change 1
58 granularity 1
59 hierarchy 1
60 homepage 1
61 human_readable_descriptions 1
62 identfier_generation_policy_toomuch 1
63 imports_versioning 1
65 institution_endorsement 1
66 interdisciplinary 1
67 interesting_inferences 1
70 justification_deprecation 1
71 justification_development 1
72 justification_prioritisation 1
73 justification_requirements 1
74 justification_scope 1
75 justification_upper_ontology 1
76 justifications_changes 1
77 justify_modelling_decisions 1
78 ka_document_source 1
79 ka_full_disclosure_toomuch 1
80 ka_interest_depends_on_user 1
82 large_database_use 1
83 last_update_date 1
84 lessons_learnt 1
85 location 1
87 lots_of_metadata 1
88 metadata_vocabulary_use 1
89 metamodel_patterns_toomuch 1
90 minimal_dependencies 1
91 most_evaluations_inadequate 1
94 name_experts 1
95 not_one_off 1
96 number_projects 1
98 ontoclean 1
99 ontology_information_standard 1
100 ontology_location 1
101 ontology_migration_support 1
102 ontology_parts_utilisation_application 1
103 ontology_unavailable 1
104 ontology_visualisation 1
105 ontology_vs_vocabulary 1
107 orthogonality 1
108 out_of_box_ontology_website 1
109 outreach_competition 1
111 problems_solved 1
112 property_selection_strategy 1
113 prov_provenance_features 1
114 public_diff 1
115 purpose 1
116 purpose_explicit 1
120 raw_data_location_toomuch 1
121 realist_vs_application 1
122 reasoning_time 1
123 relation_to_other_ontologies 1
124 release_date 1
125 release_frequency 1
126 report_issue_tracker 1
128 represents_reality 1
129 requirements 1
130 requirements_analysis 1
131 requirements_dishonest 1
133 rich_entity_annotations 1
135 stakeholder_motivation 1
136 standardisation 1
137 support 1
140 target_audience 1
141 tawny_owl 1
142 tool_explore_ontology 1
143 tools_application 1
144 tools_changemanagement 1
145 tools_development 1
146 tools_using_ontology 1
147 tradeoff_performance_representation 1
150 update_strategy 1
151 updatecycles_toomuch_if_new 1
152 url_ontology 1
155 use_of_inference_manage_classification 1
157 user_friendly 1
158 user_involvement_development 1
159 vann_uri 1
160 version_history_plus_metadata 1
162 versioning 1
163 versions_imports 1
165 w3c_recommendation_annotation 1
166 w3c_recommendations 1
# Keywords over ALL comment-code columns (the table `df`), with empty cells
# removed before counting.
# The original chunk also assigned df_comments_important here without using
# it; that variable is assigned again, identically, right before its first
# use further down, so the dead copy-pasted assignment is dropped.
# seq_len() keeps the iteration empty for a zero-row table, where 1:nrow(df)
# would visit rows 1 and 0.
keywords <- unlist(
  lapply(seq_len(nrow(df)), function(i) unique(as.character(df[i, ])))
)

# Drop the empty-string code.
keywords <- keywords[keywords != ""]

# Keyword frequencies, most frequent first.
ct_comments_all_key <- count(keywords)
kable(ct_comments_all_key[order(-ct_comments_all_key$freq), ])
x freq
26 coverage 17
153 use_case_suitability 17
50 example_use 10
147 unclassified 10
1 active_development 8
155 user_community 6
13 compatibility_other_ontologies 5
15 competition 5
49 evidence_use 5
67 interoperability 4
118 quality 4
133 scope 4
41 domain 3
68 issue_tracking 3
85 logically_sound 3
91 motivated_editors 3
160 version_number 3
8 citation 2
19 consistency 2
23 content 2
32 definitions 2
34 dereferenceble_uris 2
39 documentation_quality 2
53 expressivity 2
63 inference 2
80 language 2
92 name 2
96 obo_principles 2
105 open_source 2
109 owl_profile 2
116 qa_tools 2
117 qa_toomuch_inbeginning 2
126 representational_adequacy 2
131 requirements_explicit 2
137 surveytool_problem 2
138 sustainability 2
148 update_request_process 2
152 use_case_adjustability 2
163 visualisation 2
166 we_need_to_talk 2
2 all_new_ontologies_at_icbo 1
3 all_requirements_toomuch 1
4 availability 1
5 available_formats 1
6 available_imports 1
7 change_on_demand 1
9 classification 1
10 coherency 1
11 commit_count 1
12 compatibility_ontologies 1
14 competency_questions 1
16 comprehensibility 1
17 configuration_optionals 1
18 connect_database 1
20 consistent 1
21 consistent_content 1
22 construct_frequency 1
24 content_maybe_toomuch 1
25 content_self_descriptive 1
27 cqs 1
28 data_capture_support 1
29 data_migration_support 1
30 dataset_alignment 1
31 defined_dependencies 1
33 deprecation_management 1
35 dereferencibility 1
36 development_priority_management 1
37 difficulty_use 1
38 documentation 1
40 doi 1
42 editor_info 1
43 email_list 1
44 evaluation_crucial 1
45 everything_important 1
46 evidence_added_value 1
47 evidence_application 1
48 evidence_interoperability 1
51 examples_more_important 1
52 explore_full_ontology 1
54 feature_scope_depends_audience 1
55 features_depend_on_usergroup 1
56 governance_process_scope_requirements_change 1
57 granularity 1
58 hierarchy 1
59 homepage 1
60 human_readable_descriptions 1
61 identfier_generation_policy_toomuch 1
62 imports_versioning 1
64 institution_endorsement 1
65 interdisciplinary 1
66 interesting_inferences 1
69 justification_deprecation 1
70 justification_development 1
71 justification_prioritisation 1
72 justification_requirements 1
73 justification_scope 1
74 justification_upper_ontology 1
75 justifications_changes 1
76 justify_modelling_decisions 1
77 ka_document_source 1
78 ka_full_disclosure_toomuch 1
79 ka_interest_depends_on_user 1
81 large_database_use 1
82 last_update_date 1
83 lessons_learnt 1
84 location 1
86 lots_of_metadata 1
87 metadata_vocabulary_use 1
88 metamodel_patterns_toomuch 1
89 minimal_dependencies 1
90 most_evaluations_inadequate 1
93 name_experts 1
94 not_one_off 1
95 number_projects 1
97 ontoclean 1
98 ontology_information_standard 1
99 ontology_location 1
100 ontology_migration_support 1
101 ontology_parts_utilisation_application 1
102 ontology_unavailable 1
103 ontology_visualisation 1
104 ontology_vs_vocabulary 1
106 orthogonality 1
107 out_of_box_ontology_website 1
108 outreach_competition 1
110 problems_solved 1
111 property_selection_strategy 1
112 prov_provenance_features 1
113 public_diff 1
114 purpose 1
115 purpose_explicit 1
119 raw_data_location_toomuch 1
120 realist_vs_application 1
121 reasoning_time 1
122 relation_to_other_ontologies 1
123 release_date 1
124 release_frequency 1
125 report_issue_tracker 1
127 represents_reality 1
128 requirements 1
129 requirements_analysis 1
130 requirements_dishonest 1
132 rich_entity_annotations 1
134 stakeholder_motivation 1
135 standardisation 1
136 support 1
139 target_audience 1
140 tawny_owl 1
141 tool_explore_ontology 1
142 tools_application 1
143 tools_changemanagement 1
144 tools_development 1
145 tools_using_ontology 1
146 tradeoff_performance_representation 1
149 update_strategy 1
150 updatecycles_toomuch_if_new 1
151 url_ontology 1
154 use_of_inference_manage_classification 1
156 user_friendly 1
157 user_involvement_development 1
158 vann_uri 1
159 version_history_plus_metadata 1
161 versioning 1
162 versions_imports 1
164 w3c_recommendation_annotation 1
165 w3c_recommendations 1
# Code columns of the "important" comment question only: names ending in
# "important_comments_code" or "important_comments_code2" .. "code6".
df_comments_important <- df_comments[
  , grepl("important_comments_code[2-6]?$", names(df_comments))
]

# Distinct codes per row, concatenated over all rows. seq_len() keeps the
# iteration empty for a zero-row table (1:nrow() would visit rows 1 and 0),
# and lapply()/unlist() avoids growing the vector inside a loop.
keywords <- unlist(
  lapply(
    seq_len(nrow(df_comments_important)),
    function(i) unique(as.character(df_comments_important[i, ]))
  )
)

# Drop the empty-string code, then count; most frequent keyword first.
keywords <- keywords[keywords != ""]
ct_comments_important_key <- count(keywords)
kable(ct_comments_important_key[order(-ct_comments_important_key$freq), ])
x freq
12 coverage 17
51 use_case_suitability 15
7 compatibility_other_ontologies 5
52 user_community 5
1 active_development 4
29 interoperability 4
46 scope 4
21 domain 3
24 evidence_use 3
32 logically_sound 3
34 motivated_editors 3
42 quality 3
11 content 2
16 definitions 2
17 dereferenceble_uris 2
20 documentation_quality 2
30 language 2
37 obo_principles 2
39 open_source 2
44 representational_adequacy 2
50 use_case_adjustability 2
2 availability 1
3 change_on_demand 1
4 classification 1
5 coherency 1
6 commit_count 1
8 competition 1
9 comprehensibility 1
10 consistent_content 1
13 data_capture_support 1
14 data_migration_support 1
15 defined_dependencies 1
18 difficulty_use 1
19 documentation 1
22 evidence_added_value 1
23 evidence_application 1
25 granularity 1
26 hierarchy 1
27 institution_endorsement 1
28 interdisciplinary 1
31 large_database_use 1
33 minimal_dependencies 1
35 not_one_off 1
36 number_projects 1
38 ontology_visualisation 1
40 property_selection_strategy 1
41 purpose_explicit 1
43 realist_vs_application 1
45 represents_reality 1
47 standardisation 1
48 sustainability 1
49 update_strategy 1
53 user_friendly 1
# Assign every "important" keyword to a thematic group and total the keyword
# frequencies per group. Keywords not listed below stay "ungrouped".
keyword_groups <- list(
  "Scope and Coverage" = c(
    "coverage", "scope", "interdisciplinary", "granularity"
  ),
  "Interoperability" = c(
    "compatibility_other_ontologies", "interoperability"
  ),
  "Use Case" = c(
    "use_case_suitability", "use_case_adjustability", "purpose_explicit"
  ),
  "Active Community" = c(
    "user_community", "motivated_editors", "active_development",
    "change_on_demand", "not_one_off", "commit_count",
    "institution_endorsement"
  ),
  "Evidence for use" = c(
    "evidence_added_value", "evidence_application", "number_projects",
    "evidence_use", "large_database_use"
  ),
  "Content" = c(
    "domain", "content", "representational_adequacy", "represents_reality",
    "realist_vs_application", "consistent_content", "coherency"
  ),
  "Metadata and Documentation" = c(
    "definitions", "documentation_quality", "documentation",
    "ontology_visualisation", "language"
  ),
  "Publishing and Life Cycle" = c(
    "obo_principles", "availability", "open_source", "standardisation",
    "sustainability", "dereferenceble_uris", "update_strategy"
  ),
  "Other" = c(
    "competition", "quality"
  ),
  "Representation" = c(
    "hierarchy", "classification", "defined_dependencies",
    "minimal_dependencies", "property_selection_strategy", "logically_sound"
  ),
  "Usability" = c(
    "user_friendly", "comprehensibility", "difficulty_use",
    "data_capture_support", "data_migration_support"
  )
)

# Flatten to a keyword -> group lookup (each keyword is listed exactly once).
keyword_to_group <- setNames(
  rep(names(keyword_groups), times = lengths(keyword_groups)),
  unlist(keyword_groups, use.names = FALSE)
)

keyword_chr <- as.character(ct_comments_important_key$x)
matched_group <- unname(keyword_to_group[keyword_chr])

ct_comments_important_key$group <- "ungrouped"
has_group <- !is.na(matched_group)
ct_comments_important_key$group[has_group] <- matched_group[has_group]
# A missing keyword gets a missing group, as comparing NA with `==` would.
ct_comments_important_key$group[is.na(keyword_chr)] <- NA

# Total keyword frequency per group, largest group first.
agg <- aggregate(
  ct_comments_important_key$freq,
  by = list(ct_comments_important_key$group),
  FUN = sum
)
agg <- agg[order(-agg$x), ]
#print(xtable(agg,digits=c(0,0,0)),include.rownames=FALSE)

# Entries of kw (codes from all comment columns, blanks included) that do not
# occur among the "important" keywords currently held in `keywords`.
keywords_other <- kw[is.na(match(kw, keywords))]

# Frequencies of the remaining keywords, most frequent first.
ct_comments_other_key <- count(keywords_other)
kable(
  ct_comments_other_key[order(-ct_comments_other_key$freq), ]
)
x freq
1 110
27 example_use 10
100 unclassified 10
40 issue_tracking 3
108 version_number 3
6 citation 2
11 consistency 2
30 expressivity 2
38 inference 2
59 name 2
71 owl_profile 2
76 qa_tools 2
77 qa_toomuch_inbeginning 2
87 requirements_explicit 2
91 surveytool_problem 2
101 update_request_process 2
111 visualisation 2
114 we_need_to_talk 2
2 all_new_ontologies_at_icbo 1
3 all_requirements_toomuch 1
4 available_formats 1
5 available_imports 1
7 compatibility_ontologies 1
8 competency_questions 1
9 configuration_optionals 1
10 connect_database 1
12 consistent 1
13 construct_frequency 1
14 content_maybe_toomuch 1
15 content_self_descriptive 1
16 cqs 1
17 dataset_alignment 1
18 deprecation_management 1
19 dereferencibility 1
20 development_priority_management 1
21 doi 1
22 editor_info 1
23 email_list 1
24 evaluation_crucial 1
25 everything_important 1
26 evidence_interoperability 1
28 examples_more_important 1
29 explore_full_ontology 1
31 feature_scope_depends_audience 1
32 features_depend_on_usergroup 1
33 governance_process_scope_requirements_change 1
34 homepage 1
35 human_readable_descriptions 1
36 identfier_generation_policy_toomuch 1
37 imports_versioning 1
39 interesting_inferences 1
41 justification_deprecation 1
42 justification_development 1
43 justification_prioritisation 1
44 justification_requirements 1
45 justification_scope 1
46 justification_upper_ontology 1
47 justifications_changes 1
48 justify_modelling_decisions 1
49 ka_document_source 1
50 ka_full_disclosure_toomuch 1
51 ka_interest_depends_on_user 1
52 last_update_date 1
53 lessons_learnt 1
54 location 1
55 lots_of_metadata 1
56 metadata_vocabulary_use 1
57 metamodel_patterns_toomuch 1
58 most_evaluations_inadequate 1
60 name_experts 1
61 ontoclean 1
62 ontology_information_standard 1
63 ontology_location 1
64 ontology_migration_support 1
65 ontology_parts_utilisation_application 1
66 ontology_unavailable 1
67 ontology_vs_vocabulary 1
68 orthogonality 1
69 out_of_box_ontology_website 1
70 outreach_competition 1
72 problems_solved 1
73 prov_provenance_features 1
74 public_diff 1
75 purpose 1
78 raw_data_location_toomuch 1
79 reasoning_time 1
80 relation_to_other_ontologies 1
81 release_date 1
82 release_frequency 1
83 report_issue_tracker 1
84 requirements 1
85 requirements_analysis 1
86 requirements_dishonest 1
88 rich_entity_annotations 1
89 stakeholder_motivation 1
90 support 1
92 target_audience 1
93 tawny_owl 1
94 tool_explore_ontology 1
95 tools_application 1
96 tools_changemanagement 1
97 tools_development 1
98 tools_using_ontology 1
99 tradeoff_performance_representation 1
102 updatecycles_toomuch_if_new 1
103 url_ontology 1
104 use_of_inference_manage_classification 1
105 user_involvement_development 1
106 vann_uri 1
107 version_history_plus_metadata 1
109 versioning 1
110 versions_imports 1
112 w3c_recommendation_annotation 1
113 w3c_recommendations 1

Systematic review of MIRO compliance

# Split the reviewed-paper table into descriptive metadata columns and the
# remaining columns (the coding results).
paper_metadata <- c("Name", "URL", "Domain", "Language", "License", "Repository")
is_metadata_col <- names(df_paper_results) %in% paper_metadata

df_paper_metadata <- df_paper_results[paper_metadata]
df_paper_coding <- df_paper_results[, !is_metadata_col]

# Column subset used for the (commented-out) LaTeX table.
latex_paper_metadata <- df_paper_metadata[c("Name", "Domain", "URL")]
#print(xtable(latex_paper_metadata,digits=c(0,0,0,0)),include.rownames=FALSE)

# Long form: one row per (Code, coded item); the value is treated as a
# category for plotting.
df_coding_melt <- melt(df_paper_coding, id.vars = "Code")
df_coding_melt[["value"]] <- as.factor(df_coding_melt[["value"]])

# These three items are charted separately from the rest.
separate_items <- c("Ontology.name", "Ontology.owner", "OBO.Principles")
in_separate_items <- df_coding_melt$variable %in% separate_items

# Displayed only: this first chart is not written to disk.
ggplot(df_coding_melt[in_separate_items, ], aes(x = value, fill = value)) +
  geom_bar() +
  facet_wrap("variable") +
  theme_bw() +
  scale_fill_grey()

# All remaining items, three panels per row; ggsave() below saves this chart
# because it is the last one displayed.
ggplot(df_coding_melt[!in_separate_items, ], aes(x = value, fill = value)) +
  geom_bar() +
  facet_wrap("variable", ncol = 3) +
  theme_bw() +
  scale_fill_grey()

ggsave(paste0(chartdir, "results_coding.pdf"), width = 8, height = 10)
# Back from factor to the numeric codes (via character, so the labels are
# converted rather than the internal level indices).
df_coding_melt[["value"]] <- as.numeric(as.character(df_coding_melt[["value"]]))

# Mean code per item.
agg_compliance <- aggregate(
  df_coding_melt$value,
  by = list(df_coding_melt$variable),
  FUN = mean
)

# The means of these two items are halved before conversion to percent --
# presumably because they are coded on a 0-2 scale; TODO confirm.
halved_items <- c("Ontology.name", "Ontology.owner")
agg_compliance$compliance <- ifelse(
  agg_compliance$Group.1 %in% halved_items,
  agg_compliance$x / 2,
  agg_compliance$x
)
agg_compliance$compliance <- round(agg_compliance$compliance * 100, 2)

names(agg_compliance) <- c("MIRO item", "x", "Compliance")
write.csv(agg_compliance, file = "agg_compliance.csv")

# AT THIS POINT, A DOCUMENT WAS CREATED MANUALLY WITH THE VALUES FOR COMPLIANCE AND RATINGS: agg_compliance AND priority_by_group (THE mean_all COLUMN ONLY) WERE MERGED TOGETHER BY HAND.
# NOTE(review): the original note gave as the reason that "manually matching them turned out impossible"; presumably automatic matching was meant -- confirm.
# `header` is spelled out in full instead of relying on partial matching of `head`.
df_cvr <- read.csv(file = ratingvcompliance_f, header = TRUE, sep = ",", stringsAsFactors = FALSE)

Compliance vs Ratings

# Rating bands: below 3 -> "Optional", below 3.5 -> "Should", else "Must".
# findInterval() counts how many cut points are <= the value, which indexes
# straight into the label vector; missing values stay missing.
rating_labels <- c("Optional", "Should", "Must")
df_cvr$rating_cat <- rating_labels[findInterval(df_cvr$rating, c(3, 3.5)) + 1]

# Compliance bands: below 20 -> "Very Low", below 50 -> "Low",
# below 80 -> "Medium", else "High".
compliance_labels <- c("Very Low", "Low", "Medium", "High")
df_cvr$compliance_cat <-
  compliance_labels[findInterval(df_cvr$compliance, c(20, 50, 80)) + 1]

# Two-letter code: first letter of the rating band + first letter of the
# compliance band.
df_cvr$cat <- paste0(
  substring(df_cvr$rating_cat, 1, 1),
  substring(df_cvr$compliance_cat, 1, 1)
)

# Ordered levels so charts show the bands from lowest to highest.
df_cvr$compliance_cat <- factor(df_cvr$compliance_cat, levels = compliance_labels)
df_cvr$rating_cat <- factor(df_cvr$rating_cat, levels = rating_labels)

# Table for the paper: items that have a rating, sorted by compliance and
# then rating, both descending.
d_paper <- df_cvr[
  !is.na(df_cvr$rating),
  c("miro_item_rating", "rating", "compliance", "cat")
]
d_paper <- d_paper[order(-d_paper$compliance, -d_paper$rating), ]
#print(xtable(d_paper,digits=c(0,2,2,2,0)),include.rownames=FALSE)

# Frequency and percentage of each combined rating/compliance code.
ct_cat <- plyr::count(d_paper$cat)
ct_cat$pc <- round((ct_cat$freq / nrow(d_paper)) * 100, 2)

# Display names for the leading columns, assigned by position.
# Only the first length(display_names) columns are renamed: assigning a
# shorter vector to names(df_cvr) as a whole would leave any further column
# (such as `cat`) with an NA name.
# NOTE(review): the mapping assumes the CSV supplies five columns in exactly
# this order, followed by rating_cat and compliance_cat -- confirm against
# the file.
display_names <- c(
  "miro_item_rating", "Rating", "miro_item_comp", "mean_compliance",
  "Compliance", "Rating category", "Compliance category"
)
names(df_cvr)[seq_along(display_names)] <- display_names

# Scatter plot of rating against compliance, one labelled point per item,
# coloured by rating band.
ggplot(
  df_cvr,
  aes(y = Rating, x = Compliance, label = miro_item_comp, colour = `Rating category`)
) +
  geom_point() +
  geom_text(aes(label = miro_item_comp), hjust = 0.1, vjust = -0.6, size = 3)
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_text).

ggsave(paste(chartdir,"scatter_compvrate.pdf",sep = ""), width = 12, height = 4)
## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_text).
# Stacked bars: number of rated items per compliance band, split by rating
# band. Items without a rating are left out.
ggplot(
  df_cvr[!is.na(df_cvr$Rating), ],
  aes(x = `Compliance category`, fill = `Rating category`)
) +
  geom_bar()

ggsave(paste0(chartdir, "bar_cat_compvrate.pdf"), width = 8, height = 2.5)