Demographics

# Base font size used by the chart themes in this script.
fontsize <- 16

## Role Analysis
# Tabulate each role flag column. The surrounding parentheses make the
# resulting frequency table print.
(plyr::count(df_results$role_user))

##   x freq
## 1     18
## 2 U   92

(plyr::count(df_results$role_developer))

##   x freq
## 1     29
## 2 D   81

(plyr::count(df_results$role_review))

##   x freq
## 1     48
## 2 W   62

(plyr::count(df_results$role_reader))

##   x freq
## 1     17
## 2 R   93

(plyr::count(df_results$role_author))

##   x freq
## 1     35
## 2 A   75

# Column sums of the five r_* role columns, one total per role
# (presumably 0/1 indicators, so each sum is a head count - TODO confirm).
colctrole <- colSums(
  df_results[c("r_user", "r_developer", "r_review", "r_reader", "r_author")]
)
names(colctrole) <- c("User", "Developer", "Reviewer", "Reader", "Author")

# Convert the named vector into a data frame and copy the row names (the role
# labels) into an explicit id column for plotting.
colctrole <- melt(colctrole)
colctrole$id <- rownames(colctrole)

# Horizontal bar chart of the totals per role.
ggplot(colctrole, aes(id, value)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("") +
  coord_flip() +
  theme(
    text = element_text(size = fontsize),
    plot.margin = unit(c(0, 0, -0.5, -0.5), "cm")
  )

# Saves the plot displayed above to <chartdir>role.pdf.
ggsave(paste0(chartdir, "role.pdf"), width = 3, height = 2.5)

### Cross-correlation matrix for role
# NOTE(review): the labels are assigned by position, so they are only correct
# if df_role's columns are in exactly this order - confirm where df_role is built.
df_role <- setNames(
  df_role,
  c("Developer", "User", "Reader", "Author", "Reviewer")
)

# Pairwise correlations between the role columns, rendered as a table.
corMat <- cor(df_role)
kable(corMat)

	Developer	User	Reader	Author	Reviewer
Developer	1.0000000	0.2930597	0.3149787	0.4329147	0.2223921
User	0.2930597	1.0000000	0.6266743	0.2781847	0.3045090
Reader	0.3149787	0.6266743	1.0000000	0.4098792	0.4352007
Author	0.4329147	0.2781847	0.4098792	1.0000000	0.4615392
Reviewer	0.2223921	0.3045090	0.4352007	0.4615392	1.0000000

# Long format of the correlation matrix: one row per (Var1, Var2) pair.
melted_cormat <- melt(corMat)

# Heat map of the correlations, shaded from white (low) to black (high).
ggplot(data = melted_cormat, aes(x = Var1, y = Var2, fill = value)) +
  geom_tile() +
  xlab("") +
  ylab("") +
  scale_fill_gradient(low = "white", high = "black") +
  theme(
    text = element_text(size = fontsize),
    plot.margin = unit(c(0, 0, -0.5, 0), "cm")
  )

ggsave(paste0(chartdir, "role_cormat.pdf"), width = 7, height = 2.5)

### Combinations of ontology roles
# Frequency of each distinct value of the combined role column, plus its share
# of all rows in df_results as a percentage rounded to two decimals.
ct_r <- plyr::count(df_results$role)
ct_r$pc <- round(ct_r$freq / nrow(df_results) * 100, 2)

# Most frequent combinations first.
kable(ct_r[order(-ct_r$freq), ])

	x	freq	pc
11	DURAW	48	43.64
10	DURA	13	11.82
9	DUR	9	8.18
1		5	4.55
17	UR	5	4.55
20	URW	5	4.55
19	URAW	4	3.64
3	D	3	2.73
16	U	3	2.73
2	A	2	1.82
5	DRA	2	1.82
15	RAW	2	1.82
18	URA	2	1.82
4	DA	1	0.91
6	DRW	1	0.91
7	DU	1	0.91
8	DUA	1	0.91
12	DURW	1	0.91
13	DW	1	0.91
14	R	1	0.91

## TDL Analysis
# Frequency of each value of the tdl column, sorted by descending count.
ct_tdl <- plyr::count(df_results$tdl)
ct_tdl <- ct_tdl[order(-ct_tdl$freq), ]

# Bar chart with the categories arranged by frequency via reorder().
ggplot(ct_tdl, aes(reorder(x, freq), freq)) +
  geom_bar(stat = "identity") +
  xlab("") +
  ylab("") +
  theme(
    text = element_text(size = fontsize),
    plot.margin = unit(c(0, 0, -0.5, 0), "cm")
  )

ggsave(paste0(chartdir, "tdl.pdf"), width = 6, height = 2.5)

## Job analysis
# Frequency of each value of the job column, drawn as a horizontal bar chart
# with the categories arranged by frequency.
ct <- plyr::count(df_results$job)
ggplot(ct, aes(x = reorder(x, freq), y = freq)) +
  geom_bar(stat = "identity") +
  ylab("") +
  xlab("") +
  coord_flip() +
  theme(
    text = element_text(size = fontsize),
    plot.margin = unit(c(0, 0, -0.5, -0.5), "cm")
  )

ggsave(paste0(chartdir, "job.pdf"), width = 4, height = 2.5)


# Export the role correlation matrix computed earlier.
write.csv(corMat, file = "correlation_matrix_usergroups.csv")

Overall ranking

## Create ranking by mean
# Keep the columns whose names start with "o_" plus the id column.
# drop = FALSE keeps a data frame even if only one column matches.
df <- df_results[, grepl("^o_", names(df_results)), drop = FALSE]
df$id <- df_results$id

# Long format: one row per (id, o_* column) pair, with the role columns of the
# same id attached.
df_long <- melt(df, id.vars = "id")
df_long <- merge(
  df_long,
  df_results[, c("id", "r_developer", "r_user", "r_author", "r_review",
                 "r_reader", "role")],
  by = "id"
)

# Attach a label to every o_* column name via the dfl lookup (col -> label).
df_long <- merge(df_long, dfl[c("col", "label")],
                 by.x = "variable", by.y = "col")
df_long$label <- as.factor(df_long$label)

# Mean value per label (NAs ignored), sorted descending; the sorted labels
# become the factor levels so the plot follows that order.
df_long_agg <- aggregate(df_long$value, by = list(df_long$label),
                         FUN = function(x) mean(x, na.rm = TRUE))
df_long_agg <- df_long_agg[order(-df_long_agg$x), ]
df_long$label <- factor(df_long$label, levels = df_long_agg$Group.1)

#head(df_long[c("variable","value")])

### Ordered mean rating of information items across all participants
# The three text labels are added with annotate() so each is drawn exactly once.
# A geom_text() layer with constant aesthetics would repeat the label for every
# row of df_long, overplotting it and bloating the PDF.
# NOTE(review): `fun.y` is deprecated in ggplot2 >= 3.3.0 in favour of `fun`;
# it is kept here so the script still runs on older ggplot2 versions.
ggplot(df_long, aes(x = label, y = value)) +
  stat_summary(fun.y = mean, geom = "bar") +
  geom_hline(yintercept = 5) +
  ylab("Mean Rating") +
  xlab("") +
  geom_hline(yintercept = 3.0) +
  geom_hline(yintercept = 3.5) +
  coord_flip(ylim = c(2.6, 5)) +
  theme(text = element_text(size = 10)) +
  theme(plot.margin = unit(c(0, 0, -0.5, -0.5), "cm")) +
  annotate("text", x = 29.81, y = 3.1, label = "SHOULD",
           hjust = 0, vjust = 0, size = 2.8) +
  annotate("text", x = 29.81, y = 2.5, label = "OPTIONAL",
           hjust = 0, vjust = 0, size = 2.8, colour = "white") +
  annotate("text", x = 29.81, y = 4.1, label = "MUST",
           hjust = 0, vjust = 0, size = 2.8)

## Warning: Removed 38 rows containing non-finite values (stat_summary).

# Write the most recently displayed plot to <chartdir>overall_mean_results.pdf.
ggsave(
  paste0(chartdir, "overall_mean_results.pdf"),
  width = 6,
  height = 6
)

## Warning: Removed 38 rows containing non-finite values (stat_summary).

## Rankings across roles
# aggregate_by_role() is defined outside this section; its result is used below
# with the columns ontology_feature, All, mean.all, median.all and sd.all.
df_long_agg <- aggregate_by_role(df_long)
write.csv(df_long_agg, file = "priority_by_group.csv")

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 100)

# Show the overall statistics ordered by ascending standard deviation.
df_long_agg <- df_long_agg[order(df_long_agg$sd.all), ]
kable(
  df_long_agg[, c("ontology_feature", "All", "mean.all", "median.all", "sd.all")]
)

	ontology_feature	All	mean.all	median.all	sd.all
1	Basics: Ontology URL	1	4.715596	5	0.6816611
2	Basics: Ontology name	2	4.706422	5	0.6979169
4	Basics: Ontology license	4	4.504587	5	0.7890906
6	SRD: Scope and coverage	6	4.148148	4	0.8407253
25	SRD: Development community	25	3.768518	4	0.8604116
3	Basics: Ontology owner	3	4.527778	5	0.8696152
7	Content: Ontology relationships	7	4.128440	4	0.8829760
9	Content: Incorporation of other ontologies	9	4.091743	4	0.9481101
13	Motivation: Target audience	13	3.944954	4	0.9606601
24	Content: Axiom patterns	24	3.796296	4	0.9644064
5	QA: Examples of use	5	4.192661	5	0.9857979
14	KA: Knowledge acquisition methodology	14	3.926605	4	0.9879498
16	Content: Entity metadata policy	16	3.889908	4	1.0214319
8	Content: KR language	8	4.110092	4	1.0304570
17	Content: Upper ontology	17	3.880734	4	1.0339136
22	Change: Versioning policy	23	3.798165	4	1.0344064
18	QA: Testing	18	3.871560	4	1.0372767
28	KA: Content selection	28	3.379630	4	1.0386691
26	Content: Entity naming convention	26	3.743119	4	1.0399757
10	Basics: Ontology repository	10	4.009174	4	1.0407922
21	Change: Entity deprecation strategy	21	3.834862	4	1.0673068
12	Motivation: Competition	12	3.963303	4	1.0708823
20	Motivation: Need	20	3.851852	4	1.0835630
19	Content: Identifier generation policy	19	3.862385	4	1.0841270
11	QA: Evaluation	11	3.990826	4	1.0843620
23	SRD: Communication	22	3.798165	4	1.0867878
15	Change: Sustainability plan	15	3.889908	4	1.0915454
29	KA: Source knowledge location	29	3.357798	3	1.0931007
27	Content: Ontology metrics	27	3.422018	3	1.1808156
30	Content: Development environment	30	2.878505	3	1.3010176

#print(xtable(df_long_agg[c("ontology_feature","All","mean.all","median.all","sd.all")],digits=c(0,0,0,2,0,2)),include.rownames=FALSE)

### Correlation of standard deviation and mean
# 2 x 2 correlation matrix of the per-item standard deviation and mean.
kable(
  cor(df_long_agg[, c("sd.all", "mean.all")])
)

	sd.all	mean.all
sd.all	1.0000000	-0.8458595
mean.all	-0.8458595	1.0000000

### Ranking table
# Overall rank and per-role ranks of every item, sorted by the overall rank.
dfl_i <- df_long_agg[
  ,
  c("ontology_feature", "All", "Author", "Developer", "Reviewer", "User", "Reader")
]
dfl_i <- dfl_i[order(dfl_i$All), ]
kable(dfl_i)

	ontology_feature	All	Author	Developer	Reviewer	User	Reader
1	Basics: Ontology URL	1	2	2	2	2	2
2	Basics: Ontology name	2	1	1	1	1	1
3	Basics: Ontology owner	3	3	3	3	3	3
4	Basics: Ontology license	4	4	4	4	4	4
5	QA: Examples of use	5	5	8	5	5	5
6	SRD: Scope and coverage	6	8	6	6	8	6
7	Content: Ontology relationships	7	7	7	7	7	7
8	Content: KR language	8	6	5	9	9	8
9	Content: Incorporation of other ontologies	9	10	9	8	6	9
10	Basics: Ontology repository	10	14	12	10	10	11
11	QA: Evaluation	11	9	14	11	11	12
12	Motivation: Competition	12	12	11	13	13	14
13	Motivation: Target audience	13	15	13	12	15	13
14	KA: Knowledge acquisition methodology	14	21	16	20	17	20
15	Change: Sustainability plan	15	13	10	16	12	10
16	Content: Entity metadata policy	16	17	17	17	18	21
17	Content: Upper ontology	17	11	20	18	21	15
18	QA: Testing	18	16	24	14	16	19
19	Content: Identifier generation policy	19	25	18	19	19	17
20	Motivation: Need	20	20	22	15	22	18
21	Change: Entity deprecation strategy	21	18	15	23	14	16
23	SRD: Communication	22	22	19	22	23	24
22	Change: Versioning policy	23	24	21	24	20	22
24	Content: Axiom patterns	24	23	25	21	24	23
25	SRD: Development community	25	19	23	25	25	25
26	Content: Entity naming convention	26	26	26	26	26	26
27	Content: Ontology metrics	27	28	28	27	27	27
28	KA: Content selection	28	29	27	28	28	28
29	KA: Source knowledge location	29	27	29	29	29	29
30	Content: Development environment	30	30	30	30	30	30

# Threshold referenced only by the disabled "less"/"more" classification below.
thresh <- 4
dfl_i_dev <- dfl_i
#dfl_i_dev$author_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Author)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Author)<=-thresh,"more",""))
#dfl_i_dev$developer_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Developer)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Developer)<=-thresh,"more",""))
#dfl_i_dev$reviewer_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Reviewer)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Reviewer)<=-thresh,"more",""))
#dfl_i_dev$user_d<-ifelse((dfl_i_dev$All-dfl_i_dev$User)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$User)<=-thresh,"more",""))
#dfl_i_dev$reader_d<-ifelse((dfl_i_dev$All-dfl_i_dev$Reader)>=thresh,"less",ifelse((dfl_i_dev$All-dfl_i_dev$Reader)<=-thresh,"more",""))

# Signed difference between the overall rank and each role's rank.
dfl_i_dev$author_d <- with(dfl_i_dev, All - Author)
dfl_i_dev$developer_d <- with(dfl_i_dev, All - Developer)
dfl_i_dev$reviewer_d <- with(dfl_i_dev, All - Reviewer)
dfl_i_dev$user_d <- with(dfl_i_dev, All - User)
dfl_i_dev$reader_d <- with(dfl_i_dev, All - Reader)
kable(dfl_i_dev)

	ontology_feature	All	Author	Developer	Reviewer	User	Reader	author_d	developer_d	reviewer_d	user_d	reader_d
1	Basics: Ontology URL	1	2	2	2	2	2	-1	-1	-1	-1	-1
2	Basics: Ontology name	2	1	1	1	1	1	1	1	1	1	1
3	Basics: Ontology owner	3	3	3	3	3	3	0	0	0	0	0
4	Basics: Ontology license	4	4	4	4	4	4	0	0	0	0	0
5	QA: Examples of use	5	5	8	5	5	5	0	-3	0	0	0
6	SRD: Scope and coverage	6	8	6	6	8	6	-2	0	0	-2	0
7	Content: Ontology relationships	7	7	7	7	7	7	0	0	0	0	0
8	Content: KR language	8	6	5	9	9	8	2	3	-1	-1	0
9	Content: Incorporation of other ontologies	9	10	9	8	6	9	-1	0	1	3	0
10	Basics: Ontology repository	10	14	12	10	10	11	-4	-2	0	0	-1
11	QA: Evaluation	11	9	14	11	11	12	2	-3	0	0	-1
12	Motivation: Competition	12	12	11	13	13	14	0	1	-1	-1	-2
13	Motivation: Target audience	13	15	13	12	15	13	-2	0	1	-2	0
14	KA: Knowledge acquisition methodology	14	21	16	20	17	20	-7	-2	-6	-3	-6
15	Change: Sustainability plan	15	13	10	16	12	10	2	5	-1	3	5
16	Content: Entity metadata policy	16	17	17	17	18	21	-1	-1	-1	-2	-5
17	Content: Upper ontology	17	11	20	18	21	15	6	-3	-1	-4	2
18	QA: Testing	18	16	24	14	16	19	2	-6	4	2	-1
19	Content: Identifier generation policy	19	25	18	19	19	17	-6	1	0	0	2
20	Motivation: Need	20	20	22	15	22	18	0	-2	5	-2	2
21	Change: Entity deprecation strategy	21	18	15	23	14	16	3	6	-2	7	5
23	SRD: Communication	22	22	19	22	23	24	0	3	0	-1	-2
22	Change: Versioning policy	23	24	21	24	20	22	-1	2	-1	3	1
24	Content: Axiom patterns	24	23	25	21	24	23	1	-1	3	0	1
25	SRD: Development community	25	19	23	25	25	25	6	2	0	0	0
26	Content: Entity naming convention	26	26	26	26	26	26	0	0	0	0	0
27	Content: Ontology metrics	27	28	28	27	27	27	-1	-1	0	0	0
28	KA: Content selection	28	29	27	28	28	28	-1	1	0	0	0
29	KA: Source knowledge location	29	27	29	29	29	29	2	0	0	0	0
30	Content: Development environment	30	30	30	30	30	30	0	0	0	0	0

### Ranking of information items broken down by roles
# Long format: one row per (item, ranking column) pair. The item factor keeps
# the row order of dfl_i.
df_long_agg_long <- melt(dfl_i, id.vars = "ontology_feature")
df_long_agg_long$ontology_feature <- factor(
  df_long_agg_long$ontology_feature,
  levels = dfl_i$ontology_feature
)

# Dodged horizontal bars: one bar per ranking column for every item.
ggplot(
  df_long_agg_long,
  aes(x = ontology_feature, y = value, group = variable, fill = variable)
) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip()

ggsave(paste0(chartdir, "overall_ranking_by_role.pdf"), width = 4, height = 2.5)

Comment Analysis

# Select the coded-comment columns: names ending in "_code", optionally
# followed by one digit from 2 to 6. Note that this reuses the name `df`.
df <- df_comments[, grepl("_code[2-6]?$", names(df_comments))]
#df$id<-df_comments$id

# Collect the distinct codes of every row, so a code repeated within one row
# counts once; empty cells are kept as "". seq_len() yields no iterations for
# an empty data frame (1:nrow(df) would iterate over c(1, 0)), and lapply()
# avoids re-copying a growing vector on every append.
kw <- unlist(
  lapply(seq_len(nrow(df)), function(i) unique(as.character(df[i, ])))
)

# Frequency of every code, most frequent first.
ct_key <- plyr::count(kw)
kable(ct_key[order(-ct_key$freq), ])

	x	freq
1		110
27	coverage	17
154	use_case_suitability	17
51	example_use	10
148	unclassified	10
2	active_development	8
156	user_community	6
14	compatibility_other_ontologies	5
16	competition	5
50	evidence_use	5
68	interoperability	4
119	quality	4
134	scope	4
42	domain	3
69	issue_tracking	3
86	logically_sound	3
92	motivated_editors	3
161	version_number	3
9	citation	2
20	consistency	2
24	content	2
33	definitions	2
35	dereferenceble_uris	2
40	documentation_quality	2
54	expressivity	2
64	inference	2
81	language	2
93	name	2
97	obo_principles	2
106	open_source	2
110	owl_profile	2
117	qa_tools	2
118	qa_toomuch_inbeginning	2
127	representational_adequacy	2
132	requirements_explicit	2
138	surveytool_problem	2
139	sustainability	2
149	update_request_process	2
153	use_case_adjustability	2
164	visualisation	2
167	we_need_to_talk	2
3	all_new_ontologies_at_icbo	1
4	all_requirements_toomuch	1
5	availability	1
6	available_formats	1
7	available_imports	1
8	change_on_demand	1
10	classification	1
11	coherency	1
12	commit_count	1
13	compatibility_ontologies	1
15	competency_questions	1
17	comprehensibility	1
18	configuration_optionals	1
19	connect_database	1
21	consistent	1
22	consistent_content	1
23	construct_frequency	1
25	content_maybe_toomuch	1
26	content_self_descriptive	1
28	cqs	1
29	data_capture_support	1
30	data_migration_support	1
31	dataset_alignment	1
32	defined_dependencies	1
34	deprecation_management	1
36	dereferencibility	1
37	development_priority_management	1
38	difficulty_use	1
39	documentation	1
41	doi	1
43	editor_info	1
44	email_list	1
45	evaluation_crucial	1
46	everything_important	1
47	evidence_added_value	1
48	evidence_application	1
49	evidence_interoperability	1
52	examples_more_important	1
53	explore_full_ontology	1
55	feature_scope_depends_audience	1
56	features_depend_on_usergroup	1
57	governance_process_scope_requirements_change	1
58	granularity	1
59	hierarchy	1
60	homepage	1
61	human_readable_descriptions	1
62	identfier_generation_policy_toomuch	1
63	imports_versioning	1
65	institution_endorsement	1
66	interdisciplinary	1
67	interesting_inferences	1
70	justification_deprecation	1
71	justification_development	1
72	justification_prioritisation	1
73	justification_requirements	1
74	justification_scope	1
75	justification_upper_ontology	1
76	justifications_changes	1
77	justify_modelling_decisions	1
78	ka_document_source	1
79	ka_full_disclosure_toomuch	1
80	ka_interest_depends_on_user	1
82	large_database_use	1
83	last_update_date	1
84	lessons_learnt	1
85	location	1
87	lots_of_metadata	1
88	metadata_vocabulary_use	1
89	metamodel_patterns_toomuch	1
90	minimal_dependencies	1
91	most_evaluations_inadequate	1
94	name_experts	1
95	not_one_off	1
96	number_projects	1
98	ontoclean	1
99	ontology_information_standard	1
100	ontology_location	1
101	ontology_migration_support	1
102	ontology_parts_utilisation_application	1
103	ontology_unavailable	1
104	ontology_visualisation	1
105	ontology_vs_vocabulary	1
107	orthogonality	1
108	out_of_box_ontology_website	1
109	outreach_competition	1
111	problems_solved	1
112	property_selection_strategy	1
113	prov_provenance_features	1
114	public_diff	1
115	purpose	1
116	purpose_explicit	1
120	raw_data_location_toomuch	1
121	realist_vs_application	1
122	reasoning_time	1
123	relation_to_other_ontologies	1
124	release_date	1
125	release_frequency	1
126	report_issue_tracker	1
128	represents_reality	1
129	requirements	1
130	requirements_analysis	1
131	requirements_dishonest	1
133	rich_entity_annotations	1
135	stakeholder_motivation	1
136	standardisation	1
137	support	1
140	target_audience	1
141	tawny_owl	1
142	tool_explore_ontology	1
143	tools_application	1
144	tools_changemanagement	1
145	tools_development	1
146	tools_using_ontology	1
147	tradeoff_performance_representation	1
150	update_strategy	1
151	updatecycles_toomuch_if_new	1
152	url_ontology	1
155	use_of_inference_manage_classification	1
157	user_friendly	1
158	user_involvement_development	1
159	vann_uri	1
160	version_history_plus_metadata	1
162	versioning	1
163	versions_imports	1
165	w3c_recommendation_annotation	1
166	w3c_recommendations	1

# Columns holding the codes of the "important" comments. This object is not
# used by the tally below, which reads all code columns in `df`.
df_comments_important <- df_comments[, grepl("important_comments_code[2-6]?$", names(df_comments))]

# Distinct codes per row across ALL comment-code columns. seq_len() is safe
# for an empty data frame and lapply() avoids growing a vector in a loop.
keywords <- unlist(
  lapply(seq_len(nrow(df)), function(i) unique(as.character(df[i, ])))
)

# Drop the empty strings that come from cells without a code.
keywords <- keywords[keywords != ""]

# Frequency of every non-empty code, most frequent first.
ct_comments_all_key <- plyr::count(keywords)
kable(ct_comments_all_key[order(-ct_comments_all_key$freq), ])

	x	freq
26	coverage	17
153	use_case_suitability	17
50	example_use	10
147	unclassified	10
1	active_development	8
155	user_community	6
13	compatibility_other_ontologies	5
15	competition	5
49	evidence_use	5
67	interoperability	4
118	quality	4
133	scope	4
41	domain	3
68	issue_tracking	3
85	logically_sound	3
91	motivated_editors	3
160	version_number	3
8	citation	2
19	consistency	2
23	content	2
32	definitions	2
34	dereferenceble_uris	2
39	documentation_quality	2
53	expressivity	2
63	inference	2
80	language	2
92	name	2
96	obo_principles	2
105	open_source	2
109	owl_profile	2
116	qa_tools	2
117	qa_toomuch_inbeginning	2
126	representational_adequacy	2
131	requirements_explicit	2
137	surveytool_problem	2
138	sustainability	2
148	update_request_process	2
152	use_case_adjustability	2
163	visualisation	2
166	we_need_to_talk	2
2	all_new_ontologies_at_icbo	1
3	all_requirements_toomuch	1
4	availability	1
5	available_formats	1
6	available_imports	1
7	change_on_demand	1
9	classification	1
10	coherency	1
11	commit_count	1
12	compatibility_ontologies	1
14	competency_questions	1
16	comprehensibility	1
17	configuration_optionals	1
18	connect_database	1
20	consistent	1
21	consistent_content	1
22	construct_frequency	1
24	content_maybe_toomuch	1
25	content_self_descriptive	1
27	cqs	1
28	data_capture_support	1
29	data_migration_support	1
30	dataset_alignment	1
31	defined_dependencies	1
33	deprecation_management	1
35	dereferencibility	1
36	development_priority_management	1
37	difficulty_use	1
38	documentation	1
40	doi	1
42	editor_info	1
43	email_list	1
44	evaluation_crucial	1
45	everything_important	1
46	evidence_added_value	1
47	evidence_application	1
48	evidence_interoperability	1
51	examples_more_important	1
52	explore_full_ontology	1
54	feature_scope_depends_audience	1
55	features_depend_on_usergroup	1
56	governance_process_scope_requirements_change	1
57	granularity	1
58	hierarchy	1
59	homepage	1
60	human_readable_descriptions	1
61	identfier_generation_policy_toomuch	1
62	imports_versioning	1
64	institution_endorsement	1
65	interdisciplinary	1
66	interesting_inferences	1
69	justification_deprecation	1
70	justification_development	1
71	justification_prioritisation	1
72	justification_requirements	1
73	justification_scope	1
74	justification_upper_ontology	1
75	justifications_changes	1
76	justify_modelling_decisions	1
77	ka_document_source	1
78	ka_full_disclosure_toomuch	1
79	ka_interest_depends_on_user	1
81	large_database_use	1
82	last_update_date	1
83	lessons_learnt	1
84	location	1
86	lots_of_metadata	1
87	metadata_vocabulary_use	1
88	metamodel_patterns_toomuch	1
89	minimal_dependencies	1
90	most_evaluations_inadequate	1
93	name_experts	1
94	not_one_off	1
95	number_projects	1
97	ontoclean	1
98	ontology_information_standard	1
99	ontology_location	1
100	ontology_migration_support	1
101	ontology_parts_utilisation_application	1
102	ontology_unavailable	1
103	ontology_visualisation	1
104	ontology_vs_vocabulary	1
106	orthogonality	1
107	out_of_box_ontology_website	1
108	outreach_competition	1
110	problems_solved	1
111	property_selection_strategy	1
112	prov_provenance_features	1
113	public_diff	1
114	purpose	1
115	purpose_explicit	1
119	raw_data_location_toomuch	1
120	realist_vs_application	1
121	reasoning_time	1
122	relation_to_other_ontologies	1
123	release_date	1
124	release_frequency	1
125	report_issue_tracker	1
127	represents_reality	1
128	requirements	1
129	requirements_analysis	1
130	requirements_dishonest	1
132	rich_entity_annotations	1
134	stakeholder_motivation	1
135	standardisation	1
136	support	1
139	target_audience	1
140	tawny_owl	1
141	tool_explore_ontology	1
142	tools_application	1
143	tools_changemanagement	1
144	tools_development	1
145	tools_using_ontology	1
146	tradeoff_performance_representation	1
149	update_strategy	1
150	updatecycles_toomuch_if_new	1
151	url_ontology	1
154	use_of_inference_manage_classification	1
156	user_friendly	1
157	user_involvement_development	1
158	vann_uri	1
159	version_history_plus_metadata	1
161	versioning	1
162	versions_imports	1
164	w3c_recommendation_annotation	1
165	w3c_recommendations	1

# Restrict to the code columns of the "important" comments: names ending in
# "important_comments_code", optionally followed by one digit from 2 to 6.
df_comments_important <- df_comments[, grepl("important_comments_code[2-6]?$", names(df_comments))]

# Distinct codes per row. seq_len() is safe for an empty data frame
# (1:nrow(...) would iterate over c(1, 0)) and lapply() avoids growing a
# vector inside a loop.
keywords <- unlist(
  lapply(
    seq_len(nrow(df_comments_important)),
    function(i) unique(as.character(df_comments_important[i, ]))
  )
)

# Drop the empty strings that come from cells without a code.
keywords <- keywords[keywords != ""]

# Frequency of every non-empty code, most frequent first.
ct_comments_important_key <- plyr::count(keywords)
kable(ct_comments_important_key[order(-ct_comments_important_key$freq), ])

	x	freq
12	coverage	17
51	use_case_suitability	15
7	compatibility_other_ontologies	5
52	user_community	5
1	active_development	4
29	interoperability	4
46	scope	4
21	domain	3
24	evidence_use	3
32	logically_sound	3
34	motivated_editors	3
42	quality	3
11	content	2
16	definitions	2
17	dereferenceble_uris	2
20	documentation_quality	2
30	language	2
37	obo_principles	2
39	open_source	2
44	representational_adequacy	2
50	use_case_adjustability	2
2	availability	1
3	change_on_demand	1
4	classification	1
5	coherency	1
6	commit_count	1
8	competition	1
9	comprehensibility	1
10	consistent_content	1
13	data_capture_support	1
14	data_migration_support	1
15	defined_dependencies	1
18	difficulty_use	1
19	documentation	1
22	evidence_added_value	1
23	evidence_application	1
25	granularity	1
26	hierarchy	1
27	institution_endorsement	1
28	interdisciplinary	1
31	large_database_use	1
33	minimal_dependencies	1
35	not_one_off	1
36	number_projects	1
38	ontology_visualisation	1
40	property_selection_strategy	1
41	purpose_explicit	1
43	realist_vs_application	1
45	represents_reality	1
47	standardisation	1
48	sustainability	1
49	update_strategy	1
53	user_friendly	1

# Assignment of each comment code to a thematic group. A single lookup table
# replaces a chain of nested ifelse() passes, so every code is listed once.
keyword_groups <- list(
  "Scope and Coverage" = c(
    "coverage", "scope", "interdisciplinary", "granularity"
  ),
  "Interoperability" = c(
    "compatibility_other_ontologies", "interoperability"
  ),
  "Use Case" = c(
    "use_case_suitability", "use_case_adjustability", "purpose_explicit"
  ),
  "Active Community" = c(
    "user_community", "motivated_editors", "active_development",
    "change_on_demand", "not_one_off", "commit_count",
    "institution_endorsement"
  ),
  "Evidence for use" = c(
    "evidence_added_value", "evidence_application", "number_projects",
    "evidence_use", "large_database_use"
  ),
  "Content" = c(
    "domain", "content", "representational_adequacy", "represents_reality",
    "realist_vs_application", "consistent_content", "coherency"
  ),
  "Metadata and Documentation" = c(
    "definitions", "documentation_quality", "documentation",
    "ontology_visualisation", "language"
  ),
  "Publishing and Life Cycle" = c(
    "obo_principles", "availability", "open_source", "standardisation",
    "sustainability", "dereferenceble_uris", "update_strategy"
  ),
  "Other" = c(
    "competition", "quality"
  ),
  "Representation" = c(
    "hierarchy", "classification", "defined_dependencies",
    "minimal_dependencies", "property_selection_strategy", "logically_sound"
  ),
  "Usability" = c(
    "user_friendly", "comprehensibility", "difficulty_use",
    "data_capture_support", "data_migration_support"
  )
)

# Flatten to a named vector (code -> group). Looking a code up by name returns
# NA when it is not listed; such codes are labelled "ungrouped".
group_lookup <- setNames(
  rep(names(keyword_groups), times = vapply(keyword_groups, length, integer(1))),
  unlist(keyword_groups, use.names = FALSE)
)
matched_group <- unname(group_lookup[as.character(ct_comments_important_key$x)])
ct_comments_important_key$group <- ifelse(
  is.na(matched_group), "ungrouped", matched_group
)

# Total frequency per group, largest first.
agg <- aggregate(
  ct_comments_important_key$freq,
  by = list(ct_comments_important_key$group),
  sum
)
agg <- agg[order(-agg$x), ]
#print(xtable(agg,digits=c(0,0,0)),include.rownames=FALSE)

# Codes that occur in kw but not among the "important" comment codes. kw still
# contains the empty strings, so "" shows up in this tally as well.
keywords_other <- kw[!(kw %in% keywords)]

ct_comments_other_key <- plyr::count(keywords_other)
kable(ct_comments_other_key[order(-ct_comments_other_key$freq), ])

	x	freq
1		110
27	example_use	10
100	unclassified	10
40	issue_tracking	3
108	version_number	3
6	citation	2
11	consistency	2
30	expressivity	2
38	inference	2
59	name	2
71	owl_profile	2
76	qa_tools	2
77	qa_toomuch_inbeginning	2
87	requirements_explicit	2
91	surveytool_problem	2
101	update_request_process	2
111	visualisation	2
114	we_need_to_talk	2
2	all_new_ontologies_at_icbo	1
3	all_requirements_toomuch	1
4	available_formats	1
5	available_imports	1
7	compatibility_ontologies	1
8	competency_questions	1
9	configuration_optionals	1
10	connect_database	1
12	consistent	1
13	construct_frequency	1
14	content_maybe_toomuch	1
15	content_self_descriptive	1
16	cqs	1
17	dataset_alignment	1
18	deprecation_management	1
19	dereferencibility	1
20	development_priority_management	1
21	doi	1
22	editor_info	1
23	email_list	1
24	evaluation_crucial	1
25	everything_important	1
26	evidence_interoperability	1
28	examples_more_important	1
29	explore_full_ontology	1
31	feature_scope_depends_audience	1
32	features_depend_on_usergroup	1
33	governance_process_scope_requirements_change	1
34	homepage	1
35	human_readable_descriptions	1
36	identfier_generation_policy_toomuch	1
37	imports_versioning	1
39	interesting_inferences	1
41	justification_deprecation	1
42	justification_development	1
43	justification_prioritisation	1
44	justification_requirements	1
45	justification_scope	1
46	justification_upper_ontology	1
47	justifications_changes	1
48	justify_modelling_decisions	1
49	ka_document_source	1
50	ka_full_disclosure_toomuch	1
51	ka_interest_depends_on_user	1
52	last_update_date	1
53	lessons_learnt	1
54	location	1
55	lots_of_metadata	1
56	metadata_vocabulary_use	1
57	metamodel_patterns_toomuch	1
58	most_evaluations_inadequate	1
60	name_experts	1
61	ontoclean	1
62	ontology_information_standard	1
63	ontology_location	1
64	ontology_migration_support	1
65	ontology_parts_utilisation_application	1
66	ontology_unavailable	1
67	ontology_vs_vocabulary	1
68	orthogonality	1
69	out_of_box_ontology_website	1
70	outreach_competition	1
72	problems_solved	1
73	prov_provenance_features	1
74	public_diff	1
75	purpose	1
78	raw_data_location_toomuch	1
79	reasoning_time	1
80	relation_to_other_ontologies	1
81	release_date	1
82	release_frequency	1
83	report_issue_tracker	1
84	requirements	1
85	requirements_analysis	1
86	requirements_dishonest	1
88	rich_entity_annotations	1
89	stakeholder_motivation	1
90	support	1
92	target_audience	1
93	tawny_owl	1
94	tool_explore_ontology	1
95	tools_application	1
96	tools_changemanagement	1
97	tools_development	1
98	tools_using_ontology	1
99	tradeoff_performance_representation	1
102	updatecycles_toomuch_if_new	1
103	url_ontology	1
104	use_of_inference_manage_classification	1
105	user_involvement_development	1
106	vann_uri	1
107	version_history_plus_metadata	1
109	versioning	1
110	versions_imports	1
112	w3c_recommendation_annotation	1
113	w3c_recommendations	1

Systematic review of MIRO compliance

# Split the paper results into descriptive metadata columns and the remaining
# (coding) columns.
paper_metadata <- c("Name", "URL", "Domain", "Language", "License", "Repository")
df_paper_metadata <- df_paper_results[paper_metadata]
df_paper_coding <- df_paper_results[, !(names(df_paper_results) %in% paper_metadata)]
latex_paper_metadata <- df_paper_metadata[c("Name", "Domain", "URL")]
#print(xtable(latex_paper_metadata,digits=c(0,0,0,0)),include.rownames=FALSE)

# Long format: one row per (Code, coded column) pair. The value is turned into
# a factor so the bar charts count each distinct code value.
df_coding_melt <- melt(df_paper_coding, id.vars = "Code")
df_coding_melt$value <- as.factor(df_coding_melt$value)

# First chart: only Ontology.name, Ontology.owner and OBO.Principles.
ggplot(
  df_coding_melt[df_coding_melt$variable %in% c("Ontology.name", "Ontology.owner", "OBO.Principles"), ],
  aes(x = value, fill = value)
) +
  geom_bar() +
  facet_wrap("variable") +
  theme_bw() +
  scale_fill_grey()

# Second chart: every other coded column, three facets per row. This is the
# plot written by the ggsave() call that follows.
ggplot(
  df_coding_melt[!(df_coding_melt$variable %in% c("Ontology.name", "Ontology.owner", "OBO.Principles")), ],
  aes(x = value, fill = value)
) +
  geom_bar() +
  facet_wrap("variable", ncol = 3) +
  theme_bw() +
  scale_fill_grey()

ggsave(paste0(chartdir, "results_coding.pdf"), width = 8, height = 10)

# Back to numbers, then the mean coded value per column.
df_coding_melt$value <- as.numeric(as.character(df_coding_melt$value))
agg_compliance <- aggregate(
  df_coding_melt$value,
  by = list(df_coding_melt$variable),
  FUN = mean
)

# Ontology.name and Ontology.owner are halved before conversion to a percentage
# (presumably they are coded on a 0-2 scale - TODO confirm).
agg_compliance$compliance <- ifelse(
  agg_compliance$Group.1 %in% c("Ontology.name", "Ontology.owner"),
  agg_compliance$x / 2,
  agg_compliance$x
)
agg_compliance$compliance <- round(agg_compliance$compliance * 100, 2)
names(agg_compliance) <- c("MIRO item", "x", "Compliance")
write.csv(agg_compliance, file = "agg_compliance.csv")

# AT THIS POINT, A DOCUMENT WAS CREATED MANUALLY WITH THE VALUES FOR COMPLIANCE
# AND RATINGS, AS (per the original note) "MANUALLY MATCHING THEM TURNED OUT
# IMPOSSIBLE" - presumably automatic matching was meant. FOR THAT,
# agg_compliance and priority_by_group (the mean_all column only) were merged
# together.
df_cvr <- read.csv(
  file = ratingvcompliance_f,
  header = TRUE,
  sep = ",",
  stringsAsFactors = FALSE
)

Compliance vs Ratings

# Rating bands: below 3 -> Optional, below 3.5 -> Should, otherwise Must.
df_cvr$rating_cat <- ifelse(
  df_cvr$rating < 3,
  "Optional",
  ifelse(df_cvr$rating < 3.5, "Should", "Must")
)

# Compliance bands: <20 Very Low, <50 Low, <80 Medium, otherwise High.
df_cvr$compliance_cat <- ifelse(
  df_cvr$compliance < 20,
  "Very Low",
  ifelse(
    df_cvr$compliance < 50,
    "Low",
    ifelse(df_cvr$compliance < 80, "Medium", "High")
  )
)

# Two-letter code: first letter of the rating band + first letter of the
# compliance band.
df_cvr$cat <- paste(
  substring(df_cvr$rating_cat, 1, 1),
  substring(df_cvr$compliance_cat, 1, 1),
  sep = ""
)

# Fix the level order of both bands for plotting.
df_cvr$compliance_cat <- factor(
  df_cvr$compliance_cat,
  levels = c("Very Low", "Low", "Medium", "High")
)
df_cvr$rating_cat <- factor(
  df_cvr$rating_cat,
  levels = c("Optional", "Should", "Must")
)

# Rows that have a rating, ordered by descending compliance, then rating.
d_paper <- df_cvr[
  !is.na(df_cvr$rating),
  c("miro_item_rating", "rating", "compliance", "cat")
]
d_paper <- d_paper[order(-d_paper$compliance, -d_paper$rating), ]
#print(xtable(d_paper,digits=c(0,2,2,2,0)),include.rownames=FALSE)

# Share of each two-letter category among the rated rows, in percent.
ct_cat <- plyr::count(d_paper$cat)
ct_cat$pc <- round((ct_cat$freq / nrow(d_paper)) * 100, 2)

# NOTE(review): columns are renamed by position. Seven names are supplied while
# three columns (rating_cat, compliance_cat, cat) were added above; if the CSV
# has five columns, the trailing `cat` column ends up with an NA name - confirm.
names(df_cvr) <- c(
  "miro_item_rating", "Rating", "miro_item_comp", "mean_compliance",
  "Compliance", "Rating category", "Compliance category"
)

# Labelled scatter plot of rating against compliance, coloured by rating band.
ggplot(
  df_cvr,
  aes(y = Rating, x = Compliance, label = miro_item_comp, colour = `Rating category`)
) +
  geom_point() +
  geom_text(aes(label = miro_item_comp), hjust = 0.1, vjust = -0.6, size = 3)

## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_text).

# Write the most recently displayed plot to <chartdir>scatter_compvrate.pdf.
ggsave(
  paste0(chartdir, "scatter_compvrate.pdf"),
  width = 12,
  height = 4
)

## Warning: Removed 6 rows containing missing values (geom_point).

## Warning: Removed 6 rows containing missing values (geom_text).

# Stacked bars: number of rated items per compliance band, split by rating band.
ggplot(
  df_cvr[!is.na(df_cvr$Rating), ],
  aes(x = `Compliance category`, fill = `Rating category`)
) +
  geom_bar()

ggsave(paste0(chartdir, "bar_cat_compvrate.pdf"), width = 8, height = 2.5)

The minimum information for the reporting of an ontology (MIRO) guidelines: Analysis

Nicolas Matentzoglu, James Malone, Chris Mungall and Robert Stevens

7 May 2016

Demographics

Overall ranking

Comment Analysis

Systematic review of MIRO compliance

Compliance vs Ratings