Measuring Expert Performance at Manually Classifying Domain Entities under Upper Ontology Classes: Analysis

Demographics

Gender

x freq
Female 4
Male 4

Age and Expertise

Min. X1st.Qu. Median Mean X3rd.Qu. Max.
37 39.5 43 44.14286 48 54

x freq
Expertise = 4 3
Expertise = 5 5

Expertise by Gender

bfo11expertise gender freq
Expertise = 4 Female 3
Expertise = 5 Female 1
Expertise = 5 Male 4

Rating Analysis

# Rows of dq that are answers to questions and whose id lies below fatiquecutoff.
df_answers_raw <- dq[dq$id < fatiquecutoff & dq$variable == "question", ]

# Map blank and "don't know" responses onto the single label "Not given".
df_answers_raw$answer <- ifelse(
  df_answers_raw$answer == "" |
    df_answers_raw$answer == " " |
    df_answers_raw$answer == "don't know",
  "Not given",
  df_answers_raw$answer
)
df_answers_raw$variable <- NULL

# Mark every row as EMPTY (no usable answer) or OK.
df_answers_raw$empty <- ifelse(
  df_answers_raw$answer == "Not given" |
    df_answers_raw$answer == "don't know" |
    df_answers_raw$answer == " ",
  "EMPTY",
  "OK"
)

# df_answers_all keeps every pid; df_answers_no_0 drops the rows of pid "0".
df_answers_all <- df_answers_raw
df_answers_no_0 <- df_answers_all[!(df_answers_all$pid == "0"), ]

# Confidence rows are taken from dq as-is (no id or pid filter is applied here).
df_confidence <- dq[dq$variable == "confidence", ]
asis_output("### Questions per participant")

Questions per participant

kable(plyr::count(df_answers_no_0$pid),row.names = FALSE,digits = c(2))
x freq
1 46
2 46
3 46
4 46
5 46
6 46
7 46
8 46
#questions per participant
asis_output("### Participants with number of missing answers")

Participants with number of missing answers

x<-plyr::count(df_answers_no_0[c("pid","empty")])
kable(plyr::count(x[x$empty=="EMPTY",]$freq),row.names = FALSE,digits = c(2))
x freq
1 2
2 1
3 1
6 1
7 1
asis_output("### Missing answers by concept")

Missing answers by concept

# Count EMPTY/OK rows per concept, keep the EMPTY counts, and show the
# concepts with the most missing answers first.
x <- plyr::count(df_answers_no_0[c("concept", "empty")])
x <- x[x$empty == "EMPTY" & x$freq >= 1, ]
kable(x[order(-x$freq), ], row.names = FALSE, digits = c(2))
concept empty freq
situation EMPTY 4
why I had problems sleeping EMPTY 4
air space tomorrow EMPTY 3
time and place EMPTY 3
area where the hotel will be built EMPTY 1
distance EMPTY 1
my hotel room EMPTY 1
surface of pool table EMPTY 1
tan line EMPTY 1
warmth EMPTY 1
# Distribution of different ratings
asis_output("### Number of different answers (DA)")

Number of different answers (DA)

# Per concept, count how many distinct answers were given. No filtering is
# applied here, so "Not given" counts as an answer of its own.
df_different_answers <- aggregate(
  df_answers_no_0$answer,
  by = list(df_answers_no_0$concept),
  FUN = function(given) length(unique(given))
)
names(df_different_answers) <- c("concept", "DA")
df_different_answers <- df_different_answers[order(-df_different_answers$DA), ]

# Tabulate how many concepts share each DA value.
agg_da <- plyr::count(df_different_answers$DA)

kable(agg_da, row.names = FALSE, digits = c(2))
x freq
1 10
2 12
3 10
4 12
5 2
asis_output("### Confidence (CON)")

Confidence (CON)

# Mean confidence per concept; answers are coerced to numeric and NA values
# are ignored when averaging.
agg_con <- aggregate(
  as.numeric(df_confidence$answer),
  by = list(df_confidence$concept),
  FUN = mean,
  na.rm = TRUE
)
names(agg_con) <- c("concept", "CON")

asis_output("### Fleiss' Kappa (IER)")

Fleiss’ Kappa (IER)

# Wide layout: one row per concept and one answer.<pid> column per participant.
# The first matrix excludes pid "0", the second keeps every pid.
df_answers_matrix_no_0 <- reshape(
  df_answers_no_0[c("pid", "concept", "answer")],
  idvar = "concept",
  timevar = "pid",
  direction = "wide"
)
df_answers_matrix <- reshape(
  df_answers_all[c("pid", "concept", "answer")],
  idvar = "concept",
  timevar = "pid",
  direction = "wide"
)
# Fleiss' kappa over the rater columns only (the concept label is dropped).
kappam.fleiss(df_answers_matrix_no_0[names(df_answers_matrix_no_0) != "concept"])
 Fleiss' Kappa for m Raters

 Subjects = 46 
   Raters = 8 
    Kappa = 0.519 

        z = 69 
  p-value = 0 
## Analyse difference from expert
# Long layout: one row per concept and participant column, with the reference
# answer (answer.0) kept alongside each participant answer.
df_answers_matrix_melt_0 <- melt(
  df_answers_matrix,
  id.vars = c("concept", "answer.0")
)
# 1 when the participant's answer equals answer.0, otherwise 0.
df_answers_matrix_melt_0$same <- ifelse(
  df_answers_matrix_melt_0$answer.0 == df_answers_matrix_melt_0$value,
  1,
  0
)

# Attach the similarity for each (answer.0, value) pair from df_bfo_sim;
# every row of the answer table is kept.
df_answers_bfo <- merge(
  df_answers_matrix_melt_0,
  df_bfo_sim,
  by.x = c("answer.0", "value"),
  by.y = c("c1_l", "c2_l"),
  all.x = TRUE
)
# Pairs without an entry in df_bfo_sim get similarity 0.
df_answers_bfo$similarity <- ifelse(
  is.na(df_answers_bfo$similarity),
  0,
  df_answers_bfo$similarity
)

asis_output("### Experimenter-expert agreement based on ontological similarity (EES)")

Experimenter-expert agreement based on ontological similarity (EES)

# Mean similarity to answer.0 per concept (NA values ignored).
agg_ees <- aggregate(
  df_answers_bfo$similarity,
  by = list(df_answers_bfo$concept),
  FUN = mean,
  na.rm = TRUE
)
names(agg_ees) <- c("concept", "EES")

asis_output("### Experimenter-expert Agreement (EEC)")

Experimenter-expert Agreement (EEC)

# Share of answers per concept that match answer.0 exactly; rows where the
# match flag is NA are left out of the mean.
agg_eec <- aggregate(
  df_answers_bfo$same,
  by = list(df_answers_bfo$concept),
  FUN = mean,
  na.rm = TRUE
)
names(agg_eec) <- c("concept", "EEC")

asis_output("### Shannons Entropy (SE)")

Shannons Entropy (SE)

# Row-wise entropy() over the rater columns (every column except the concept label).
df_answers_matrix_no_0$entropy <- apply(
  df_answers_matrix_no_0[names(df_answers_matrix_no_0) != "concept"],
  1,
  entropy
)


asis_output("### Inter-expert agreement based on ontological similarity (IES)")

Inter-expert agreement based on ontological similarity (IES)

# Majority answer per concept with "Not given" counted as a candidate answer.
# Rows are sorted by concept and descending frequency; which.max() then takes
# the first row holding the highest count, so ties go to the first such row.
dmost <- plyr::count(df_answers_no_0[c("concept", "answer")])
dmost <- dmost[order(dmost$concept, -dmost$freq), ]
dmost <- do.call(
  "rbind",
  by(dmost, dmost$concept, function(grp) grp[which.max(grp$freq), ])
)
names(dmost) <- c("concept", "majority", "freq")
# Concepts for which "Not given" was the most frequent response.
conceptsnotgivenmajority <- dmost[dmost$majority == "Not given", c("concept")]

# Majority answer per concept again, this time ignoring "Not given" responses.
dct <- plyr::count(
  df_answers_no_0[df_answers_no_0$answer != "Not given", c("concept", "answer")]
)
dmost <- dct[order(dct$concept, -dct$freq), ]
dmost <- do.call(
  "rbind",
  by(dmost, dmost$concept, function(grp) grp[which.max(grp$freq), ])
)
names(dmost) <- c("concept", "majority", "freq")

# Compute the majority similarity before adding an asterisk to the majority string.
d0simpair <- majoritysimilarity(df_answers_no_0, dmost)
# The asterisk marks concepts whose most frequent response was "Not given".
dmost$majority <- ifelse(
  dmost$concept %in% conceptsnotgivenmajority,
  paste0(dmost$majority, "*"),
  dmost$majority
)

# Mean of the similarities returned by majoritysimilarity(), per concept.
agg_ies <- aggregate(
  d0simpair$similarity,
  by = list(d0simpair$concept),
  FUN = mean
)
names(agg_ies) <- c("concept", "IES")

asis_output("### Proportion Analysis (???)")

Proportion Analysis (???)

# Share of participants behind each (concept, answer) count in dct.
dct$pc <- dct$freq / n_participants
# Per concept: mean, standard deviation and maximum of those shares.
dct_agg <- dplyr::summarise(
  group_by(dct, concept),
  mean = mean(pc),
  sd = sd(pc),
  max = max(pc)
)
# sd() of a single value is NA (not NaN), which is.nan() does not detect.
# Use is.na() so concepts with only one answer share get a spread of 0.
dct_agg$sd <- ifelse(is.na(dct_agg$sd), 0, dct_agg$sd)
#kable(dct_agg)
#kable(d[d$different_responses>1,],row.names = FALSE)
#kable(dkappfleis,row.names = FALSE)


asis_output("### Creating final dataframe")

Creating final dataframe

# Join all per-concept tables on "concept", left to right, in the listed order.
x <- Reduce(
  function(acc, nxt) merge(acc, nxt, by = "concept"),
  list(
    df_different_answers,
    as.data.frame(dct_agg),
    df_answers_matrix_no_0,
    df_answers_matrix[c("concept", "answer.0")],
    agg_eec,
    agg_ees,
    agg_ies,
    agg_con,
    dmost[c("concept", "majority")]
  )
)
# Store the absolute value of the entropy, then sort with the highest entropy first.
x$entropy <- abs(x$entropy)
df_full <- x[order(-x$entropy), ]

Fatigue analysis

df_fat<-merge(df_fatigue,df_full[c("concept","EES","IES","CON")],by="concept")
df_fat<-df_fat[order(df_fat$id),]
#df_fat$ies_dist_mean<-df_fat$IES-mean(df_fat$IES)
#View(df_fat)

asis_output("### Cut-off 26 (drop 20 out of 46)")

Cut-off 26 (drop 20 out of 46)

x<-melt(df_fat[df_fat$id<26,],id.vars = c("concept","id"))
y<- x %>% group_by(variable) %>% summarise(mean = mean(value))
ggplot(x,aes(id,value)) +geom_point() + geom_smooth()+ geom_hline(data = y, aes(yintercept=mean))  + facet_wrap('variable',scales='free_y')

asis_output("### Cut-off 31 (drop 15 out of 46)")

Cut-off 31 (drop 15 out of 46)

x1<-melt(df_fat[df_fat$id<31,],id.vars = c("concept","id"))
y1<- x1 %>% group_by(variable) %>% summarise(mean = mean(value))
ggplot(x1,aes(id,value)) +geom_point() + geom_smooth()+ geom_hline(data = y1, aes(yintercept=mean))  + facet_wrap('variable',scales='free_y')

asis_output("### Cut-off 36 (drop 10 out of 46)")

Cut-off 36 (drop 10 out of 46)

x2<-melt(df_fat[df_fat$id<36,],id.vars = c("concept","id"))
y2<- x2 %>% group_by(variable) %>% summarise(mean = mean(value))
ggplot(x2,aes(id,value)) +geom_point() + geom_smooth()+ geom_hline(data = y2, aes(yintercept=mean))  + facet_wrap('variable',scales='free_y')

asis_output("### Drop nothing")

Drop nothing

x3<-melt(df_fat,id.vars = c("concept","id"))
y3<- x3 %>% group_by(variable) %>% summarise(mean = mean(value))
ggplot(x3,aes(id,value)) +geom_point() + geom_smooth()+ geom_hline(data = y3, aes(yintercept=mean))  + facet_wrap('variable',scales='free_y')

mean(df_fat$EES)
[1] 0.7034604
mean(df_fat[1:as.integer(nrow(df_fat)/2),]$EES)
[1] 0.8143353
# NOTE(review): this slice starts at row as.integer(nrow(df_fat)/2), which is
# also the last row of the first-half slice used for the preceding mean, so
# that row is counted in both halves.
mean(df_fat[as.integer(nrow(df_fat)/2):nrow(df_fat),]$EES)
[1] 0.609561
cor.test(df_fat$EES,df_fat$id)

    Pearson's product-moment correlation

data:  df_fat$EES and df_fat$id
t = -4.9871, df = 44, p-value = 1.006e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.7588514 -0.3762792
sample estimates:
       cor 
-0.6009352 
cor.test(df_fat$IES,df_fat$id)

    Pearson's product-moment correlation

data:  df_fat$IES and df_fat$id
t = -4.872, df = 44, p-value = 1.47e-05
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.7528828 -0.3642641
sample estimates:
       cor 
-0.5919648 
theme_bfo20(ggplot(df_full,aes(EES,CON)) + geom_point()  + geom_smooth() + xlab("Experimenter-expert classification similarity (EES)")+ ylab("Self-rated confidence (CON)"))

ggsave("cor_con_ees.jpg",width = 3,height = 2)

theme_bfo20(ggplot(df_full,aes(IES,CON)) + geom_point() + geom_smooth() + xlab("Mean inter-expert alignment similarity (IES)")+ ylab("Self-rated confidence (CON)"))

ggsave("cor_con_ies.jpg",width = 3,height = 2)

theme_bfo20(ggplot(df_full,aes(max,EEC)) + geom_point() + geom_smooth() + xlab("Mean inter-expert agreement (MAC)")+ ylab("Experimenter-expert class. agreement (EEC)"))

#ggsave("cor_mac_eec.jpg",width = 3,height = 2)

# Scatter plot with smoother of IES (x) against EES (y). The axis labels now
# name the plotted variables; they previously repeated the MAC/EEC labels.
theme_bfo20(ggplot(df_full,aes(IES,EES)) + geom_point() + geom_smooth() + xlab("Mean inter-expert alignment similarity (IES)")+ ylab("Experimenter-expert classification similarity (EES)"))

#ggsave("cor_mac_eec.jpg",width = 3,height = 2)

User answers by concept

answer.0 is the answer determined through expert review.

concept answer.2 answer.6 answer.3 answer.4 answer.5 answer.1 answer.7 answer.8 answer.0
moment spatiotemporal instant temporal interval temporal instant site temporal instant fiat process part temporal instant spatiotemporal instant spatiotemporal instant
tan line one dimensional region fiat object part object boundary site one dimensional region object boundary Not given object boundary one dimensional region
air space tomorrow three dimensional region three dimensional region Not given Not given site site Not given object spatiotemporal interval
border fiat object part two dimensional region object boundary site fiat object part site fiat object part object boundary object boundary
time and place spatiotemporal instant Not given spatiotemporal interval Not given spatiotemporal interval fiat process part Not given spatiotemporal interval spatiotemporal instant
area where the hotel will be built two dimensional region site two dimensional region Not given three dimensional region site site site three dimensional region
hotel beach site fiat object part object site role site site fiat object part fiat object part
line of lattitude one dimensional region one dimensional region object boundary site one dimensional region site one dimensional region two dimensional region one dimensional region
patches on the floor two dimensional region object aggregate object aggregate object aggregate fiat object part object aggregate site two dimensional region scattered spatiotemporal region
situation Not given process aggregate Not given Not given processual context process Not given processual context processual context
surface of pool table Not given object boundary fiat object part object object boundary object boundary object boundary fiat object part two dimensional region
why I had problems sleeping Not given process generically dependent continuant Not given processual context Not given Not given processual context processual context
uncovered parts of the beach two dimensional region fiat object part fiat object part site fiat object part site site two dimensional region scattered spatiotemporal region
my hotel room site site three dimensional region Not given site site site object site
vacation weekend temporal interval fiat process part temporal interval temporal interval temporal interval process temporal interval spatiotemporal interval temporal interval
air space three dimensional region three dimensional region three dimensional region site three dimensional region site site object aggregate three dimensional region
distance quality quality quality quality one dimensional region one dimensional region Not given one dimensional region quality
kilometre zero zero dimensional region zero dimensional region generically dependent continuant site zero dimensional region site zero dimensional region site zero dimensional region
take off time temporal instant temporal interval temporal instant temporal interval temporal interval fiat process part temporal instant temporal instant temporal interval
place on wall where the postcard is put two dimensional region site fiat object part site two dimensional region site site site two dimensional region
point zero dimensional region zero dimensional region zero dimensional region site zero dimensional region site zero dimensional region one dimensional region zero dimensional region
timetable scattered temporal region generically dependent continuant scattered temporal region generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant object generically dependent continuant
bay three dimensional region site site site site site site object site
vacation location three dimensional region site site site spatiotemporal interval site site site spatial region
space three dimensional region site three dimensional region site three dimensional region site site site spatial region
to wash process process process process process function function function function
airplane flight process process processual context process process process process process process
bus object object object object object function object object object
clubbing process process object aggregate process process process process process process aggregate
digital photograph generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant object generically dependent continuant
dining process process process process process aggregate process process process process aggregate
edge fiat object part object boundary object boundary object boundary object boundary object boundary object boundary object boundary object boundary
flood plain two dimensional region site site site site site site site spatiotemporal interval
tourist area site site site site role site site site fiat object part
vacation brochure generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant generically dependent continuant object aggregate generically dependent continuant
warmth quality quality quality Not given quality quality quality quality quality
bus driver role role role role role role role role role
deckchairs on the beach object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate
drinking a beer process process process process process process process process process
end of cooking process boundary process boundary process boundary process boundary process boundary process boundary process boundary process boundary process boundary
end of flying process boundary process boundary process boundary process boundary process boundary process boundary process boundary process boundary process boundary
person object object object object object object object object object
tendency to be mosquito bitten disposition disposition disposition disposition disposition disposition disposition disposition disposition
tendency to defiate disposition disposition disposition disposition disposition disposition disposition disposition disposition
tour party object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate object aggregate
tourist role role role role role role role role role

Answer analysis

Metrics breakdown:

  • SE: Shannon's entropy, measurement of noise (default table sorting)
  • EEC: Proportion of ratings in line with expert verdict
  • EES: Average concept similarity of the given answer to the expert verdict. Differentiates better than exp_aggr.
  • DA: Number of different unique answers for concept
  • MAC: Largest proportion of raters voting for the same term. For example, if 4/8 raters have voted concept A (0.5 ratio), and 2/8 concept B (0.25 ratio), that value would be 0.5.
  • rat_mean: mean proportion size (perhaps not so nice, but, the larger, the better)
  • rat_sd: standard deviation of proportion size.
  • CON: Mean confidence of the rater for answering the question.
  • IES: Pairwise similarity between answers given (based on ontological similarity)

Inter-rater Agreement

concept MAC MACEEC IES SE DA CON
bus driver 1.00 0.00 1.00 1.00 1 4.38
deckchairs on the beach 1.00 0.00 1.00 1.00 1 4.50
drinking a beer 1.00 0.00 1.00 1.00 1 4.50
end of cooking 1.00 0.00 1.00 1.00 1 4.50
end of flying 1.00 0.00 1.00 1.00 1 4.62
person 1.00 0.00 1.00 1.00 1 4.25
tendency to be mosquito bitten 1.00 0.00 1.00 1.00 1 4.00
tendency to defiate 1.00 0.00 1.00 1.00 1 4.50
tour party 1.00 0.00 1.00 1.00 1 4.38
tourist 1.00 0.00 1.00 1.00 1 4.38
airplane flight 0.88 0.00 0.96 0.82 2 4.38
dining 0.88 0.75 0.96 0.82 2 4.38
edge 0.88 0.00 0.95 0.82 2 3.88
flood plain 0.88 0.88 0.93 0.82 2 3.14
digital photograph 0.88 0.00 0.92 0.82 2 4.50
vacation brochure 0.88 0.00 0.92 0.82 2 4.50
tourist area 0.88 0.88 0.92 0.82 2 3.88
bus 0.88 0.00 0.91 0.82 2 4.62
clubbing 0.88 0.88 0.90 0.82 2 4.25
bay 0.75 0.00 0.88 0.65 3 3.62
warmth 0.88 0.00 0.88 0.82 2 4.14
vacation location 0.75 0.75 0.83 0.65 3 3.88
point 0.62 0.00 0.82 0.57 3 4.38
take off time 0.50 0.12 0.81 0.53 3 4.50
place on wall where the postcard is put 0.62 0.38 0.80 0.57 3 3.71
space 0.62 0.62 0.79 0.68 2 4.12
vacation weekend 0.62 0.00 0.76 0.48 4 4.38
hotel beach 0.50 0.25 0.76 0.42 4 3.88
patches on the floor 0.50 0.50 0.75 0.42 4 3.38
my hotel room 0.62 0.00 0.75 0.48 4 4.25
line of lattitude 0.50 0.00 0.74 0.42 4 3.88
timetable 0.62 0.00 0.73 0.57 3 3.88
surface of pool table 0.50 0.50 0.71 0.42 4 3.75
kilometre zero 0.50 0.00 0.71 0.53 3 3.86
border 0.38 0.12 0.71 0.36 4 3.62
air space 0.50 0.00 0.71 0.53 3 4.12
to wash 0.62 0.25 0.70 0.68 2 4.38
uncovered parts of the beach 0.38 0.38 0.68 0.48 3 3.50
area where the hotel will be built 0.50 0.38 0.66 0.42 4 4.14
distance 0.50 0.00 0.64 0.53 3 4.25
tan line 0.38 0.12 0.64 0.28 5 3.62
moment 0.38 0.12 0.62 0.28 5 3.14
time and place 0.38 0.25 0.51 0.40 4 3.29
air space tomorrow 0.25 0.25 0.43 0.36 4 3.17
situation 0.25 0.00 0.42 0.42 4 3.00
why I had problems sleeping 0.25 0.00 0.36 0.42 4 3.00

Correctness according to Experimenter Verdict

concept EES EEC expert majority
bus driver 1.00 1.00 role role
deckchairs on the beach 1.00 1.00 object aggregate object aggregate
drinking a beer 1.00 1.00 process process
end of cooking 1.00 1.00 process boundary process boundary
end of flying 1.00 1.00 process boundary process boundary
person 1.00 1.00 object object
tendency to be mosquito bitten 1.00 1.00 disposition disposition
tendency to defiate 1.00 1.00 disposition disposition
tour party 1.00 1.00 object aggregate object aggregate
tourist 1.00 1.00 role role
airplane flight 0.96 0.88 process process
edge 0.95 0.88 object boundary object boundary
digital photograph 0.92 0.88 generically dependent continuant generically dependent continuant
vacation brochure 0.92 0.88 generically dependent continuant generically dependent continuant
bus 0.91 0.88 object object
bay 0.88 0.75 site site
warmth 0.88 0.88 quality quality
point 0.82 0.62 zero dimensional region zero dimensional region
take off time 0.78 0.38 temporal interval temporal instant
vacation weekend 0.76 0.62 temporal interval temporal interval
my hotel room 0.75 0.62 site site
line of lattitude 0.74 0.50 one dimensional region one dimensional region
timetable 0.73 0.62 generically dependent continuant generically dependent continuant
kilometre zero 0.71 0.50 zero dimensional region zero dimensional region
dining 0.71 0.12 process aggregate process
air space 0.71 0.50 three dimensional region three dimensional region
border 0.68 0.25 object boundary fiat object part
hotel beach 0.66 0.25 fiat object part site
distance 0.64 0.50 quality quality
space 0.61 0.00 spatial region site
clubbing 0.61 0.00 process aggregate process
place on wall where the postcard is put 0.56 0.25 two dimensional region site
tourist area 0.54 0.00 fiat object part site
tan line 0.51 0.25 one dimensional region object boundary
vacation location 0.51 0.00 spatial region site
area where the hotel will be built 0.51 0.12 three dimensional region site
to wash 0.50 0.38 function process
moment 0.49 0.25 spatiotemporal instant temporal instant
time and place 0.44 0.12 spatiotemporal instant spatiotemporal interval*
situation 0.42 0.25 processual context processual context*
why I had problems sleeping 0.36 0.25 processual context processual context*
surface of pool table 0.35 0.00 two dimensional region object boundary
uncovered parts of the beach 0.24 0.00 scattered spatiotemporal region fiat object part
patches on the floor 0.23 0.00 scattered spatiotemporal region object aggregate
flood plain 0.22 0.00 spatiotemporal interval site
air space tomorrow 0.14 0.00 spatiotemporal interval site*

Key Metrics Summary:

measure mean min median max sd
EEC 0.5081522 0.0000000 0.5000000 1.000 0.3831541
EES 0.7034604 0.1361111 0.7243304 1.000 0.2528527
IES 0.8081683 0.3645833 0.8151042 1.000 0.1697295
MAC 0.6902174 0.2500000 0.6250000 1.000 0.2468399
SE 0.6679079 0.2814536 0.6462406 1.000 0.2375013
DA 2.6521739 1.0000000 3.0000000 5.000 1.2150426
CON 4.0075052 3.0000000 4.1339286 4.625 0.4707099

Correlations

x<-df_full_res
ggplot(x,aes(IES,EES)) + geom_point()

z<-df_full_res[c("EEC","EES","MAC","IES","SE","DA","CON")]
cormat <- round(cor(z),2)
cor.test(z$EEC,z$EES)

    Pearson's product-moment correlation

data:  z$EEC and z$EES
t = 14.906, df = 44, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.8483096 0.9515455
sample estimates:
      cor 
0.9136177 
cor.test(z$EEC,z$MAC)

    Pearson's product-moment correlation

data:  z$EEC and z$MAC
t = 6.2275, df = 44, p-value = 1.562e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.4918959 0.8131786
sample estimates:
     cor 
0.684456 
cor.test(z$EEC,z$CON)

    Pearson's product-moment correlation

data:  z$EEC and z$CON
t = 5.5508, df = 44, p-value = 1.536e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.4319321 0.7856841
sample estimates:
      cor 
0.6417604 
cor.test(z$EEC,z$IES)

    Pearson's product-moment correlation

data:  z$EEC and z$IES
t = 5.3463, df = 44, p-value = 3.049e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.4123534 0.7763937
sample estimates:
      cor 
0.6275325 
cor.test(z$EEC,z$DA)

    Pearson's product-moment correlation

data:  z$EEC and z$DA
t = -5.6753, df = 44, p-value = 1.01e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.7911050 -0.4435122
sample estimates:
       cor 
-0.6501084 
cor.test(z$EEC,z$SE)

    Pearson's product-moment correlation

data:  z$EEC and z$SE
t = 6.3651, df = 44, p-value = 9.797e-08
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5032210 0.8182152
sample estimates:
      cor 
0.6923742 
cor.test(z$EES,z$MAC)

    Pearson's product-moment correlation

data:  z$EES and z$MAC
t = 7.5732, df = 44, p-value = 1.667e-09
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5910092 0.8556833
sample estimates:
      cor 
0.7522454 
cor.test(z$EES,z$CON)

    Pearson's product-moment correlation

data:  z$EES and z$CON
t = 8.252, df = 44, p-value = 1.759e-10
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.6321365 0.8723312
sample estimates:
      cor 
0.7794071 
cor.test(z$EES,z$IES)

    Pearson's product-moment correlation

data:  z$EES and z$IES
t = 7.5314, df = 44, p-value = 1.917e-09
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5883009 0.8545674
sample estimates:
      cor 
0.7504373 
cor.test(z$EES,z$DA)

    Pearson's product-moment correlation

data:  z$EES and z$DA
t = -6.1691, df = 44, p-value = 1.904e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.8109882 -0.4870045
sample estimates:
       cor 
-0.6810221 
cor.test(z$EES,z$SE)

    Pearson's product-moment correlation

data:  z$EES and z$SE
t = 6.9238, df = 44, p-value = 1.478e-08
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5463195 0.8369502
sample estimates:
      cor 
0.7220963 
cor.test(z$MAC,z$CON)

    Pearson's product-moment correlation

data:  z$MAC and z$CON
t = 6.3962, df = 44, p-value = 8.819e-08
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5057347 0.8193266
sample estimates:
      cor 
0.6941255 
cor.test(z$MAC,z$IES)

    Pearson's product-moment correlation

data:  z$MAC and z$IES
t = 20.305, df = 44, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9119025 0.9725029
sample estimates:
      cor 
0.9505639 
cor.test(z$MAC,z$DA)

    Pearson's product-moment correlation

data:  z$MAC and z$DA
t = -14.062, df = 44, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9462756 -0.8327739
sample estimates:
       cor 
-0.9044249 
cor.test(z$MAC,z$SE)

    Pearson's product-moment correlation

data:  z$MAC and z$SE
t = 22.292, df = 44, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.9257525 0.9769407
sample estimates:
      cor 
0.9584679 
cor.test(z$CON,z$IES)

    Pearson's product-moment correlation

data:  z$CON and z$IES
t = 6.7683, df = 44, p-value = 2.501e-08
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5347717 0.8319963
sample estimates:
      cor 
0.7141957 
cor.test(z$CON,z$DA)

    Pearson's product-moment correlation

data:  z$CON and z$DA
t = -5.7065, df = 44, p-value = 9.096e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.7924353 -0.4463720
sample estimates:
       cor 
-0.6521623 
cor.test(z$CON,z$SE)

    Pearson's product-moment correlation

data:  z$CON and z$SE
t = 5.6595, df = 44, p-value = 1.066e-06
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.4420572 0.7904268
sample estimates:
      cor 
0.6490622 
cor.test(z$IES,z$DA)

    Pearson's product-moment correlation

data:  z$IES and z$DA
t = -9.9037, df = 44, p-value = 8.998e-13
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9032850 -0.7123804
sample estimates:
       cor 
-0.8308554 
cor.test(z$IES,z$SE)

    Pearson's product-moment correlation

data:  z$IES and z$SE
t = 11.905, df = 44, p-value = 2.37e-15
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.7814032 0.9284099
sample estimates:
      cor 
0.8735462 
cor.test(z$DA,z$SE)

    Pearson's product-moment correlation

data:  z$DA and z$SE
t = -32.89, df = 44, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.9890955 -0.9644035
sample estimates:
       cor 
-0.9802628 
kable(cormat,row.names = TRUE)
EEC EES MAC IES SE DA CON
EEC 1.00 0.91 0.68 0.63 0.69 -0.65 0.64
EES 0.91 1.00 0.75 0.75 0.72 -0.68 0.78
MAC 0.68 0.75 1.00 0.95 0.96 -0.90 0.69
IES 0.63 0.75 0.95 1.00 0.87 -0.83 0.71
SE 0.69 0.72 0.96 0.87 1.00 -0.98 0.65
DA -0.65 -0.68 -0.90 -0.83 -0.98 1.00 -0.65
CON 0.64 0.78 0.69 0.71 0.65 -0.65 1.00
#print(xtable(cormat,digits=c(2)),include.rownames=TRUE)

Mean entropy by expert ratings

The table is sorted by mean entropy, which can be read as the degree of disagreement. Note that disposition, object aggregate, process boundary and role were always classified correctly by all reviewers, while scattered spatiotemporal region, spatiotemporal instant and one dimensional region were very far from being correctly classified.

expert_opinion mean_entropy
spatiotemporal instant 1.98
one dimensional region 1.95
processual context 1.75
scattered spatiotemporal region 1.66
three dimensional region 1.58
two dimensional region 1.52
temporal interval 1.48
zero dimensional region 1.35
site 1.31
object boundary 1.22
spatiotemporal interval 1.22
fiat object part 1.15
spatial region 1.01
quality 0.97
function 0.95
generically dependent continuant 0.80
process aggregate 0.54
object 0.27
process 0.27
disposition 0.00
object aggregate 0.00
process boundary 0.00
role 0.00

Impact analysis of BFO classes across OBO ontologies

BFO Concept coverage

Coverage, my favourite metric, is the number of ontologies in the corpus mentioning a particular entity. It is reported here both as an absolute number and as a proportion of all ontologies in the corpus.

BFO 1.1 Concept Nr. ontologies Coverage (%)
processual entity 39 28.68
quality 38 27.94
disposition 36 26.47
independent continuant 35 25.74
specifically dependent continuant 33 24.26
role 32 23.53
occurrent 29 21.32
continuant 27 19.85
entity 27 19.85
function 27 19.85
realizable entity 26 19.12
generically dependent continuant 25 18.38
site 20 14.71
spatial region 19 13.97
connected temporal region 18 13.24
object 18 13.24
object aggregate 18 13.24
scattered temporal region 18 13.24
temporal region 18 13.24
one dimensional region 17 12.50
three dimensional region 17 12.50
two dimensional region 17 12.50
zero dimensional region 17 12.50
process boundary 16 11.76
spatiotemporal region 16 11.76
fiat object part 14 10.29
object boundary 14 10.29
material entity 2 1.47
process 2 1.47
connected spatiotemporal region 1 0.74
dependent continuant 1 0.74
fiat process part 1 0.74
process aggregate 1 0.74
processual context 1 0.74
scattered spatiotemporal region 1 0.74
spatiotemporal instant 1 0.74
spatiotemporal interval 1 0.74
temporal instant 1 0.74
temporal interval 1 0.74

Coverage graph

BFO Concept impact

Impact is the average proportion of axioms containing the entity across the corpus. For example, if ‘continuant’ was used in 10 axioms in ontology O, and O has 1000 axioms, the impact of the entity would be 1 %. The overall BFO impact is the mean impact across all ontologies in OBO (136 in the current snapshot).

BFO 1.1 Concept Impact (mean %)
processual entity 0.463
disposition 0.338
independent continuant 0.260
role 0.258
specifically dependent continuant 0.245
quality 0.234
occurrent 0.222
continuant 0.191
spatial region 0.187
realizable entity 0.152
temporal region 0.140
function 0.137
entity 0.131
generically dependent continuant 0.128
object boundary 0.114
connected temporal region 0.091
three dimensional region 0.083
site 0.078
object aggregate 0.076
zero dimensional region 0.061
spatiotemporal region 0.059
scattered temporal region 0.054
one dimensional region 0.054
two dimensional region 0.054
process boundary 0.053
object 0.034
fiat object part 0.027
material entity 0.010
process 0.008
dependent continuant 0.006
process aggregate 0.005
fiat process part 0.005
processual context 0.005
connected spatiotemporal region 0.005
spatiotemporal interval 0.002
scattered spatiotemporal region 0.002
temporal interval 0.002
temporal instant 0.002
spatiotemporal instant 0.002

Impact graph

BFO Concept usage

Usage is similar to impact, but it is not normalised by ontology: it is simply the number of axioms containing the entity across the whole corpus divided by the number of all axioms in the corpus (i.e. the sum of all ontology sizes). It is the most biased of the three metrics, as a single ontology with an idiosyncratic modelling style can dominate it (imagine one ontology in which 360 axioms use ‘processual entity’).

BFO 1.1 Concept Nr. axioms Usage (%)
processual entity 360 0.008
role 342 0.007
continuant 275 0.006
independent continuant 301 0.006
occurrent 284 0.006
quality 276 0.006
function 227 0.005
disposition 172 0.004
generically dependent continuant 183 0.004
specifically dependent continuant 204 0.004
entity 164 0.003
realizable entity 149 0.003
spatial region 160 0.003
connected temporal region 75 0.002
object aggregate 77 0.002
object boundary 72 0.002
temporal region 104 0.002
object 30 0.001
one dimensional region 42 0.001
process boundary 53 0.001
scattered temporal region 38 0.001
site 64 0.001
spatiotemporal region 71 0.001
three dimensional region 54 0.001
two dimensional region 44 0.001
zero dimensional region 50 0.001
connected spatiotemporal region 6 0.000
dependent continuant 7 0.000
fiat object part 21 0.000
fiat process part 6 0.000
material entity 11 0.000
process 8 0.000
process aggregate 6 0.000
processual context 6 0.000
scattered spatiotemporal region 3 0.000
spatiotemporal instant 3 0.000
spatiotemporal interval 3 0.000
temporal instant 3 0.000
temporal interval 3 0.000

Usage graph

Survey comments

Number of comments by question

concept freq EES CON
border 5 0.6845238 3.625000
bay 4 0.8750000 3.625000
hotel beach 4 0.6625000 3.875000
take off time 4 0.7790179 4.500000
tan line 4 0.5111607 3.625000
tourist area 4 0.5375000 3.875000
vacation location 4 0.5062500 3.875000
why I had problems sleeping 4 0.3645833 3.000000
air space tomorrow 3 0.1361111 3.166667
clubbing 3 0.6111111 4.250000
digital photograph 3 0.9218750 4.500000
edge 3 0.9464286 3.875000
person 3 1.0000000 4.250000
place on wall where the postcard is put 3 0.5647321 3.714286
situation 3 0.4166667 3.000000
space 3 0.6125000 4.125000
time and place 3 0.4397321 3.285714
timetable 3 0.7343750 3.875000
vacation brochure 3 0.9218750 4.500000
airplane flight 2 0.9583333 4.375000
area where the hotel will be built 2 0.5059524 4.142857
bus 2 0.9125000 4.625000
bus driver 2 1.0000000 4.375000
deckchairs on the beach 2 1.0000000 4.500000
dining 2 0.7083333 4.375000
distance 2 0.6406250 4.250000
drinking a beer 2 1.0000000 4.500000
kilometre zero 2 0.7142857 3.857143
line of lattitude 2 0.7440476 3.875000
my hotel room 2 0.7500000 4.250000
patches on the floor 2 0.2326389 3.375000
point 2 0.8154762 4.375000
surface of pool table 2 0.3549107 3.750000
tendency to be mosquito bitten 2 1.0000000 4.000000
to wash 2 0.5000000 4.375000
tourist 2 1.0000000 4.375000
vacation weekend 2 0.7604167 4.375000
warmth 2 0.8750000 4.142857
air space 1 0.7075893 4.125000
end of cooking 1 1.0000000 4.500000
end of flying 1 1.0000000 4.625000
flood plain 1 0.2222222 3.142857
moment 1 0.4913194 3.142857
tour party 1 1.0000000 4.375000
uncovered parts of the beach 1 0.2395833 3.500000

Comment length by question

concept x EES CON
warmth 1221 0.8750000 4.142857
time and place 985 0.4397321 3.285714
why I had problems sleeping 941 0.3645833 3.000000
border 881 0.6845238 3.625000
timetable 848 0.7343750 3.875000
take off time 721 0.7790179 4.500000
vacation location 680 0.5062500 3.875000
space 653 0.6125000 4.125000
edge 612 0.9464286 3.875000
distance 599 0.6406250 4.250000
surface of pool table 589 0.3549107 3.750000
tan line 486 0.5111607 3.625000
situation 457 0.4166667 3.000000
area where the hotel will be built 448 0.5059524 4.142857
tendency to be mosquito bitten 445 1.0000000 4.000000
air space tomorrow 436 0.1361111 3.166667
moment 407 0.4913194 3.142857
place on wall where the postcard is put 404 0.5647321 3.714286
clubbing 402 0.6111111 4.250000
point 399 0.8154762 4.375000
patches on the floor 381 0.2326389 3.375000
line of lattitude 369 0.7440476 3.875000
person 369 1.0000000 4.250000
drinking a beer 341 1.0000000 4.500000
tourist 330 1.0000000 4.375000
vacation weekend 285 0.7604167 4.375000
kilometre zero 266 0.7142857 3.857143
hotel beach 256 0.6625000 3.875000
vacation brochure 253 0.9218750 4.500000
digital photograph 248 0.9218750 4.500000
airplane flight 231 0.9583333 4.375000
bay 224 0.8750000 3.625000
bus driver 192 1.0000000 4.375000
my hotel room 185 0.7500000 4.250000
dining 167 0.7083333 4.375000
bus 166 0.9125000 4.625000
tourist area 155 0.5375000 3.875000
flood plain 149 0.2222222 3.142857
to wash 149 0.5000000 4.375000
deckchairs on the beach 141 1.0000000 4.500000
end of cooking 108 1.0000000 4.500000
end of flying 105 1.0000000 4.625000
tour party 45 1.0000000 4.375000
uncovered parts of the beach 35 0.2395833 3.500000
air space 19 0.7075893 4.125000

Comment length by BFO concept

Group.1 x
quality 1820
object boundary 1493
processual context 1398
spatiotemporal instant 1392
generically dependent continuant 1349
spatial region 1333
temporal interval 1006
two dimensional region 993
one dimensional region 855
zero dimensional region 665
spatiotemporal interval 585
process 572
process aggregate 569
object 535
role 522
three dimensional region 467
disposition 445
scattered spatiotemporal region 416
fiat object part 411
site 409
process boundary 213
object aggregate 186
function 149

Number of comments by participant

pid freq
4 33
7 33
6 16
5 12
1 11
2 6

Number of comments per theme

x freq
A 20
B 52
C 12
D 18
E 26
F 4

Number of comments per concept

x freq
border 5
bay 4
hotel beach 4
take off time 4
tan line 4
tourist area 4
vacation location 4
why I had problems sleeping 4
airspace tomorrow 3
clubbing 3
edge 3
person 3
place on wall where the postcard is put 3
situation 3
time and place 3
timetable 3
vacation brochure 3
airplane flight 2
area where the hotel will be built 2
bus 2
bus driver 2
deckchairs on the beach 2
digital photograph 2
dining 2
distance 2
drinking a beer 2
kilometre zero 2
line of lattitude 2
my hotel room 2
patches on the floor 2
point 2
space 2
surface of pool table 2
tendency to be mosquito bitten 2
to wash 2
tourist 2
vacation weekend 2
warmth 2
airspace 1
end of cooking 1
end of flying 1
flood plain 1
moment 1
tour party 1
uncovered parts of the beach 1

BFO: Class hierarchy by expert agreement

BFO: Class hierarchy by expert agreement: Alt 1

The colour shading indicates mean similarity to expert (exp_sim), and the size of the node indicates coverage of the concept (as determined by a BioPortal survey).

# Coverage per BFO concept, joined with how often each concept occurs in the
# `expert` column of df_full_res.
r1 <- d_cov[c("BFO 1.1 Concept", "Coverage (%)")]
r2 <- plyr::count(df_full_res["expert"])
r3 <- merge(
  r1, r2,
  by.x = "BFO 1.1 Concept",
  by.y = "expert",
  all.x = TRUE
)
# Concepts without a match in r2 get NA from the left join; count them as 0.
r3$freq <- ifelse(is.na(r3$freq), 0, r3$freq)
# Number of concepts that occur exactly twice in the `expert` column.
sum(r3$freq == 2)
[1] 21

Correlation analysis

SE EEC EES IES DA MAC CON COV
SE 1.00 0.72 0.74 0.87 -0.99 0.97 0.72 0.42
EEC 0.72 1.00 0.91 0.66 -0.68 0.72 0.67 0.52
EES 0.74 0.91 1.00 0.79 -0.68 0.80 0.80 0.49
IES 0.87 0.66 0.79 1.00 -0.84 0.95 0.80 0.37
DA -0.99 -0.68 -0.68 -0.84 1.00 -0.93 -0.73 -0.45
MAC 0.97 0.72 0.80 0.95 -0.93 1.00 0.78 0.44
CON 0.72 0.67 0.80 0.80 -0.73 0.78 1.00 0.47
COV 0.42 0.52 0.49 0.37 -0.45 0.44 0.47 1.00

BFO Concept Analysis: Overview Table

# Scatter plot of OBO Foundry coverage against EEC with a smoothed trend line,
# styled by the project helper theme_bfo20().
theme_bfo20(
  ggplot(do, aes(x = EEC, y = COV)) +
    geom_point() +
    geom_smooth() +
    xlab("Experimenter-expert class. agreement (EEC)") +
    ylab("OBO Foundry Coverage (%)")
)

# Write the plot shown above to disk.
ggsave("cor_bfocov_eec.jpg", width = 3, height = 2)

# NOTE(review): the plot uses EEC but this correlation uses EES -- confirm
# which of the two is intended.
cor(do[["EES"]], do[["COV"]])
[1] 0.4874226
# Pairwise correlations between the per-concept metrics, rounded to 2 digits.
cormat2 <- round(
  cor(do[, c("EEC", "EES", "MAC", "IES", "SE", "DA", "COV")]),
  digits = 2
)

# Rendered without row labels; rows follow the same order as the columns.
kable(cormat2, row.names = FALSE)
EEC EES MAC IES SE DA COV
1.00 0.91 0.72 0.66 0.72 -0.68 0.52
0.91 1.00 0.80 0.79 0.74 -0.68 0.49
0.72 0.80 1.00 0.95 0.97 -0.93 0.44
0.66 0.79 0.95 1.00 0.87 -0.84 0.37
0.72 0.74 0.97 0.87 1.00 -0.99 0.42
-0.68 -0.68 -0.93 -0.84 -0.99 1.00 -0.45
0.52 0.49 0.44 0.37 0.42 -0.45 1.00
# Overview table of all per-concept metrics, rounded to two decimals.
kable(do, row.names = FALSE, digits = 2)
concept EEC EES MAC IES SE DA CON COV
quality 0.69 0.76 0.69 0.76 0.68 2.50 4.20 27.94
disposition 1.00 1.00 1.00 1.00 1.00 1.00 4.25 26.47
role 1.00 1.00 1.00 1.00 1.00 1.00 4.38 23.53
function 0.38 0.50 0.62 0.70 0.68 2.00 4.38 19.85
generically dependent continuant 0.79 0.86 0.79 0.86 0.73 2.33 4.29 18.38
site 0.69 0.81 0.69 0.81 0.56 3.50 3.94 14.71
spatial region 0.00 0.56 0.69 0.81 0.66 2.50 4.00 13.97
object 0.94 0.96 0.94 0.96 0.91 1.50 4.44 13.24
object aggregate 1.00 1.00 1.00 1.00 1.00 1.00 4.44 13.24
one dimensional region 0.38 0.63 0.44 0.69 0.35 4.50 3.75 12.50
three dimensional region 0.31 0.61 0.50 0.68 0.47 3.50 4.13 12.50
two dimensional region 0.12 0.46 0.56 0.76 0.49 3.50 3.73 12.50
zero dimensional region 0.56 0.76 0.56 0.76 0.55 3.00 4.12 12.50
process boundary 1.00 1.00 1.00 1.00 1.00 1.00 4.56 11.76
fiat object part 0.12 0.60 0.69 0.84 0.62 3.00 3.88 10.29
object boundary 0.56 0.82 0.62 0.83 0.59 3.00 3.75 10.29
process 0.94 0.98 0.94 0.98 0.91 1.50 4.44 1.47
process aggregate 0.06 0.66 0.88 0.93 0.82 2.00 4.31 0.74
processual context 0.25 0.39 0.25 0.39 0.42 4.00 3.00 0.74
scattered spatiotemporal region 0.00 0.24 0.44 0.72 0.45 3.50 3.44 0.74
spatiotemporal instant 0.19 0.47 0.38 0.57 0.34 4.50 3.21 0.74
spatiotemporal interval 0.00 0.18 0.56 0.68 0.59 3.00 3.15 0.74
temporal interval 0.50 0.77 0.56 0.79 0.51 3.50 4.44 0.74
#print(xtable(do[c("concept","EEC","EES","MAC","IES","CON","COV")],digits=c(2)),include.rownames=FALSE)

Comments for concepts with low EEC

# Pair every concept whose EEC is exactly 0 with the participants' answers and
# free-text comments for that concept ("concept" is the only shared column).
com <- merge(
  df_full_res[df_full_res$EEC == 0, c("concept", "EEC", "expert")],
  df_commments[c("concept", "answer.question", "answer.comment")],
  by = "concept"
)
kable(com, row.names = FALSE, digits = 2)
concept EEC expert answer.question answer.comment
clubbing 0 process aggregate process I’m less confident in how people use the word clubbing , but I think there’s a good case to say that clubbing on a particular evening is a process with participants including the people and the clubs, and with bone fide boundaries such as leaving the home and returning to the home.
clubbing 0 process aggregate process Maybe a process aggregate?
clubbing 0 process aggregate process At least there is such a process. But what you are putting in quotes is somewhat distracting.
flood plain 0 spatiotemporal interval site The plain is a site. The part that’s flooded could also be a good site. To capture the example, I would try to work with an aggregate of those sites.
patches on the floor 0 scattered spatiotemporal region object aggregate Could be object aggregate under the interpretation that what is being referred to is the water parts of the patches. However another plausible interpretation is that the reference is to the combination of water and floor. So then an independent continuants that is a sum object and fiat parts
patches on the floor 0 scattered spatiotemporal region site In the example, there are bona fide boundaries. Otherwise, they can be fiat object parts.
space 0 spatial region site This is the most confusing question so far. A throne room is a site. It is located in a spatial region. The phrase used to be indicates that we’re talking about that spatial region across time, and so a spatiotemporal region. The use of the space makes me think of a currently existing room or building, which would be a new site that happens to be located in the spatial region (now) that corresponds to the spatial region in Hadrian’s time. But space alone would make me think of the spatial region itself.
space 0 spatial region site There could also be arguments in favour of using three-dimensional region here. I don’t see much use of making this distinction.
surface of pool table 0 two dimensional region object boundary in the sense of geometrical surface. If the material is meant, then it would be a fiat object part.
surface of pool table 0 two dimensional region object Language is vague. I see 3 possible interpretations. 1) The material that makes up what is called the surface, namely the felt, in some kinds of pool tables. 2) part of an object boundary. Actually I’m not sure whether all parts of an object boundary are object boundaries themselves. 3) a siteAll 3 are independent continuants, so you could confidently assign that class. I don’t do that because I don’t find it particularly useful to know just that something is an independent continuant.
tourist area 0 fiat object part role A role that inheres in some object or site.
tourist area 0 fiat object part site Again, may be confounded with 3D region
tourist area 0 fiat object part site A particular area of a city is a site.
tourist area 0 fiat object part site should really be site with a role
uncovered parts of the beach 0 scattered spatiotemporal region fiat object part there might be arguments for site
vacation location 0 spatial region site Geographic locators tend to be ambiguous in language as to whether the site is being referred to or the building or other material occupiers of the site. It is a site rather than a spatial region because BFO’s notion of spatial region is that of a newtonian fixed space and on that view earth and places on earth move through different parts of space moment to moment.
vacation location 0 spatial region site There could also be arguments in favour of using three-dimensional region here. I don’t see much use of making this distinction.
vacation location 0 spatial region site Parts of the world that you would vacation to, such as Paris or a particular resort, are sites in BFO, but there’s plenty of room for confusion with BFO spatial regions.
vacation location 0 spatial region three dimensional region may be a site

Difficult subparts analysis

# Per-concept difficulty measures; MACEECDIFF is 1 - (MAC - EEC).
df_diff <- do[c("concept", "MAC", "EEC")]
df_diff$MACEECDIFF <- 1 - (df_diff$MAC - df_diff$EEC)
df_ch <- df_obo_ch[c("sub_l", "super_l")]

# Add every subclass back in as a subclass of itself, so that a class's own
# values are averaged together with those of its subclasses.
x <- df_ch
x$super_l <- x$sub_l
x <- unique(x[c("super_l", "sub_l")])
df_ch <- rbind(df_ch, x)

# Merge to get one big frame with all subclasses and their difficulty.
## First hierarchy level: average the concept values within each superclass.
## Classes without survey data contribute NA, which the mean ignores.
df_diff_ch <- merge(
  df_ch, df_diff,
  by.x = "sub_l", by.y = "concept", all.x = TRUE
)
df_diff_x <- aggregate(
  cbind(df_diff_ch$MAC, df_diff_ch$EEC, df_diff_ch$MACEECDIFF),
  by = list(df_diff_ch$super_l),
  FUN = function(v) mean(v, na.rm = TRUE)
)

## Second, third and fourth hierarchy level: feed the previous level's
## averages (columns Group.1, V1, V2, V3) back in and average again.
for (level in 2:4) {
  df_diff_ch <- merge(
    df_ch, df_diff_x,
    by.x = "sub_l", by.y = "Group.1", all.x = TRUE
  )
  df_diff_x <- aggregate(
    cbind(df_diff_ch$V1, df_diff_ch$V2, df_diff_ch$V3),
    by = list(df_diff_ch$super_l),
    FUN = function(v) mean(v, na.rm = TRUE)
  )
}

#View(df_diff_x)
names(df_diff_x) <- c("concept", "MAC", "EEC", "MACEECDIFF")

# Emit a level-3 Markdown heading ("EEC") into the rendered report.
asis_output("### EEC")

EEC

# Re-enable the pdf()/dev.off() pair to write this figure to a PDF file.
#pdf("graph_bfo_eec.pdf", width=10, height=7)

# Class hierarchy plot with the EEC column as the plotted value.
plot_hierarchy(
  plot_prepare_df(df_diff_x, vlabel = "concept", vvalue = "EEC"),
  df_obo_ch,
  circular = TRUE
)

#dev.off()

# Emit a level-3 Markdown heading ("MAC") into the rendered report.
asis_output("### MAC")

MAC

# Class hierarchy plot with the MAC column as the plotted value.
plot_hierarchy(
  plot_prepare_df(df_diff_x, vlabel = "concept", vvalue = "MAC"),
  df_obo_ch,
  circular = TRUE
)

# Disabled PDF device; re-enable together with the matching dev.off() to
# write the following figure to a file.
#pdf("graph_bfo_eecmac.pdf", width=10, height=7)
# Emit a level-3 Markdown heading into the rendered report.
asis_output("### Difference of EEC and MAC")

Difference of EEC and MAC

# Class hierarchy plot with the MACEECDIFF column as the plotted value.
plot_hierarchy(
  plot_prepare_df(df_diff_x, vlabel = "concept", vvalue = "MACEECDIFF"),
  df_obo_ch,
  circular = TRUE
)

#dev.off()

Robert Stevens, Phil Lord, James Malone, Nicolas Matentzoglu

2018-07-04