library(tidyverse)
library(kableExtra)
library(ggplot2)
library(mdatools)
#library(ggbiplot)
library(plotly)
library(stringr)




# Load the two semicolon-separated prediction tables.
df_detrend <- read.csv(file = 'Predizione_immagini_detrend.csv', sep = ';')
df_derivata <- read.csv(file = 'Predizione_immagini_derivata.csv', sep = ';')

# Adulterant share, derived as the complement of the oregano share.
df_detrend$perc_Adulterant <- 1 - df_detrend$perc_origano

Conteggio campioni




# Assign English column names positionally (eight names, so the table must
# have exactly eight columns at this point).
names(df_detrend) <- c(
  'ID', 'Adulterant', 'Predicted_Oregano', 'NA',
  'Contaminant', 'Type', 'Actual_oregano', 'Actual_adulterant'
)

# Translate the Italian sample-type labels to English.
df_detrend$Type <- df_detrend$Type %>%
  str_replace('Miscela', 'Mix') %>%
  str_replace('Adulterante', 'Adulterant') %>%
  str_replace('Origano', 'Oregano')

# Number of rows per sample type, stacked by contaminant.
p <- ggplot(df_detrend, aes(x = Type, fill = Contaminant)) +
  geom_bar() +
  labs(x = '', y = '') +
  theme_minimal()

ggplotly(p)

Detrend


# Striped, scrollable HTML table of every row of df_detrend.
df_detrend %>%
  kable(row.names = TRUE) %>%
  kable_styling('striped', fixed_thead = TRUE, full_width = FALSE) %>%
  scroll_box(height = "500px")
ID Adulterant Predicted_Oregano NA Contaminant Type Actual_oregano Actual_adulterant
1 P_A_01_R1 0.0748 0.9241 0.0011 Sumac Mix 0.950 0.050
2 P_A_01_R2 0.0459 0.9530 0.0011 Sumac Mix 0.950 0.050
3 P_A_01_R3 0.0543 0.9457 0.0000 Sumac Mix 0.950 0.050
4 P_A_02_R1 0.1219 0.8765 0.0016 Sumac Mix 0.900 0.100
5 P_A_02_R2 0.1173 0.8816 0.0011 Sumac Mix 0.900 0.100
6 P_A_02_R3 0.1208 0.8782 0.0010 Sumac Mix 0.900 0.100
7 P_A_03_R1 0.0820 0.9174 0.0006 Sumac Mix 0.950 0.050
8 P_A_03_R2 0.0532 0.9454 0.0014 Sumac Mix 0.950 0.050
9 P_A_03_R3 0.0617 0.9379 0.0004 Sumac Mix 0.950 0.050
10 P_A_04_R1 0.1392 0.8595 0.0013 Sumac Mix 0.900 0.100
11 P_A_04_R2 0.1176 0.8803 0.0021 Sumac Mix 0.900 0.100
12 P_A_04_R3 0.1177 0.8798 0.0025 Sumac Mix 0.900 0.100
13 P_A_05_R1 0.8759 0.1236 0.0004 Myrtle Adulterant 0.000 1.000
14 P_A_05_R2 0.8561 0.1437 0.0003 Myrtle Adulterant 0.000 1.000
15 P_A_05_R3 0.8522 0.1450 0.0029 Myrtle Adulterant 0.000 1.000
16 P_A_06_R1 0.9539 0.0448 0.0013 Olive Adulterant 0.000 1.000
17 P_A_06_R2 0.9250 0.0671 0.0079 Olive Adulterant 0.000 1.000
18 P_A_06_R3 0.9124 0.0808 0.0068 Olive Adulterant 0.000 1.000
19 P_A_07_R1 0.9850 0.0132 0.0018 Strawberry Adulterant 0.000 1.000
20 P_A_07_R2 0.9912 0.0054 0.0035 Strawberry Adulterant 0.000 1.000
21 P_A_07_R3 0.9926 0.0073 0.0002 Strawberry Adulterant 0.000 1.000
22 P_A_08_R1 0.9831 0.0142 0.0027 Sumac Adulterant 0.000 1.000
23 P_A_08_R2 0.9713 0.0243 0.0044 Sumac Adulterant 0.000 1.000
24 P_A_08_R3 0.9753 0.0154 0.0093 Sumac Adulterant 0.000 1.000
25 P_A_09_R1 0.0681 0.9319 0.0000 Infiorescenze Oregano 1.000 0.000
26 P_A_09_R2 0.0792 0.9203 0.0005 Infiorescenze Oregano 1.000 0.000
27 P_A_09_R3 0.0662 0.9330 0.0008 Infiorescenze Oregano 1.000 0.000
28 P_A_10_R1 0.0052 0.9943 0.0005 Not Refined Oregano 1.000 0.000
29 P_A_10_R2 0.0126 0.9863 0.0011 Not Refined Oregano 1.000 0.000
30 P_A_10_R3 0.0063 0.9937 0.0000 Not Refined Oregano 1.000 0.000
31 P_M_48_01_R1 0.1686 0.8314 0.0000 Strawberry Mix 0.800 0.200
32 P_M_48_01_R2 0.1507 0.8493 0.0000 Strawberry Mix 0.800 0.200
33 P_M_48_01_R3 0.1577 0.8421 0.0001 Strawberry Mix 0.800 0.200
34 P_M_48_02_R1 0.3291 0.6709 0.0000 Strawberry Mix 0.700 0.300
35 P_M_48_02_R2 0.3836 0.6164 0.0000 Strawberry Mix 0.700 0.300
36 P_M_48_02_R3 0.3549 0.6425 0.0026 Strawberry Mix 0.700 0.300
37 P_M_48_03_R1 0.0906 0.9064 0.0030 Sumac Mix 0.800 0.200
38 P_M_48_03_R2 0.1026 0.8949 0.0025 Sumac Mix 0.800 0.200
39 P_M_48_03_R3 0.0901 0.9073 0.0026 Sumac Mix 0.800 0.200
40 P_M_48_04_R1 0.4463 0.5537 0.0000 Myrtle Mix 0.400 0.600
41 P_M_48_04_R2 0.4244 0.5756 0.0000 Myrtle Mix 0.400 0.600
42 P_M_48_04_R3 0.4252 0.5748 0.0000 Myrtle Mix 0.400 0.600
43 P_M_48_05_R1 0.1061 0.8930 0.0010 Olive Mix 0.700 0.300
44 P_M_48_05_R2 0.0939 0.9060 0.0002 Olive Mix 0.700 0.300
45 P_M_48_05_R3 0.1186 0.8814 0.0000 Olive Mix 0.700 0.300
46 P_M_48_06_R1 0.1912 0.8088 0.0000 Strawberry Mix 0.800 0.200
47 P_M_48_06_R2 0.1942 0.8058 0.0000 Strawberry Mix 0.800 0.200
48 P_M_48_06_R3 0.1593 0.8404 0.0003 Strawberry Mix 0.800 0.200
49 P_M_48_07_R1 0.3606 0.6385 0.0009 Strawberry Mix 0.600 0.400
50 P_M_48_07_R2 0.2927 0.7057 0.0016 Strawberry Mix 0.600 0.400
51 P_M_48_07_R3 0.3677 0.6304 0.0019 Strawberry Mix 0.600 0.400
52 P_M_48_08_R1 0.3597 0.6390 0.0013 Olive Mix 0.600 0.400
53 P_M_48_08_R2 0.3171 0.6827 0.0001 Olive Mix 0.600 0.400
54 P_M_48_08_R3 0.3045 0.6955 0.0000 Olive Mix 0.600 0.400
55 P_M_48_14_R1 0.2169 0.7830 0.0001 Olive Mix 0.800 0.200
56 P_M_48_14_R2 0.2159 0.7839 0.0001 Olive Mix 0.800 0.200
57 P_M_48_14_R3 0.2377 0.7594 0.0030 Olive Mix 0.800 0.200
58 P_M_48_16_R1 0.0045 0.9951 0.0004 Other Mix 0.940 0.060
59 P_M_48_16_R2 0.0016 0.9984 0.0000 Other Mix 0.940 0.060
60 P_M_48_16_R3 0.0068 0.9909 0.0023 Other Mix 0.940 0.060
61 P_M_48_17_R1 0.0026 0.9973 0.0002 Other Mix 0.985 0.015
62 P_M_48_17_R2 0.0038 0.9947 0.0015 Other Mix 0.985 0.015
63 P_M_48_17_R3 0.0014 0.9981 0.0004 Other Mix 0.985 0.015
64 P_M_48_29_R1 0.0915 0.9085 0.0000 Myrtle Mix 0.900 0.100
65 P_M_48_29_R2 0.0970 0.9030 0.0000 Myrtle Mix 0.900 0.100
66 P_M_48_29_R3 0.0940 0.9060 0.0000 Myrtle Mix 0.900 0.100
67 P_M_48_36_R1 0.1236 0.8750 0.0015 Sumac Mix 0.900 0.100
68 P_M_48_36_R2 0.1696 0.8298 0.0006 Sumac Mix 0.900 0.100
69 P_M_48_36_R3 0.1705 0.8294 0.0001 Sumac Mix 0.900 0.100
70 P_O_47_01_R1 0.0231 0.9769 0.0000 Oregano Oregano 1.000 0.000
71 P_O_47_01_R2 0.0324 0.9676 0.0000 Oregano Oregano 1.000 0.000
72 P_O_47_01_R3 0.0390 0.9600 0.0010 Oregano Oregano 1.000 0.000
73 P_O_47_02_R1 0.0148 0.9852 0.0000 Oregano Oregano 1.000 0.000
74 P_O_47_02_R2 0.0105 0.9894 0.0001 Oregano Oregano 1.000 0.000
75 P_O_47_02_R3 0.0107 0.9893 0.0000 Oregano Oregano 1.000 0.000
76 P_O_47_03_R1 0.0254 0.9746 0.0000 Oregano Oregano 1.000 0.000
77 P_O_47_03_R2 0.0184 0.9815 0.0002 Oregano Oregano 1.000 0.000
78 P_O_47_03_R3 0.0147 0.9853 0.0000 Oregano Oregano 1.000 0.000
79 P_O_47_04_R1 0.0476 0.9524 0.0000 Oregano Oregano 1.000 0.000
80 P_O_47_04_R2 0.0648 0.9348 0.0004 Oregano Oregano 1.000 0.000
81 P_O_47_04_R3 0.0731 0.9269 0.0000 Oregano Oregano 1.000 0.000
82 P_O_47_05_R1 0.0073 0.9927 0.0000 Oregano Oregano 1.000 0.000
83 P_O_47_05_R2 0.0123 0.9877 0.0000 Oregano Oregano 1.000 0.000
84 P_O_47_05_R3 0.0132 0.9868 0.0000 Oregano Oregano 1.000 0.000
85 P_O_47_06_R1 0.0612 0.9388 0.0000 Oregano Oregano 1.000 0.000
86 P_O_47_06_R2 0.0519 0.9481 0.0000 Oregano Oregano 1.000 0.000
87 P_O_47_06_R3 0.0608 0.9392 0.0000 Oregano Oregano 1.000 0.000
88 P_O_47_07_R1 0.0224 0.9773 0.0003 Oregano Oregano 1.000 0.000
89 P_O_47_07_R2 0.0163 0.9837 0.0000 Oregano Oregano 1.000 0.000
90 P_O_47_07_R3 0.0145 0.9855 0.0000 Oregano Oregano 1.000 0.000
91 P_O_47_08_R1 0.0811 0.9189 0.0000 Oregano Oregano 1.000 0.000
92 P_O_47_08_R2 0.0904 0.9096 0.0000 Oregano Oregano 1.000 0.000
93 P_O_47_08_R3 0.0700 0.9300 0.0000 Oregano Oregano 1.000 0.000
94 P_O_47_09_R1 0.0155 0.9840 0.0004 Oregano Oregano 1.000 0.000
95 P_O_47_09_R2 0.0195 0.9805 0.0000 Oregano Oregano 1.000 0.000
96 P_O_47_09_R3 0.0143 0.9848 0.0010 Oregano Oregano 1.000 0.000
97 P_O_47_10_R1 0.0361 0.9639 0.0000 Oregano Oregano 1.000 0.000
98 P_O_47_10_R2 0.0521 0.9479 0.0000 Oregano Oregano 1.000 0.000
99 P_O_47_10_R3 0.0502 0.9498 0.0000 Oregano Oregano 1.000 0.000
100 P_O_47_11_R1 0.0665 0.9335 0.0000 Oregano Oregano 1.000 0.000
101 P_O_47_11_R2 0.0734 0.9266 0.0000 Oregano Oregano 1.000 0.000
102 P_O_47_11_R3 0.0765 0.9235 0.0000 Oregano Oregano 1.000 0.000
103 P_O_47_12_R1 0.0415 0.9585 0.0000 Oregano Oregano 1.000 0.000
104 P_O_47_12_R2 0.0476 0.9524 0.0000 Oregano Oregano 1.000 0.000
105 P_O_47_12_R3 0.0466 0.9534 0.0000 Oregano Oregano 1.000 0.000
106 P_O_47_13_R1 0.0117 0.9883 0.0000 Oregano Oregano 1.000 0.000
107 P_O_47_13_R2 0.0161 0.9839 0.0000 Oregano Oregano 1.000 0.000
108 P_O_47_13_R3 0.0136 0.9864 0.0000 Oregano Oregano 1.000 0.000
109 P_O_47_14_R1 0.0036 0.9964 0.0000 Oregano Oregano 1.000 0.000
110 P_O_47_14_R2 0.0037 0.9963 0.0000 Oregano Oregano 1.000 0.000
111 P_O_47_14_R3 0.0037 0.9963 0.0000 Oregano Oregano 1.000 0.000
112 P_O_47_15_R1 0.3579 0.6408 0.0013 Infiorescenze Oregano 1.000 0.000
113 P_O_47_15_R2 0.3463 0.6536 0.0002 Infiorescenze Oregano 1.000 0.000
114 P_O_47_15_R3 0.3283 0.6708 0.0009 Infiorescenze Oregano 1.000 0.000
115 P_O_47_16_R1 0.0437 0.9563 0.0000 Oregano Oregano 1.000 0.000
116 P_O_47_16_R2 0.0189 0.9811 0.0000 Oregano Oregano 1.000 0.000
117 P_O_47_16_R3 0.0297 0.9703 0.0000 Oregano Oregano 1.000 0.000
118 P_O_47_17_R1 0.0434 0.9566 0.0000 Oregano Oregano 1.000 0.000
119 P_O_47_17_R2 0.0903 0.9097 0.0000 Oregano Oregano 1.000 0.000
120 P_O_47_17_R3 0.0632 0.9367 0.0001 Oregano Oregano 1.000 0.000
121 P_O_47_18_R1 0.0259 0.9741 0.0000 Oregano Oregano 1.000 0.000
122 P_O_47_18_R2 0.0242 0.9758 0.0000 Oregano Oregano 1.000 0.000
123 P_O_47_18_R3 0.0197 0.9803 0.0000 Oregano Oregano 1.000 0.000
124 P_O_47_19_R1 0.0602 0.9392 0.0006 Oregano Oregano 1.000 0.000
125 P_O_47_19_R2 0.0465 0.9531 0.0004 Oregano Oregano 1.000 0.000
126 P_O_47_19_R3 0.0822 0.9178 0.0000 Oregano Oregano 1.000 0.000
127 P_O_47_20_R1 0.0140 0.9772 0.0088 Oregano Oregano 1.000 0.000
128 P_O_47_20_R2 0.0172 0.9826 0.0002 Oregano Oregano 1.000 0.000
129 P_O_47_20_R3 0.0114 0.9886 0.0000 Oregano Oregano 1.000 0.000
130 P_O_47_21_R1 0.0219 0.9781 0.0000 Oregano Oregano 1.000 0.000
131 P_O_47_21_R2 0.0244 0.9756 0.0000 Oregano Oregano 1.000 0.000
132 P_O_47_21_R3 0.0397 0.9603 0.0000 Oregano Oregano 1.000 0.000
133 P_O_47_22_R1 0.0373 0.9626 0.0001 Oregano Oregano 1.000 0.000
134 P_O_47_22_R2 0.0236 0.9764 0.0000 Oregano Oregano 1.000 0.000
135 P_O_47_22_R3 0.0251 0.9748 0.0001 Oregano Oregano 1.000 0.000
136 P_O_47_23_R1 0.0014 0.9986 0.0000 Oregano Oregano 1.000 0.000
137 P_O_47_23_R2 0.0018 0.9973 0.0010 Oregano Oregano 1.000 0.000
138 P_O_47_23_R3 0.0019 0.9978 0.0003 Oregano Oregano 1.000 0.000
139 P_O_47_24_R1 0.0413 0.9587 0.0000 Oregano Oregano 1.000 0.000
140 P_O_47_24_R2 0.0339 0.9659 0.0003 Oregano Oregano 1.000 0.000
141 P_O_47_24_R3 0.0359 0.9638 0.0003 Oregano Oregano 1.000 0.000



Percentuale di Oregano


# Histogram of the predicted oregano fraction, with the rows of type
# 'Adulterant' left out.
df_detrend1 <- df_detrend[df_detrend$Type != 'Adulterant', ]

p <- ggplot(df_detrend1, aes(x = Predicted_Oregano, fill = Type)) +
  geom_histogram(bins = 40, position = position_dodge2()) +
  scale_fill_manual(values = c('deepskyblue3', 'forestgreen')) +
  scale_x_continuous(breaks = seq(0.5, 1, 0.05)) +
  scale_y_continuous(breaks = seq(0, 24, 2)) +
  labs(x = 'Predicted oregano %', y = '') +
  theme_bw()

ggplotly(p)


# Box plot of the predicted oregano fraction per contaminant, split by sample
# type.
# No `text = ID` aesthetic is mapped here: ggplot2 builds the default groups
# from the interaction of all discrete aesthetics, so a per-image ID would put
# every observation in its own group and the boxes would degenerate into
# single-observation marks.
p <- ggplot(df_detrend, aes(x = Contaminant, y = Predicted_Oregano, color = Type)) +
  geom_boxplot() +
  scale_color_manual(values = c('brown3', 'deepskyblue4', 'forestgreen')) +
  theme_bw() +
  xlab('') +
  ylab('Predicted % oregano')

ggplotly(p)


# Actual vs. predicted oregano fraction, coloured by sample type.
# `class` and `text` are extra mappings that geom_point does not draw;
# presumably they are there so the ggplotly tooltip shows them.
p <- ggplot(
  df_detrend,
  aes(x = Actual_oregano, y = Predicted_Oregano, color = Type, class = Contaminant, text = ID)
) +
  scale_color_manual(values = c('brown3', 'deepskyblue4', 'forestgreen')) +
  geom_point() +
  labs(x = 'Actual % oregano', y = 'Predicted % oregano') +
  theme_bw()

ggplotly(p)


# Same scatter, coloured by contaminant (default palette) with the sample
# type carried as the extra `class` mapping.
p <- ggplot(
  df_detrend,
  aes(x = Actual_oregano, y = Predicted_Oregano, color = Contaminant, class = Type, text = ID)
) +
  geom_point() +
  labs(x = 'Actual % oregano', y = 'Predicted % oregano') +
  theme_bw()

ggplotly(p)



Ricerca di un valore soglia

Selezione delle immagini appartenenti alle repliche 1 e 2, ovvero quelle che hanno costituito il training set, escludendo i campioni di Adulterant, Oregano not refined, infiorescenze e other.


# Rows whose type and contaminant are both 'Oregano'.
df1 <- df_detrend[df_detrend$Type == 'Oregano' & df_detrend$Contaminant == 'Oregano', ]

# Split by replicate tag found in the ID: 'R3' rows go to dfO3, rows tagged
# 'R1' or 'R2' stay in df1.
dfO3 <- df1[str_detect(df1$ID, 'R3'), ]
df1 <- df1[str_detect(df1$ID, 'R1|R2'), ]

# All 'Mix' rows except those whose contaminant is 'Other'.
dfmisc <- df_detrend[df_detrend$Type == 'Mix' & df_detrend$Contaminant != 'Other', ]

df1 %>%
  kable(row.names = TRUE) %>%
  kable_styling('striped', fixed_thead = TRUE, full_width = FALSE) %>%
  scroll_box(height = "500px")
ID Adulterant Predicted_Oregano NA Contaminant Type Actual_oregano Actual_adulterant
70 P_O_47_01_R1 0.0231 0.9769 0.0000 Oregano Oregano 1 0
71 P_O_47_01_R2 0.0324 0.9676 0.0000 Oregano Oregano 1 0
73 P_O_47_02_R1 0.0148 0.9852 0.0000 Oregano Oregano 1 0
74 P_O_47_02_R2 0.0105 0.9894 0.0001 Oregano Oregano 1 0
76 P_O_47_03_R1 0.0254 0.9746 0.0000 Oregano Oregano 1 0
77 P_O_47_03_R2 0.0184 0.9815 0.0002 Oregano Oregano 1 0
79 P_O_47_04_R1 0.0476 0.9524 0.0000 Oregano Oregano 1 0
80 P_O_47_04_R2 0.0648 0.9348 0.0004 Oregano Oregano 1 0
82 P_O_47_05_R1 0.0073 0.9927 0.0000 Oregano Oregano 1 0
83 P_O_47_05_R2 0.0123 0.9877 0.0000 Oregano Oregano 1 0
85 P_O_47_06_R1 0.0612 0.9388 0.0000 Oregano Oregano 1 0
86 P_O_47_06_R2 0.0519 0.9481 0.0000 Oregano Oregano 1 0
88 P_O_47_07_R1 0.0224 0.9773 0.0003 Oregano Oregano 1 0
89 P_O_47_07_R2 0.0163 0.9837 0.0000 Oregano Oregano 1 0
91 P_O_47_08_R1 0.0811 0.9189 0.0000 Oregano Oregano 1 0
92 P_O_47_08_R2 0.0904 0.9096 0.0000 Oregano Oregano 1 0
94 P_O_47_09_R1 0.0155 0.9840 0.0004 Oregano Oregano 1 0
95 P_O_47_09_R2 0.0195 0.9805 0.0000 Oregano Oregano 1 0
97 P_O_47_10_R1 0.0361 0.9639 0.0000 Oregano Oregano 1 0
98 P_O_47_10_R2 0.0521 0.9479 0.0000 Oregano Oregano 1 0
100 P_O_47_11_R1 0.0665 0.9335 0.0000 Oregano Oregano 1 0
101 P_O_47_11_R2 0.0734 0.9266 0.0000 Oregano Oregano 1 0
103 P_O_47_12_R1 0.0415 0.9585 0.0000 Oregano Oregano 1 0
104 P_O_47_12_R2 0.0476 0.9524 0.0000 Oregano Oregano 1 0
106 P_O_47_13_R1 0.0117 0.9883 0.0000 Oregano Oregano 1 0
107 P_O_47_13_R2 0.0161 0.9839 0.0000 Oregano Oregano 1 0
109 P_O_47_14_R1 0.0036 0.9964 0.0000 Oregano Oregano 1 0
110 P_O_47_14_R2 0.0037 0.9963 0.0000 Oregano Oregano 1 0
115 P_O_47_16_R1 0.0437 0.9563 0.0000 Oregano Oregano 1 0
116 P_O_47_16_R2 0.0189 0.9811 0.0000 Oregano Oregano 1 0
118 P_O_47_17_R1 0.0434 0.9566 0.0000 Oregano Oregano 1 0
119 P_O_47_17_R2 0.0903 0.9097 0.0000 Oregano Oregano 1 0
121 P_O_47_18_R1 0.0259 0.9741 0.0000 Oregano Oregano 1 0
122 P_O_47_18_R2 0.0242 0.9758 0.0000 Oregano Oregano 1 0
124 P_O_47_19_R1 0.0602 0.9392 0.0006 Oregano Oregano 1 0
125 P_O_47_19_R2 0.0465 0.9531 0.0004 Oregano Oregano 1 0
127 P_O_47_20_R1 0.0140 0.9772 0.0088 Oregano Oregano 1 0
128 P_O_47_20_R2 0.0172 0.9826 0.0002 Oregano Oregano 1 0
130 P_O_47_21_R1 0.0219 0.9781 0.0000 Oregano Oregano 1 0
131 P_O_47_21_R2 0.0244 0.9756 0.0000 Oregano Oregano 1 0
133 P_O_47_22_R1 0.0373 0.9626 0.0001 Oregano Oregano 1 0
134 P_O_47_22_R2 0.0236 0.9764 0.0000 Oregano Oregano 1 0
136 P_O_47_23_R1 0.0014 0.9986 0.0000 Oregano Oregano 1 0
137 P_O_47_23_R2 0.0018 0.9973 0.0010 Oregano Oregano 1 0
139 P_O_47_24_R1 0.0413 0.9587 0.0000 Oregano Oregano 1 0
140 P_O_47_24_R2 0.0339 0.9659 0.0003 Oregano Oregano 1 0


# Lower-tail percentiles of the predicted oregano fraction in df1; the
# percentile labels returned by quantile() become the row names.
dfperc <- data.frame(
  Percentile = quantile(
    df1$Predicted_Oregano,
    probs = c(0, 0.03, 0.05, 0.1, 0.15, 0.2)
  )
)
kable_paper(
  kable(dfperc, row.names = TRUE),
  'striped', fixed_thead = TRUE, full_width = FALSE
)
Percentile
0% 0.909600
3% 0.912920
5% 0.920825
10% 0.934150
15% 0.939100
20% 0.948100
# Candidate thresholds taken from df1: the minimum predicted oregano fraction
# and its 5th and 10th percentiles.
soglia0 <- min(df1$Predicted_Oregano)
soglia05 <- quantile(df1$Predicted_Oregano, probs = 0.05)
soglia10 <- quantile(df1$Predicted_Oregano, probs = 0.10)



Relativamente alla percentuale di Oregano predetta dal modello, per la classe ‘Oregano’ il valore più basso è 0.9096.


Si valutano i dati relativi alle immagini del test set più quelli delle miscele.


# Threshold in use: the minimum predicted oregano fraction observed in df1.
soglia <- soglia0

# Evaluation set: the mixture rows plus the 'R3' oregano rows. By construction
# it only contains the types 'Mix' and 'Oregano'.
dftest <- rbind(dfmisc, dfO3)
#write.csv(dftest, "dftest.csv", row.names=FALSE)

# Decision rule: a row is predicted 'Oregano' when Predicted_Oregano >= soglia
# and 'Mix' otherwise. A single >= / < split is used so that a value exactly
# equal to the threshold is still labelled according to its true type; with
# strict < and > on both sides such a row would fall through to 'TN' in both
# columns whatever its type.
#
# Oregano_test: outcome with 'Oregano' as the positive class.
dftest$Oregano_test <- with(dftest, ifelse(
  Predicted_Oregano >= soglia,
  ifelse(Type == 'Oregano', 'TP', 'FP'),
  ifelse(Type == 'Oregano', 'FN', 'TN')
))
# misc_test: outcome with 'Mix' as the positive class.
dftest$misc_test <- with(dftest, ifelse(
  Predicted_Oregano < soglia,
  ifelse(Type == 'Mix', 'TP', 'FP'),
  ifelse(Type == 'Mix', 'FN', 'TN')
))



kable(dftest, row.names = TRUE) %>%
  kable_styling('striped', fixed_thead = TRUE, full_width = FALSE) %>%
  scroll_box(height = "500px")
ID Adulterant Predicted_Oregano NA Contaminant Type Actual_oregano Actual_adulterant Oregano_test misc_test
1 P_A_01_R1 0.0748 0.9241 0.0011 Sumac Mix 0.95 0.05 FP FN
2 P_A_01_R2 0.0459 0.9530 0.0011 Sumac Mix 0.95 0.05 FP FN
3 P_A_01_R3 0.0543 0.9457 0.0000 Sumac Mix 0.95 0.05 FP FN
4 P_A_02_R1 0.1219 0.8765 0.0016 Sumac Mix 0.90 0.10 TN TP
5 P_A_02_R2 0.1173 0.8816 0.0011 Sumac Mix 0.90 0.10 TN TP
6 P_A_02_R3 0.1208 0.8782 0.0010 Sumac Mix 0.90 0.10 TN TP
7 P_A_03_R1 0.0820 0.9174 0.0006 Sumac Mix 0.95 0.05 FP FN
8 P_A_03_R2 0.0532 0.9454 0.0014 Sumac Mix 0.95 0.05 FP FN
9 P_A_03_R3 0.0617 0.9379 0.0004 Sumac Mix 0.95 0.05 FP FN
10 P_A_04_R1 0.1392 0.8595 0.0013 Sumac Mix 0.90 0.10 TN TP
11 P_A_04_R2 0.1176 0.8803 0.0021 Sumac Mix 0.90 0.10 TN TP
12 P_A_04_R3 0.1177 0.8798 0.0025 Sumac Mix 0.90 0.10 TN TP
31 P_M_48_01_R1 0.1686 0.8314 0.0000 Strawberry Mix 0.80 0.20 TN TP
32 P_M_48_01_R2 0.1507 0.8493 0.0000 Strawberry Mix 0.80 0.20 TN TP
33 P_M_48_01_R3 0.1577 0.8421 0.0001 Strawberry Mix 0.80 0.20 TN TP
34 P_M_48_02_R1 0.3291 0.6709 0.0000 Strawberry Mix 0.70 0.30 TN TP
35 P_M_48_02_R2 0.3836 0.6164 0.0000 Strawberry Mix 0.70 0.30 TN TP
36 P_M_48_02_R3 0.3549 0.6425 0.0026 Strawberry Mix 0.70 0.30 TN TP
37 P_M_48_03_R1 0.0906 0.9064 0.0030 Sumac Mix 0.80 0.20 TN TP
38 P_M_48_03_R2 0.1026 0.8949 0.0025 Sumac Mix 0.80 0.20 TN TP
39 P_M_48_03_R3 0.0901 0.9073 0.0026 Sumac Mix 0.80 0.20 TN TP
40 P_M_48_04_R1 0.4463 0.5537 0.0000 Myrtle Mix 0.40 0.60 TN TP
41 P_M_48_04_R2 0.4244 0.5756 0.0000 Myrtle Mix 0.40 0.60 TN TP
42 P_M_48_04_R3 0.4252 0.5748 0.0000 Myrtle Mix 0.40 0.60 TN TP
43 P_M_48_05_R1 0.1061 0.8930 0.0010 Olive Mix 0.70 0.30 TN TP
44 P_M_48_05_R2 0.0939 0.9060 0.0002 Olive Mix 0.70 0.30 TN TP
45 P_M_48_05_R3 0.1186 0.8814 0.0000 Olive Mix 0.70 0.30 TN TP
46 P_M_48_06_R1 0.1912 0.8088 0.0000 Strawberry Mix 0.80 0.20 TN TP
47 P_M_48_06_R2 0.1942 0.8058 0.0000 Strawberry Mix 0.80 0.20 TN TP
48 P_M_48_06_R3 0.1593 0.8404 0.0003 Strawberry Mix 0.80 0.20 TN TP
49 P_M_48_07_R1 0.3606 0.6385 0.0009 Strawberry Mix 0.60 0.40 TN TP
50 P_M_48_07_R2 0.2927 0.7057 0.0016 Strawberry Mix 0.60 0.40 TN TP
51 P_M_48_07_R3 0.3677 0.6304 0.0019 Strawberry Mix 0.60 0.40 TN TP
52 P_M_48_08_R1 0.3597 0.6390 0.0013 Olive Mix 0.60 0.40 TN TP
53 P_M_48_08_R2 0.3171 0.6827 0.0001 Olive Mix 0.60 0.40 TN TP
54 P_M_48_08_R3 0.3045 0.6955 0.0000 Olive Mix 0.60 0.40 TN TP
55 P_M_48_14_R1 0.2169 0.7830 0.0001 Olive Mix 0.80 0.20 TN TP
56 P_M_48_14_R2 0.2159 0.7839 0.0001 Olive Mix 0.80 0.20 TN TP
57 P_M_48_14_R3 0.2377 0.7594 0.0030 Olive Mix 0.80 0.20 TN TP
64 P_M_48_29_R1 0.0915 0.9085 0.0000 Myrtle Mix 0.90 0.10 TN TP
65 P_M_48_29_R2 0.0970 0.9030 0.0000 Myrtle Mix 0.90 0.10 TN TP
66 P_M_48_29_R3 0.0940 0.9060 0.0000 Myrtle Mix 0.90 0.10 TN TP
67 P_M_48_36_R1 0.1236 0.8750 0.0015 Sumac Mix 0.90 0.10 TN TP
68 P_M_48_36_R2 0.1696 0.8298 0.0006 Sumac Mix 0.90 0.10 TN TP
69 P_M_48_36_R3 0.1705 0.8294 0.0001 Sumac Mix 0.90 0.10 TN TP
72 P_O_47_01_R3 0.0390 0.9600 0.0010 Oregano Oregano 1.00 0.00 TP TN
75 P_O_47_02_R3 0.0107 0.9893 0.0000 Oregano Oregano 1.00 0.00 TP TN
78 P_O_47_03_R3 0.0147 0.9853 0.0000 Oregano Oregano 1.00 0.00 TP TN
81 P_O_47_04_R3 0.0731 0.9269 0.0000 Oregano Oregano 1.00 0.00 TP TN
84 P_O_47_05_R3 0.0132 0.9868 0.0000 Oregano Oregano 1.00 0.00 TP TN
87 P_O_47_06_R3 0.0608 0.9392 0.0000 Oregano Oregano 1.00 0.00 TP TN
90 P_O_47_07_R3 0.0145 0.9855 0.0000 Oregano Oregano 1.00 0.00 TP TN
93 P_O_47_08_R3 0.0700 0.9300 0.0000 Oregano Oregano 1.00 0.00 TP TN
96 P_O_47_09_R3 0.0143 0.9848 0.0010 Oregano Oregano 1.00 0.00 TP TN
99 P_O_47_10_R3 0.0502 0.9498 0.0000 Oregano Oregano 1.00 0.00 TP TN
102 P_O_47_11_R3 0.0765 0.9235 0.0000 Oregano Oregano 1.00 0.00 TP TN
105 P_O_47_12_R3 0.0466 0.9534 0.0000 Oregano Oregano 1.00 0.00 TP TN
108 P_O_47_13_R3 0.0136 0.9864 0.0000 Oregano Oregano 1.00 0.00 TP TN
111 P_O_47_14_R3 0.0037 0.9963 0.0000 Oregano Oregano 1.00 0.00 TP TN
117 P_O_47_16_R3 0.0297 0.9703 0.0000 Oregano Oregano 1.00 0.00 TP TN
120 P_O_47_17_R3 0.0632 0.9367 0.0001 Oregano Oregano 1.00 0.00 TP TN
123 P_O_47_18_R3 0.0197 0.9803 0.0000 Oregano Oregano 1.00 0.00 TP TN
126 P_O_47_19_R3 0.0822 0.9178 0.0000 Oregano Oregano 1.00 0.00 TP TN
129 P_O_47_20_R3 0.0114 0.9886 0.0000 Oregano Oregano 1.00 0.00 TP TN
132 P_O_47_21_R3 0.0397 0.9603 0.0000 Oregano Oregano 1.00 0.00 TP TN
135 P_O_47_22_R3 0.0251 0.9748 0.0001 Oregano Oregano 1.00 0.00 TP TN
138 P_O_47_23_R3 0.0019 0.9978 0.0003 Oregano Oregano 1.00 0.00 TP TN
141 P_O_47_24_R3 0.0359 0.9638 0.0003 Oregano Oregano 1.00 0.00 TP TN


# Histogram of the predicted oregano fraction in dftest, with the soglia0
# threshold drawn as a dashed red line labelled "Min".
p <- ggplot(dftest, aes(x = Predicted_Oregano, fill = Type)) +
  geom_histogram(bins = 40, position = position_dodge2()) +
  geom_vline(xintercept = soglia0, linetype = "dashed", color = "red", size = 0.5) +
  annotate(geom = "text", x = soglia0 - 0.017, y = 8.1, label = "Min", color = "red") +
  scale_fill_manual(values = c('deepskyblue3', 'forestgreen')) +
  scale_x_continuous(breaks = seq(0.5, 1, 0.05)) +
  scale_y_continuous(breaks = seq(0, 8, 1)) +
  labs(x = 'Predicted % oregano', y = '') +
  theme_bw()
ggplotly(p)



# Median predicted oregano fraction per sample type. The result is stored in a
# variable named `median`; calls such as median(x) still reach the function,
# because R skips non-function objects when looking up a function name.
median <- summarise(
  group_by(dftest, Type),
  median = median(Predicted_Oregano)
)

# Density of the predicted oregano fraction per type, with each type's median
# marked by a dashed vertical line.
p <- ggplot(dftest, aes(x = Predicted_Oregano, fill = Type)) +
  geom_density(aes(y = ..density..), alpha = 0.4) +
  geom_vline(
    data = median,
    mapping = aes(xintercept = median, color = Type),
    linetype = 'dashed'
  ) +
  scale_fill_manual(values = c('deepskyblue3', 'forestgreen')) +
  scale_color_manual(values = c('deepskyblue4', 'darkgreen')) +
  scale_x_continuous(breaks = seq(0.5, 1, 0.05)) +
  labs(x = 'Predicted % oregano', y = 'Density', title = 'Density plot') +
  theme_bw()

ggplotly(p)


# Box plot of the predicted oregano fraction per contaminant in dftest, with
# the soglia0 threshold as a dashed red horizontal line labelled "Min".
p <- ggplot(dftest, aes(x = Contaminant, y = Predicted_Oregano, color = Type)) +
  geom_boxplot() +
  geom_hline(yintercept = soglia0, linetype = "dashed", color = "red", size = 0.5) +
  # Disabled: line for the 10th-percentile threshold.
  #geom_hline(yintercept = soglia10, linetype="dashed", color = "orange", size=0.5)+
  annotate(geom = "text", x = 0.55, y = soglia0 + 0.01, label = "Min", color = "red") +
  # Disabled: label for the 10th-percentile threshold.
  #annotate(geom="text", x=0.7, y= soglia10+0.01, label="10%", color="orange")+
  scale_color_manual(values = c('deepskyblue4', 'forestgreen')) +
  labs(x = '', y = 'Predicted % oregano') +
  theme_bw()

ggplotly(p)


La percentuale di NA per tale campione è invece in linea con il resto del dataset.



# Actual vs. predicted oregano fraction in dftest, coloured by contaminant,
# with the soglia0 threshold as a dashed red horizontal line labelled "Min".
p <- ggplot(
  dftest,
  aes(x = Actual_oregano, y = Predicted_Oregano, color = Contaminant, class = Type, text = ID)
) +
  geom_point() +
  geom_hline(yintercept = soglia0, linetype = "dashed", color = "red", size = 0.5) +
  # Disabled: line for the 10th-percentile threshold.
  #geom_hline(yintercept = soglia10, linetype="dashed", color = "orange", size=0.5)+
  annotate(geom = "text", x = 0.4, y = soglia0 + 0.01, label = "Min", color = "red") +
  # Disabled: label for the 10th-percentile threshold.
  #annotate(geom="text", x=0.42, y= soglia10+0.01, label="10%", color="orange")+
  labs(x = 'Actual % oregano', y = 'Predicted % oregano') +
  theme_bw()

ggplotly(p)


Sulla base dei dati appena trovati è possibile ricavare i valori di sensibilità e specificità per entrambe le classi.



# Outcome counts read from the labels stored in dftest:
# o* = 'Oregano' as positive class, m* = 'Mix' as positive class.
otp <- nrow(dftest[dftest$Oregano_test == 'TP', ])
otn <- nrow(dftest[dftest$Oregano_test == 'TN', ])
ofp <- nrow(dftest[dftest$Oregano_test == 'FP', ])
ofn <- nrow(dftest[dftest$Oregano_test == 'FN', ])
mtp <- nrow(dftest[dftest$misc_test == 'TP', ])
mtn <- nrow(dftest[dftest$misc_test == 'TN', ])
mfp <- nrow(dftest[dftest$misc_test == 'FP', ])
mfn <- nrow(dftest[dftest$misc_test == 'FN', ])

# Accuracy: share of rows labelled TP or TN.
oacc <- ((otp + otn) / nrow(dftest))
macc <- ((mtp + mtn) / nrow(dftest))

# One row per class, one column per outcome.
Oregano <- c(otp, otn, ofp, ofn)
Mix <- c(mtp, mtn, mfp, mfn)

dfconf <- data.frame(rbind(Oregano, Mix))
colnames(dfconf) <- c('TP', 'TN', 'FP', 'FN')

# Confusion matrix: rows are the predicted class, columns the real class.
Oregano_reale <- c(otp, ofn)
Miscela_reale <- c(ofp, otn)

dfconf2 <- data.frame(Oregano_reale, Miscela_reale)
dimnames(dfconf2) <- list(
  c('Oregano predetto', 'Miscela predetta'),
  c('Oregano reale', 'Miscela reale')
)


kable_paper(
  kable(dfconf2, row.names = TRUE),
  'hover', fixed_thead = TRUE, full_width = FALSE
)
Oregano reale Miscela reale
Oregano predetto 23 6
Miscela predetta 0 39



# Outcome counts per class as an HTML table.
dfconf %>%
  kable(row.names = TRUE) %>%
  kable_styling('hover', fixed_thead = TRUE, full_width = FALSE)
TP TN FP FN
Oregano 23 39 6 0
Mix 39 23 0 6



# Sensitivity and specificity for each positive class, from the outcome counts.
senso <- otp / (otp + ofn)
speco <- otn / (otn + ofp)
sensm <- mtp / (mtp + mfn)
specm <- mtn / (mtn + mfp)
# Efficiency: geometric mean of sensitivity and specificity. The Oregano value
# is reused for the Mix row below.
eff <- sqrt(senso * speco)

Oregano <- c(senso, speco, eff, oacc)
Mix <- c(sensm, specm, eff, macc)

# One row per class, metrics rounded to three decimals.
dfconf2 <- data.frame(rbind(Oregano, Mix))
colnames(dfconf2) <- c('SENS', 'SPEC', 'EFF', 'ACC')
dfconf2 <- round(dfconf2, digits = 3)

dfconf2 %>%
  kable(row.names = TRUE) %>%
  kable_styling('hover', fixed_thead = TRUE, full_width = FALSE)
SENS SPEC EFF ACC
Oregano 1.000 0.867 0.931 0.912
Mix 0.867 1.000 0.931 0.912



# Threshold sweep on the evaluation set.
#
# For every candidate threshold in `soglie`, a row is predicted 'Oregano' when
# Predicted_Oregano >= threshold. From the resulting outcome counts (positive
# class: Type == 'Oregano') the loop fills
#   sensvec, specvec, effvec, accvec : one value per threshold;
#   probvec : one value per (threshold, prevalence) pair, thresholds varying
#             slowest, holding P(Oregano | predicted Oregano) when the assumed
#             share of adulterated samples is the corresponding `prob_prev`.
#
# Notes:
#   * The rule must read the Predicted_Oregano column. A bare `Oregano` inside
#     with(dftest, ...) is not a column: it resolves to the global summary
#     vector of that name and is silently recycled against the rows.
#   * The loop uses its own index, so the global `soglia` (and the counts and
#     metrics computed for it) are not overwritten by the sweep.
#   * The labels do not depend on the prevalence, so they are computed once
#     per threshold; result vectors are preallocated instead of grown.
#   * A threshold of 1 gives sensitivity 0 and specificity 1 on data below 1,
#     so the corresponding probvec entries are NaN (0 / 0).
prob_prev <- c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6)
soglie <- seq(0.8, 1, 0.01)

n_prev <- length(prob_prev)
sensvec <- numeric(length(soglie))
specvec <- numeric(length(soglie))
effvec <- numeric(length(soglie))
accvec <- numeric(length(soglie))
probvec <- numeric(length(soglie) * n_prev)

dftest1 <- dftest
# dftest is built from 'Mix' and 'Oregano' rows only, so "not Oregano" means
# 'Mix' here.
is_oregano <- dftest1$Type == 'Oregano'
# Prior of pure oregano for each assumed adulteration share.
prior_oregano <- 1 - prob_prev

for (idx_soglia in seq_along(soglie)) {
  pred_oregano <- dftest1$Predicted_Oregano >= soglie[idx_soglia]

  # Stored on dftest1 so the labels of the last threshold stay inspectable.
  dftest1$Oregano_test <- ifelse(
    pred_oregano,
    ifelse(is_oregano, 'TP', 'FP'),
    ifelse(is_oregano, 'FN', 'TN')
  )

  n_tp <- sum(dftest1$Oregano_test == 'TP')
  n_tn <- sum(dftest1$Oregano_test == 'TN')
  n_fp <- sum(dftest1$Oregano_test == 'FP')
  n_fn <- sum(dftest1$Oregano_test == 'FN')

  sens_cur <- n_tp / (n_tp + n_fn)
  spec_cur <- n_tn / (n_tn + n_fp)

  sensvec[idx_soglia] <- sens_cur
  specvec[idx_soglia] <- spec_cur
  effvec[idx_soglia] <- sqrt(sens_cur * spec_cur)
  accvec[idx_soglia] <- (n_tp + n_tn) / nrow(dftest1)

  # Bayes' rule, vectorised over the prevalences:
  # P(O | +) = SENS * P(O) / (SENS * P(O) + (1 - SPEC) * P(A)).
  probvec[(idx_soglia - 1) * n_prev + seq_len(n_prev)] <-
    (sens_cur * prior_oregano) /
    (sens_cur * prior_oregano + ((1 - spec_cur) * prob_prev))
}

# Long-format table (one row per threshold and metric) for plotting
# sensitivity and specificity against the threshold.
dfsensspec <- data.frame('Soglia' = soglie, 'SENS' = sensvec, 'SPEC' = specvec) %>%
  pivot_longer(cols = 2:3, names_to = 'SENS_SPEC', values_to = 'Valore')


P <- ggplot(dfsensspec, aes(x = Soglia, y = Valore, color = SENS_SPEC)) +
  geom_line(size = 0.2) +
  geom_point(size = 1) +
  scale_x_continuous(breaks = seq(0.8, 1, 0.01)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = 'Variazione SENS/SPEC') +
  theme_bw()

ggplotly(P)


# Efficiency as a function of the threshold.
dfeff <- data.frame('Soglia' = soglie, 'EFF' = effvec)


p <- ggplot(dfeff, aes(x = Soglia, y = EFF)) +
  geom_line(size = 0.1) +
  geom_point(size = 1, color = 'blue3') +
  scale_x_continuous(breaks = seq(0.8, 1, 0.01)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = 'Variazione efficienza') +
  theme_bw()
ggplotly(p)


# Accuracy as a function of the threshold.
dfacc <- data.frame('Soglia' = soglie, 'ACC' = accvec)


p <- ggplot(dfacc, aes(x = Soglia, y = ACC)) +
  geom_line(size = 0.1) +
  geom_point(size = 1, color = 'green4') +
  scale_x_continuous(breaks = seq(0.8, 1, 0.01)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = 'Variazione accuratezza') +
  theme_bw()
ggplotly(p)



(Per vedere come cambiano sensibilità e specificità con un diverso valore soglia CLICCA QUI)



# Re-read the outcome counts from the labels stored in dftest.
otp <- nrow(dftest[dftest$Oregano_test == 'TP', ])
otn <- nrow(dftest[dftest$Oregano_test == 'TN', ])
ofp <- nrow(dftest[dftest$Oregano_test == 'FP', ])
ofn <- nrow(dftest[dftest$Oregano_test == 'FN', ])
mtp <- nrow(dftest[dftest$misc_test == 'TP', ])
mtn <- nrow(dftest[dftest$misc_test == 'TN', ])
mfp <- nrow(dftest[dftest$misc_test == 'FP', ])
mfn <- nrow(dftest[dftest$misc_test == 'FN', ])

# Sensitivity and specificity with 'Oregano' as the positive class.
senso <- (otp / (otp + ofn))
speco <- (otn / (otn + ofp))

# Posterior, in percent, of the positive class given a positive result.
# prob_prev05 multiplies the sensitivity, so in this formula it acts as the
# prior of the positive class; its complement weights (1 - specificity).
prob_prev05 <- 0.52
prob <- (senso * prob_prev05) /
  (senso * prob_prev05 + ((1 - speco) * (1 - prob_prev05))) * 100

Alla luce di questi risultati, se si ipotizza un’incidenza sul mercato di campioni adulterati di circa il 48% (ovvero circa il 52% di campioni di Oregano puro), è possibile stimare la probabilità che un campione incognito che viene classificato come Oregano sia effettivamente tale.

In altre parole, indicando con \(P(A)\) l’incidenza ipotizzata e con \(P(O)\) il suo complementare, si avrà che


\[ P(O|+) = \frac{P(+|O)P(O)}{P(+)} = \frac{P(+|O)P(O)}{P(+|O)P(O)+P(+|A)P(A)} \]



\(P(+|O)\) è ovviamente la sensibilità del modello e \(P(+|A)\) è il complementare della specificità, ovvero \(1 - \text{specificità}\).

In questo ipotetico scenario quindi, ogni qual volta viene rilevato un campione positivo, la probabilità che sia effettivamente un campione di Oregano puro sarà dell’89.04% !



# Expand prevalences and thresholds to one entry per (threshold, prevalence)
# pair, thresholds varying slowest.
prob_prevlong <- rep(prob_prev, times = length(soglie))
soglielong <- rep(soglie, each = length(prob_prev))


dfprob <- data.frame(
  'Soglia' = soglielong,
  'perc_adulterazione' = factor(prob_prevlong),
  'Prob_Oregano' = probvec
)
# Rows for a threshold of exactly 1 are dropped before plotting.
dfprob <- dfprob[dfprob$Soglia != 1, ]

p <- ggplot(dfprob, aes(x = Soglia, y = Prob_Oregano, color = perc_adulterazione)) +
  geom_line(size = 0.2) +
  geom_point(size = 1) +
  scale_x_continuous(breaks = seq(0.8, 1, 0.01)) +
  scale_y_continuous(breaks = seq(0, 1, 0.1)) +
  labs(title = 'Variazione probabilità') +
  theme_bw()
ggplotly(p)


