#packages
library(flextable) #tables
library(tidyverse) #environment
library(mice) # imputation
library(janitor) #data cleaning
library(psych) #psychometrics (classical theory)
library(lavaan) #SEM
library(semPlot) #plot cfa models
library(semTools) #reliability sem models
library(DataExplorer) #missing
library(summarytools) #show data info
library(gridExtra) #arrange multiple plots in a grid
library(emmeans) #anova and posthoc
library(effsize) #Cohen's d
library(ggpubr) #publication-ready plots
library(cutpointr) #ROC
#round
options(scipen=1, digits=2)

This markdown supports two works:

- Psychometric properties and clinical utility of the executive function inventory for children and adolescents: a large multistage populational study including children with ADHD
- The clinical utility of psychometric tools: a real-data approach from a study including children with ADHD
The first is a published manuscript (DOI: https://doi.org/10.1080/21622965.2020.1726353) and the second is a book chapter.
This markdown, the data presented here, and the manuscripts are also used in my Psychometrics course at the Pontifical Catholic University of Rio de Janeiro (PUC-Rio), which partly explains how I organized this markdown document.
The first part of the code (Data handling) explains how to get the data and fix some issues. The data originally came as a messy Excel file, and several steps were needed to clean everything up. Since all of that has been done, you do not need to run these chunks again: just load the RData file and skip them. The Data handling chunks, for both Parents and Teachers, are therefore kept only to document what was done.

The second part of this markdown is Psychometrics, for parents and teachers; all tables of the first manuscript can be replicated there. Note that only the chunks with the eval = TRUE option need to be run; the others can be skipped. The ADHD group chunk contains preliminary code to build one solid dataset that includes both groups (typical and ADHD) with the same variables. I am well aware that this code could be placed at the beginning, which would make sense; however, these steps were intentionally arranged so the manuscript can be studied from different perspectives. In other words, if you want to dig into the psychometric models, the first sets of chunks are the useful ones; if you want to dig into the more specific part on clinical utility, the second part of this markdown is what matters.

The Clinical utility chunk uses the dataset composed of the two groups (typical and ADHD) and reproduces the results reported in the manuscript, as well as providing a partial replication of its figures. The ROC chunk relates to the book chapter, in which my co-authors and I introduce concepts such as the sensitivity and specificity of a psychological test. The last part of this document is Future work, which mixes temporary material produced along the way with ideas that would be suitable for future work.
Please feel free to ask for further information and/or to reach out to me at luisfac@puc-rio.br
Thanks, Luis
Please check https://osf.io/kx2ym/files/. I recommend importing and loading the R data file (Base R - EFICA (manuscript and book chapter)) instead of the raw files. However, feel free to get the raw data if you want to replicate these analyses from scratch.
#get file
load(file="C:/Users/luisf/Dropbox/Puc-Rio/Consultoria - Arruda/Base R - EFICA (manuscript and book chapter).RData")

#add sex
ds <- ds %>%
mutate(sex = if_else(sexo_certo == "1", "Female", "Male"))
#add race
ds <- ds %>%
mutate(race = if_else(cor == "1","White","non-white"))
#rename age
ds <- ds %>%
rename(age = idade)
#add guardian
ds <- ds %>%
mutate(guardian = recode(quem_preenche_certo,
"1" = "mother",
"2" = "father",
"3" = "other"))
# add schooling
ds <- ds %>%
rename(schooling = grau_instrucao)
# add socioeconomic
ds <- ds %>%
rename(socioeconomic = classe_economica)
#change values (tipico for typical)
ds <- ds %>% mutate(group = "typical")
#remove useless variables
ds <- ds %>% select(-c(pais_tct_metacog1,pais_tct_metacog2, pais_tct_regulacao, profs_tct_metacog, profs_tct_regulacao, quem_preenche))

#https://stackoverflow.com/questions/44930149/replace-a-subset-of-a-data-frame-with-dplyr-join-operations
replace_subset <- function(df, df_subset, id_col_names = c()) {
# work out which of the columns contain "new" data
new_data_col_names <- colnames(df_subset)[which(!colnames(df_subset) %in% id_col_names)]
# complete the df_subset with the extra columns from df
df_sub_to_join <- df_subset %>%
left_join(select(df, -new_data_col_names), by = c(id_col_names))
# join and bind rows
df_out <- df %>%
anti_join(df_sub_to_join, by = c(id_col_names)) %>%
bind_rows(df_sub_to_join)
return(df_out)
}
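A quick usage sketch of replace_subset() (toy data of my own, not from the study):

#toy example: overwrite the row with id = 2
df_main <- tibble(id = 1:3, score = c(10, 20, 30))
df_fix <- tibble(id = 2, score = 25)
replace_subset(df_main, df_fix, id_col_names = "id")
#rows with id 1 and 3 keep their scores; id 2 now has score = 25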
First, get a random sample by subsetting the original dataset into a smaller but representative one.
Then, I’ll create and run the model.
#create the model
mod_cfa_pais <- 'metacog1 =~ y12 + y26 + y8 + y1 + y17 + y20 + y10 + y47 + y48 + y60 + y55 + y6 + y7 + y57
regulacao =~ y21 + y23 + y19 + y52 + y28 + y3 + y4 + y50 + y45 + y24 + y25 + y22 + y18 + y54 + y59 + y14 + y42 + y16 + y11 + y53 + y58 + y32
metacog2 =~ y30 + y41 + y31 + y29 + y35 + y38 + y37 + y2 + y40 + y13 + y46 + y43 + y62 + y44 + y9 + y27 + y33 + y39 + y65 + y34 + y36 + y56 + y5'
#run the model
cfa_pais <- cfa(mod_cfa_pais, #model name
data=ife_65_650, #dataset
estimator = 'WLSMV', #method for extracting factors
orthogonal=FALSE,
ordered=names(ife_65_650))
#reliability
rel_pais <- reliability(cfa_pais)

And get the results available in Table 2 (Item analysis, descriptives, and CFA results of EFICA-P – parents).
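The resulting object is a small matrix of coefficients (rows such as alpha and omega, one column per latent factor), so it can be inspected directly. Note that newer semTools versions deprecate reliability() in favor of compRelSEM().

#inspect the reliability coefficients per factor
round(rel_pais, 2)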
## Report the model
parameterEstimates(cfa_pais, standardized=TRUE) %>%
filter(op == "=~") %>%
mutate(stars = ifelse(pvalue < .001, "***",
ifelse(pvalue < .01, "**",
ifelse(pvalue < .05, "*", "")))) %>%
select('Latent Factor'=lhs,
Indicator=rhs,
B=est,
SE=se, Z=z,
Beta=std.all,
sig=stars) %>%
arrange(`Latent Factor`,desc(Beta)) %>%
flextable() %>%
colformat_num(., digits = 3)

Latent Factor | Indicator | B | SE | Z | Beta | sig |
metacog1 | y8 | 1.082 | 0.062 | 17.322 | 0.715 | *** |
metacog1 | y20 | 1.058 | 0.071 | 14.913 | 0.699 | *** |
metacog1 | y17 | 1.011 | 0.062 | 16.321 | 0.667 | *** |
metacog1 | y12 | 1.000 | 0.000 | | 0.660 | |
metacog1 | y26 | 0.994 | 0.048 | 20.628 | 0.656 | *** |
metacog1 | y48 | 0.993 | 0.066 | 14.950 | 0.655 | *** |
metacog1 | y7 | 0.964 | 0.071 | 13.652 | 0.637 | *** |
metacog1 | y1 | 0.957 | 0.065 | 14.832 | 0.632 | *** |
metacog1 | y47 | 0.935 | 0.061 | 15.245 | 0.617 | *** |
metacog1 | y10 | 0.880 | 0.065 | 13.489 | 0.581 | *** |
metacog1 | y55 | 0.868 | 0.063 | 13.860 | 0.573 | *** |
metacog1 | y60 | 0.860 | 0.064 | 13.418 | 0.568 | *** |
metacog1 | y57 | 0.802 | 0.069 | 11.685 | 0.529 | *** |
metacog1 | y6 | 0.752 | 0.080 | 9.349 | 0.497 | *** |
metacog2 | y46 | 1.988 | 0.262 | 7.584 | 0.734 | *** |
metacog2 | y27 | 1.928 | 0.251 | 7.671 | 0.712 | *** |
metacog2 | y37 | 1.838 | 0.240 | 7.668 | 0.679 | *** |
metacog2 | y44 | 1.829 | 0.243 | 7.529 | 0.675 | *** |
metacog2 | y5 | 1.715 | 0.219 | 7.831 | 0.633 | *** |
metacog2 | y62 | 1.662 | 0.225 | 7.385 | 0.613 | *** |
metacog2 | y31 | 1.645 | 0.216 | 7.631 | 0.607 | *** |
metacog2 | y2 | 1.641 | 0.226 | 7.256 | 0.606 | *** |
metacog2 | y35 | 1.631 | 0.213 | 7.641 | 0.602 | *** |
metacog2 | y13 | 1.569 | 0.226 | 6.939 | 0.579 | *** |
metacog2 | y38 | 1.525 | 0.215 | 7.092 | 0.563 | *** |
metacog2 | y34 | 1.481 | 0.200 | 7.418 | 0.547 | *** |
metacog2 | y43 | 1.455 | 0.202 | 7.207 | 0.537 | *** |
metacog2 | y40 | 1.399 | 0.199 | 7.038 | 0.517 | *** |
metacog2 | y36 | 1.355 | 0.184 | 7.374 | 0.500 | *** |
metacog2 | y9 | 1.352 | 0.194 | 6.963 | 0.499 | *** |
metacog2 | y56 | 1.241 | 0.189 | 6.565 | 0.458 | *** |
metacog2 | y41 | 1.225 | 0.182 | 6.727 | 0.452 | *** |
metacog2 | y65 | 1.183 | 0.181 | 6.530 | 0.437 | *** |
metacog2 | y39 | 1.125 | 0.175 | 6.414 | 0.415 | *** |
metacog2 | y29 | 1.017 | 0.121 | 8.433 | 0.376 | *** |
metacog2 | y30 | 1.000 | 0.000 | | 0.369 | |
metacog2 | y33 | 0.959 | 0.153 | 6.270 | 0.354 | *** |
regulacao | y16 | 1.114 | 0.049 | 22.936 | 0.785 | *** |
regulacao | y4 | 1.016 | 0.048 | 21.356 | 0.716 | *** |
regulacao | y21 | 1.000 | 0.000 | | 0.705 | |
regulacao | y45 | 0.997 | 0.047 | 21.320 | 0.703 | *** |
regulacao | y19 | 0.989 | 0.043 | 23.012 | 0.697 | *** |
regulacao | y23 | 0.978 | 0.044 | 22.380 | 0.690 | *** |
regulacao | y3 | 0.959 | 0.047 | 20.243 | 0.676 | *** |
regulacao | y28 | 0.934 | 0.049 | 19.191 | 0.658 | *** |
regulacao | y25 | 0.929 | 0.051 | 18.108 | 0.655 | *** |
regulacao | y59 | 0.928 | 0.047 | 19.738 | 0.654 | *** |
regulacao | y52 | 0.916 | 0.043 | 21.229 | 0.646 | *** |
regulacao | y14 | 0.911 | 0.049 | 18.743 | 0.642 | *** |
regulacao | y24 | 0.878 | 0.046 | 19.047 | 0.619 | *** |
regulacao | y18 | 0.873 | 0.051 | 16.998 | 0.615 | *** |
regulacao | y50 | 0.860 | 0.048 | 17.879 | 0.606 | *** |
regulacao | y53 | 0.855 | 0.055 | 15.677 | 0.603 | *** |
regulacao | y42 | 0.806 | 0.049 | 16.379 | 0.568 | *** |
regulacao | y22 | 0.802 | 0.051 | 15.802 | 0.565 | *** |
regulacao | y32 | 0.773 | 0.055 | 14.036 | 0.545 | *** |
regulacao | y11 | 0.766 | 0.058 | 13.157 | 0.540 | *** |
regulacao | y54 | 0.753 | 0.053 | 14.254 | 0.530 | *** |
regulacao | y58 | 0.630 | 0.056 | 11.221 | 0.444 | *** |
## aic chisq df pvalue cfi tli rmsea
## NA 4607.853 1649.000 0.000 0.965 0.963 0.053
fitMeasures(cfa_pais, c("chisq.scaled", "df.scaled", "pvalue.scaled", "cfi.scaled","tli.scaled","rmsea.scaled","srmr"))
## chisq.scaled df.scaled pvalue.scaled cfi.scaled tli.scaled
## 3716.879 1649.000 0.000 0.902 0.898
## rmsea.scaled srmr
## 0.044 0.069
Now I’ll plot this solution.
semPaths(cfa_pais, title = FALSE,
label.cex = 2, #font - variables
sizeLat = 7, #font latent
sizeMan = 4, #font - variables
edge.label.cex = 1.2, #lambda font
minimum = 0.1,
sizeInt = 0.8,
mar = c(1, 1, 1, 1), residuals = FALSE,
intercepts = FALSE, thresholds = FALSE, layout = "circle",
what = "std", #plot standardized estimates
cut = 0.3,
whatLabels = "hide", #hide edge labels
posCol = c("black", "dimgray"), #color of paths
color = list(lat = rgb(173, 216, 230, maxColorValue = 255),
man = rgb(240, 248, 255, maxColorValue = 255)))

First, I’ll include an age interval index.
ds <- ds %>%
mutate(age_group = cut(age, breaks = c(5,6,8,10,12,14, 16, 18, +Inf), include.lowest = TRUE)) %>% #idade was renamed to age above
mutate_if(is.factor,fct_explicit_na)
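A quick check that the binning behaved as intended (my own verification snippet):

#inspect the age intervals and their counts
ds %>% count(age_group)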
Then, I’ll add summative scores to the dataset.

#parents
ds <- ds %>%
mutate(parents_metacog1 = rowSums(select(., y12 , y26 , y8 , y1 , y17 , y20 , y10 , y47 , y48 , y60 , y55 , y6 , y7 , y57), na.rm = T)) %>%
mutate(parents_hotindex = rowSums(select(., y21 , y23 , y19 , y52 , y28 , y3 , y4 , y50 , y45 , y24 , y25 , y22 , y18 , y54 , y59 , y14 , y42 , y16 , y11 , y53 , y58 , y32), na.rm = T)) %>%
mutate(parents_metacog2 = rowSums(select(., y30 , y41 , y31 , y29 , y35 , y38 , y37 , y2 , y40 , y13 , y46 , y43 , y62 , y44 , y9 , y27 , y33 , y39 , y65 , y34 , y36 , y56 , y5), na.rm = T)) %>%
mutate(parents_total = parents_metacog1+parents_hotindex+parents_metacog2)
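One caveat worth flagging: rowSums(..., na.rm = TRUE) scores a respondent who skipped every item as 0. A more cautious sketch (my own, not used in the manuscript) prorates the sum and returns NA when too few items were answered:

#hypothetical helper: prorated sum, NA when fewer than 80% of the items were answered
prorated_sum <- function(data, items, min_prop = 0.8) {
x <- select(data, all_of(items))
answered <- rowSums(!is.na(x))
ifelse(answered >= min_prop * length(items),
rowMeans(x, na.rm = TRUE) * length(items),
NA_real_)
}
#usage with an illustrative subset of items: ds$parents_metacog1_alt <- prorated_sum(ds, c("y12", "y26", "y8"))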
And now I’ll report the normative data for the parents’ scale. To see whether sex-specific normative tables are needed, I’ll compare the results of boys and girls.

Table 4. Normative table for EFICA-P and EFICA-T results.
# By age
ds %>%
group_by(age_group) %>% #grouping
summarise_at(vars(parents_total),
list(mean = mean,
sd = sd,
p90 = ~quantile(., .9),
p95 = ~quantile(., .95),
sample = ~sum(!is.na(.)))) %>%
flextable()

age_group | mean | sd | p90 | p95 | sample |
[5,6] | 44 | 19 | 69 | 77 | 1017 |
(6,8] | 45 | 20 | 71 | 79 | 983 |
(8,10] | 43 | 20 | 70 | 76 | 807 |
(10,12] | 43 | 20 | 71 | 77 | 273 |
(12,14] | 42 | 22 | 72 | 81 | 88 |
(14,16] | 40 | 19 | 67 | 72 | 72 |
(16,18] | 31 | 18 | 57 | 60 | 44 |
First, get a random sample by subsetting the original dataset into a smaller but representative one.
Then, I’ll create and run the model.
#create the model
mod_cfa_professores <- 'MetaCog =~ i23 + i14 + i17 + i9 + i19 + i30 + i38 + i49 + i43 + i21 + i25 + i8 + i52 + i36 + i32 + i37 + i48 + i13 + i46 + i6 + i7 + i10 + i15 + i1 + i29 + i4 + i18 + i12 + i33 + i39 + i47 + i3
Regulacao =~ i26 + i45 + i42 + i28 + i2 + i27 + i34 + i16 + i50 + i5 + i24 + i22 + i20 + i41 + i35 + i44 + i40 + i31 + i51 + i11'
#run the model
cfa_prof <- cfa(mod_cfa_professores, #model name
data=ife_52_520, #dataset
estimator = 'WLSMV', #method for extracting factors
orthogonal=FALSE,
ordered=names(ife_52_520))
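For symmetry with the parents’ model, the factor reliabilities can be obtained the same way (a sketch; this call is not echoed in the original):

rel_prof <- reliability(cfa_prof)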
And get the results, as presented in Table 3 (Item analysis and CFA results – EFICA-T, teachers).

## Report the model
parameterEstimates(cfa_prof, standardized=TRUE) %>%
filter(op == "=~") %>%
mutate(stars = ifelse(pvalue < .001, "***",
ifelse(pvalue < .01, "**",
ifelse(pvalue < .05, "*", "")))) %>%
select('Latent Factor'=lhs,
Indicator=rhs,
B=est,
SE=se, Z=z,
Beta=std.all,
sig=stars) %>%
arrange(`Latent Factor`,desc(Beta)) %>%
flextable() %>%
colformat_num(., digits = 3)

Latent Factor | Indicator | B | SE | Z | Beta | sig |
MetaCog | i8 | 1.020 | 0.013 | 78.629 | 0.922 | *** |
MetaCog | i25 | 1.005 | 0.013 | 78.514 | 0.909 | *** |
MetaCog | i23 | 1.000 | 0.000 | | 0.904 | |
MetaCog | i19 | 0.990 | 0.015 | 67.904 | 0.895 | *** |
MetaCog | i48 | 0.986 | 0.016 | 62.499 | 0.892 | *** |
MetaCog | i14 | 0.973 | 0.014 | 70.083 | 0.880 | *** |
MetaCog | i21 | 0.972 | 0.015 | 65.262 | 0.879 | *** |
MetaCog | i30 | 0.969 | 0.015 | 63.116 | 0.876 | *** |
MetaCog | i17 | 0.968 | 0.015 | 64.253 | 0.875 | *** |
MetaCog | i49 | 0.966 | 0.016 | 60.570 | 0.874 | *** |
MetaCog | i29 | 0.963 | 0.022 | 44.651 | 0.871 | *** |
MetaCog | i36 | 0.957 | 0.016 | 58.206 | 0.866 | *** |
MetaCog | i12 | 0.943 | 0.018 | 51.511 | 0.853 | *** |
MetaCog | i52 | 0.933 | 0.022 | 41.675 | 0.844 | *** |
MetaCog | i9 | 0.931 | 0.019 | 50.117 | 0.843 | *** |
MetaCog | i13 | 0.920 | 0.020 | 45.391 | 0.832 | *** |
MetaCog | i1 | 0.919 | 0.020 | 44.840 | 0.831 | *** |
MetaCog | i32 | 0.918 | 0.022 | 42.234 | 0.830 | *** |
MetaCog | i33 | 0.911 | 0.021 | 43.136 | 0.824 | *** |
MetaCog | i3 | 0.911 | 0.030 | 30.005 | 0.824 | *** |
MetaCog | i39 | 0.905 | 0.024 | 37.075 | 0.818 | *** |
MetaCog | i43 | 0.905 | 0.020 | 45.196 | 0.818 | *** |
MetaCog | i37 | 0.895 | 0.024 | 37.411 | 0.810 | *** |
MetaCog | i46 | 0.887 | 0.020 | 43.452 | 0.802 | *** |
MetaCog | i18 | 0.875 | 0.027 | 32.896 | 0.791 | *** |
MetaCog | i38 | 0.872 | 0.023 | 37.454 | 0.789 | *** |
MetaCog | i4 | 0.851 | 0.024 | 35.552 | 0.770 | *** |
MetaCog | i6 | 0.844 | 0.024 | 34.550 | 0.763 | *** |
MetaCog | i7 | 0.827 | 0.026 | 32.004 | 0.748 | *** |
MetaCog | i47 | 0.827 | 0.031 | 27.095 | 0.748 | *** |
MetaCog | i10 | 0.815 | 0.027 | 30.518 | 0.737 | *** |
MetaCog | i15 | 0.757 | 0.032 | 23.635 | 0.685 | *** |
Regulacao | i44 | 1.104 | 0.036 | 30.870 | 0.920 | *** |
Regulacao | i51 | 1.102 | 0.039 | 27.938 | 0.918 | *** |
Regulacao | i28 | 1.091 | 0.032 | 34.075 | 0.909 | *** |
Regulacao | i40 | 1.080 | 0.037 | 29.150 | 0.899 | *** |
Regulacao | i35 | 1.050 | 0.033 | 31.492 | 0.875 | *** |
Regulacao | i41 | 1.047 | 0.036 | 28.714 | 0.872 | *** |
Regulacao | i11 | 1.037 | 0.039 | 26.295 | 0.864 | *** |
Regulacao | i22 | 1.024 | 0.035 | 28.954 | 0.854 | *** |
Regulacao | i34 | 1.017 | 0.036 | 28.603 | 0.848 | *** |
Regulacao | i27 | 1.016 | 0.034 | 30.101 | 0.846 | *** |
Regulacao | i45 | 1.011 | 0.028 | 36.371 | 0.842 | *** |
Regulacao | i31 | 1.009 | 0.038 | 26.890 | 0.841 | *** |
Regulacao | i26 | 1.000 | 0.000 | | 0.833 | |
Regulacao | i2 | 0.994 | 0.037 | 26.522 | 0.828 | *** |
Regulacao | i5 | 0.993 | 0.039 | 25.241 | 0.828 | *** |
Regulacao | i16 | 0.992 | 0.038 | 26.313 | 0.827 | *** |
Regulacao | i50 | 0.985 | 0.039 | 24.950 | 0.821 | *** |
Regulacao | i42 | 0.976 | 0.038 | 25.818 | 0.813 | *** |
Regulacao | i24 | 0.968 | 0.037 | 26.302 | 0.806 | *** |
Regulacao | i20 | 0.951 | 0.036 | 26.596 | 0.792 | *** |
## aic chisq df pvalue cfi tli rmsea
## NA 5151.299 1273.000 0.000 0.991 0.991 0.077
fitMeasures(cfa_prof, c("chisq.scaled", "df.scaled", "pvalue.scaled", "cfi.scaled","tli.scaled","rmsea.scaled","srmr"))
## chisq.scaled df.scaled pvalue.scaled cfi.scaled tli.scaled
## 3678.288 1273.000 0.000 0.955 0.953
## rmsea.scaled srmr
## 0.060 0.074
Now I’ll plot this solution.
semPaths(cfa_prof, title = FALSE,
label.cex = 2, #font - variables
sizeLat = 7, #font latent
sizeMan = 4, #font - variables
edge.label.cex = 1.2, #lambda font
minimum = 0.1,
sizeInt = 0.8,
mar = c(1, 1, 1, 1), residuals = FALSE,
intercepts = FALSE, thresholds = FALSE, layout = "circle",
what = "std", #plot standardized estimates
cut = 0.3,
whatLabels = "hide", #hide edge labels
posCol = c("black", "dimgray"), #color of paths
color = list(lat = rgb(173, 216, 230, maxColorValue = 255),
man = rgb(240, 248, 255, maxColorValue = 255)))

First, I’ll add a summative score to the dataset.
ds <- ds %>%
mutate(teachers_metacog = rowSums(select(., i23 , i14 , i17 , i9 , i19 , i30 , i38 , i49 , i43 , i21 , i25 , i8 , i52 , i36 , i32 , i37 , i48 , i13 , i46 , i6 , i7 , i10 , i15 , i1 , i29 , i4 , i18 , i12 , i33 , i39 , i47 , i3), na.rm=T)) %>%
mutate(teachers_hotindex = rowSums(select(., i26 , i45 , i42 , i28 , i2 , i27 , i34 , i16 , i50 , i5 , i24 , i22 , i20 , i41 , i35 , i44 , i40 , i31 , i51 , i11), na.rm=T)) %>%
mutate(teachers_total = teachers_metacog+teachers_hotindex)

I’ll report the normative data for the teachers’ scale, as presented in Table 4 (Normative table for EFICA-P and EFICA-T results).
##
## ANOVA results using teachers_total as the dependent variable
##
## Predictor            SS         df   MS       F      p    partial_eta2 CI_90_partial_eta2
## (Intercept)          75321.98   1    75321.98 163.49 .000
## factor(sex)          205.34     1    205.34   0.45   .504 .00          [.00, .00]
## age                  203.27     1    203.27   0.44   .507 .00          [.00, .00]
## factor(sex) x age    7979.86    1    7979.86  17.32  .000 .01          [.00, .01]
## Error                1511175.67 3280 460.72
##
## Note: Values in square brackets indicate the bounds of the 90% confidence interval for partial eta-squared
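The chunk that generated this ANOVA table is not echoed in this document. A minimal sketch that produces this table format, assuming the original used apaTables (the call and formula are my reconstruction):

#assumed reconstruction: sex-by-age ANOVA on the teachers' total score
library(apaTables)
apa.aov.table(lm(teachers_total ~ factor(sex) * age, data = ds))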
# By age
ds %>%
group_by(age_group, sex) %>% #grouping
summarise_at(vars(teachers_total),
list(mean = mean,
sd = sd,
p90 = ~quantile(., .9),
p95 = ~quantile(., .95),
sample = ~sum(!is.na(.)))) %>%
flextable()

age_group | sex | mean | sd | p90 | p95 | sample |
[5,6] | Female | 21 | 20 | 49 | 61 | 498 |
[5,6] | Male | 27 | 22 | 58 | 70 | 519 |
(6,8] | Female | 22 | 20 | 51 | 60 | 492 |
(6,8] | Male | 29 | 23 | 61 | 74 | 491 |
(8,10] | Female | 20 | 19 | 46 | 55 | 414 |
(8,10] | Male | 29 | 23 | 65 | 72 | 393 |
(10,12] | Female | 21 | 18 | 42 | 55 | 128 |
(10,12] | Male | 34 | 24 | 65 | 77 | 145 |
(12,14] | Female | 20 | 23 | 56 | 65 | 46 |
(12,14] | Male | 38 | 29 | 78 | 91 | 42 |
(14,16] | Female | 20 | 22 | 52 | 53 | 35 |
(14,16] | Male | 45 | 26 | 75 | 98 | 37 |
(16,18] | Female | 24 | 20 | 52 | 58 | 25 |
(16,18] | Male | 29 | 32 | 69 | 101 | 19 |
For this set of analyses, I’ll create a new dataset that contains both typical (n = 3284) and ADHD (n = 165) children. No missing-value imputation will be applied to the ADHD dataset. Parents’ and teachers’ data are both handled in the chunks below.
First, I’ll define a dataset for this group.
Then I’ll add and rename specific variables.
#add sex
ds_adhd <- ds_adhd %>%
mutate(sex = if_else(sexo == "1", "Female", "Male"))
#add race
ds_adhd <- ds_adhd %>%
mutate(race = if_else(cor == "1","White","non-white"))
#rename age
ds_adhd <- ds_adhd %>%
rename(age = idade)
#add guardian
ds_adhd <- ds_adhd %>%
mutate(guardian = recode(quem_preenche,
"1" = "mother",
"2" = "father",
"3" = "other"))
# add socioeconomic
ds_adhd <- ds_adhd %>%
rename(socioeconomic = classe_economica)
#age group
ds_adhd <- ds_adhd %>%
mutate(age_group = cut(age, breaks = c(5,6,8,10,12,14, 16, 18, +Inf), include.lowest = TRUE))

Then I’ll check if this dataset is reliable.
# Parents
ds_adhd <- ds_adhd %>%
# Copy variables
mutate_at(vars(if_epa_1:if_epa_65), list(cat = ~paste0(.))) %>% #<-- here labels are different
rename_at(vars(ends_with( "_cat")), list(~paste0("y", 1:65))) %>%
# Transform into numeric
mutate_at(vars(y1:y65), ~as.numeric(.)) %>%
# Round values if necessary - Parents
mutate_at(vars(y1:y65), ~round(.,0))
# Teachers
ds_adhd <- ds_adhd %>%
mutate_at(vars(if_epr_1:if_epr_52), list(prof = ~paste0(.))) %>% #<-- here labels are different again
rename_at(vars(ends_with( "_prof")), list(~paste0("i", 1:52))) %>%
## Transform into numeric - teachers
mutate_at(vars(i1:i52), ~as.numeric(.)) %>%
## Round values if necessary - teachers
mutate_at(vars(i1:i52), ~round(.,0))

I’ll also check the distributions (outliers).
ds_adhd %>% select(y1:y65) %>%
pivot_longer(everything()) %>%
ggplot(., aes(name, value)) + geom_boxplot()
ds_adhd %>% select(i1:i52) %>%
pivot_longer(everything()) %>%
ggplot(., aes(name, value)) + geom_boxplot()

and replace coding errors with missing values (NA) - first for parents, then teachers:
ds_adhd <- ds_adhd %>%
mutate_at(vars(y1:y65), ~replace(., . > 2 | . < 0, NA))
ds_adhd <- ds_adhd %>%
mutate_at(vars(i1:i52), ~replace(., . > 2 | . < 0, NA))
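A quick sanity check that the remaining item values fall in the expected 0-2 range (my own snippet):

#all item responses should now lie between 0 and 2
range(select(ds_adhd, y1:y65), na.rm = TRUE)
range(select(ds_adhd, i1:i52), na.rm = TRUE)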
Now check the missing values.

#parents
ds_adhd %>% select(y1:y65) %>% summarise(sum(is.na(.)))
ds_adhd %>% select(y1:y65) %>% plot_missing()
#teachers
ds_adhd %>% select(i1:i52) %>% summarise(sum(is.na(.)))
ds_adhd %>% select(i1:i52) %>% plot_missing()

Then I’ll add summative scores.
ds_adhd <- ds_adhd %>%
#parents
mutate(parents_metacog1 = rowSums(select(., y12 , y26 , y8 , y1 , y17 , y20 , y10 , y47 , y48 , y60 , y55 , y6 , y7 , y57), na.rm = T)) %>%
mutate(parents_hotindex = rowSums(select(., y21 , y23 , y19 , y52 , y28 , y3 , y4 , y50 , y45 , y24 , y25 , y22 , y18 , y54 , y59 , y14 , y42 , y16 , y11 , y53 , y58 , y32), na.rm = T)) %>%
mutate(parents_metacog2 = rowSums(select(., y30 , y41 , y31 , y29 , y35 , y38 , y37 , y2 , y40 , y13 , y46 , y43 , y62 , y44 , y9 , y27 , y33 , y39 , y65 , y34 , y36 , y56 , y5), na.rm = T)) %>%
mutate(parents_total = parents_metacog1+parents_hotindex+parents_metacog2) %>%
#teachers
mutate(teachers_metacog = rowSums(select(., i23 , i14 , i17 , i9 , i19 , i30 , i38 , i49 , i43 , i21 , i25 , i8 , i52 , i36 , i32 , i37 , i48 , i13 , i46 , i6 , i7 , i10 , i15 , i1 , i29 , i4 , i18 , i12 , i33 , i39 , i47 , i3), na.rm=T)) %>% #<- consider missing
mutate(teachers_hotindex = rowSums(select(., i26 , i45 , i42 , i28 , i2 , i27 , i34 , i16 , i50 , i5 , i24 , i22 , i20 , i41 , i35 , i44 , i40 , i31 , i51 , i11), na.rm=T)) %>% #<- consider missing
mutate(teachers_total = teachers_metacog+teachers_hotindex)

Now I’ll create a new dataset including the variables common to the two groups.
#check similarities and differences
base_1 <- ds %>%
rename_at(vars(snap_1:snap_18, -contains("int")), ~paste0("sna_ppa_",1:18)) %>%
rename_at(vars(snap_1_1:snap_18_1, -contains("int")), ~paste0("sna_ppr_",1:18))
#get names
base_1 <- base_1 %>% names %>% data.frame %>% mutate(base = "general") %>% setNames(.,c("var","base"))
base_2 <- ds_adhd %>% names %>% data.frame %>% mutate(base = "adhd")%>% setNames(.,c("var","base"))
#check names
bind_rows(base_1,base_2) %>% #get both datasets
mutate_at(vars(var),
~ tolower(.) %>% stringi::stri_trans_general(.,"Latin-ASCII")) %>% #just fix name issues
group_by(var) %>% #group by variable
mutate(n = n()) %>% #add counts
filter(n==1) %>% #filter for exclusive variables
#arrange(n) %>%
print(n=nrow(.)) #display
rm(base_1, base_2)

Now merge the datasets.
#merge datasets
base_1 <- ds %>%
rename_at(vars(snap_1:snap_18, -contains("int")), ~paste0("sna_ppa_",1:18)) %>%
rename_at(vars(snap_1_1:snap_18_1, -contains("int")), ~paste0("sna_ppr_",1:18)) %>%
mutate_at(vars(starts_with("sdq"), starts_with("sna_ppr_")), ~as.numeric(.))
base_2 <- ds_adhd %>%
mutate(group = "adhd") %>%
rename(no_x = no)
common_names <- function(base1, base2){
#create two datasets to storage the colnames
base1 <- base1 %>% names %>% data.frame %>% mutate(base = "base1") %>% setNames(.,c("var","base"))
base2 <- base2 %>% names %>% data.frame %>% mutate(base = "base2") %>% setNames(.,c("var","base"))
#merge
lista_comum <- bind_rows(base1,base2) %>% #get both datasets
mutate_at(vars(var),
~ tolower(.) %>% stringi::stri_trans_general(.,"Latin-ASCII")) %>% #just fix name issues
group_by(var) %>% #group by variable
mutate(n = n()) %>% #add counts
filter(n==2) %>% #keep variables present in both datasets
arrange(var) %>%
#distinct(var, .keep_all = TRUE) %>%
unstack(.,var~base) #unstack
var_comum <<- lista_comum %>% pull(base1)
}
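A side note on the design: common_names() writes var_comum to the global environment via <<-. An equivalent that simply returns the vector (a sketch, keeping the same name normalization) would be:

#sketch: return the common names instead of assigning globally
common_names2 <- function(base1, base2) {
norm <- function(x) stringi::stri_trans_general(tolower(x), "Latin-ASCII")
names(base1)[norm(names(base1)) %in% norm(names(base2))]
}
#usage: var_comum <- common_names2(base_1, base_2)

The original version is kept and called below.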
common_names(base1 = base_1, base2 = base_2)
#create a dataset with typical and adhd children with same variables
ds_full <- bind_rows(base_1, base_2) %>%
select(all_of(var_comum)) %>%
select(group, parents_total, teachers_total,
parents_metacog1, parents_hotindex, parents_metacog2, teachers_metacog, teachers_hotindex, everything())

Check if everything matches.
ds_full %>%
group_by(group) %>%
select(parents_total, teachers_total,
parents_metacog1, parents_hotindex, parents_metacog2, teachers_metacog, teachers_hotindex) %>%
descr()
ds_full <- ds_full %>%
mutate(group = factor(group, levels=c("typical", "adhd")))
cor(ds_full %>% filter(group == "typical") %>% select(sdq_1),ds$sdq_1, use = "complete.obs")
cor(ds_full %>% filter(group == "adhd") %>% select(sdq_1),ds_adhd$sdq_1, use = "complete.obs")

Remove unnecessary variables.
lm(parents_total ~ factor(group), data = ds_full) %>% summary()
lm(teachers_total ~ factor(group), data = ds_full) %>% summary()

ggplot(ds_full, aes(x = parents_total, group = group, fill = group)) +
geom_density() +
scale_fill_manual(values = c("#0072B250","#D55E0050"), labels = c("Typical", "ADHD")) +
geom_segment(data = ds_full %>%
group_by(group) %>%
summarize(mean = mean(parents_total), sd = sd(parents_total)), aes(x = mean, xend = mean, y = 0, yend = 0.022),
size = 1, alpha=0.5, lty="21", color = "red") +
geom_text(data = ds_full %>%
group_by(group) %>%
summarize(mean = mean(parents_total), sd = sd(parents_total)),
aes(x = mean, y = 0.005,label = paste0("μ = ",round(mean, 2))),position = position_stack(vjust = .5)) +
theme_bw() +
xlim(-10,130) +
labs(x = "Parents score", y = "", fill = "Group")

library(ggridges) #geom_density_ridges() and theme_ridges() come from ggridges
#means per group for the text labels below (this object was not defined anywhere above)
density_lines <- ds_full %>%
group_by(group) %>%
summarize(mean = mean(parents_total))
ggplot(ds_full, aes(x = parents_total, y = group, fill = group)) +
geom_density_ridges(scale = 10,
size = 0.25,
rel_min_height = 0.03,
# quantile_lines = T,
alpha = .8,
color = "black") +
scale_fill_manual(values = c("#0072B250","#D55E0050"), labels = c("Typical", "ADHD")) +
theme_ridges(center = TRUE) +
geom_text(
data = density_lines,
aes(x = mean, y = group, label = paste("mean = ",round(mean, 2))),
size = 4, nudge_x = 0.03, nudge_y = 0.5)

ggplot(ds_full, aes(x = teachers_total, group = group, fill = group)) +
geom_density() +
scale_fill_manual(values = c("#0072B250","#D55E0050"), labels = c("Typical", "ADHD")) +
geom_segment(data = ds_full %>%
group_by(group) %>%
summarize(mean = mean(teachers_total), sd = sd(teachers_total)), aes(x = mean, xend = mean, y = 0, yend = 0.025),
size = 1, alpha=0.5, lty="21", color = "red") +
geom_text(data = ds_full %>%
group_by(group) %>%
summarize(mean = mean(teachers_total), sd = sd(teachers_total)),
aes(x = mean, y = 0.005,label = paste0("μ = ",round(mean, 2))),position = position_stack(vjust = .5)) +
theme_bw() +
xlim(-10,120) +
labs(x = "Teachers score", y = "", fill = "Group")

This chunk refers to the book chapter.
The function below makes it possible to plot the score distributions and inspect the classification tables.
#get dataset
ds_full %>%
select(group, age_group,parents_total, teachers_total) %>%
mutate(diag = if_else(group == "typical",0,1)) -> dados_predict

#EFICA P
cutoff_efica_p <- cutpointr(dados_predict, parents_total, diag,
method = maximize_metric,
metric = youden)
summary(cutoff_efica_p)
## Method: maximize_metric
## Predictor: parents_total
## Outcome: diag
## Direction: >=
##
## AUC n n_pos n_neg
## 0.88 3449 165 3284
##
## optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
## 55 0.61 0.72 0.9 0.71 149 16 955 2329
##
## Predictor summary:
## Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## overall 0 14 30 44 45 59 81 113 21 0
## 0 0 13 29 43 44 57 77 113 20 0
## 1 34 47 61 73 75 88 104 111 17 0
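As a sanity check on the summary above: the Youden index is simply J = sensitivity + specificity − 1 = 0.90 + 0.71 − 1 = 0.61, which matches the reported value.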
#plot_metric_boot(cutoff_efica_p)
#plot_sensitivity_specificity(cp)
#plot_roc(cutoff_efica_p) + ggtitle("ROC curve for EFICA-P")
#plot_cutpointr(cp, xvar = cutpoints, yvar = sum_sens_spec, conf_lvl = 0.9)
#set.seed(123)
#opt_cut <- cutpointr(dados_predict, parents_total, diag, direction = ">=",
# pos_class = 1,
# neg_class = 0,
# method = maximize_metric, metric = youden, boot_runs = 1000)
#plot_metric(opt_cut)
#summary(opt_cut)
#https://cran.r-project.org/web/packages/cutpointr/vignettes/cutpointr.html

#EFICA T
cutoff_efica_t <- cutpointr(dados_predict, teachers_total, diag,
method = maximize_metric,
metric = youden)
summary(cutoff_efica_t)
## Method: maximize_metric
## Predictor: teachers_total
## Outcome: diag
## Direction: >=
##
## AUC n n_pos n_neg
## 0.92 3449 165 3284
##
## optimal_cutpoint youden acc sensitivity specificity tp fn fp tn
## 45 0.7 0.81 0.89 0.81 147 18 626 2658
##
## Predictor summary:
## Min. 5% 1st Qu. Median Mean 3rd Qu. 95% Max. SD NAs
## overall 0 0 7 22 27 42 74 104 24 0
## 0 0 0 6 21 25 39 68 104 22 0
## 1 0 35 56 70 68 81 95 104 19 0
grid.arrange(plot_roc(cutoff_efica_p) + ggtitle("ROC curve for EFICA-P") + theme_bw(),
plot_roc(cutoff_efica_t) + ggtitle("ROC curve for EFICA-T") + theme_bw(),
ncol = 2, nrow = 1)

Now, I’ll present the trade-off between sensitivity and specificity. In the near future, this may become a Shiny app.
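For reference, the quantities computed inside the function below are sensitivity = TP / (TP + FN) and specificity = TN / (TN + FP). With the EFICA-P cut-off of 55 found above, that gives sensitivity = 149 / (149 + 16) ≈ 0.90 and specificity = 2329 / (2329 + 955) ≈ 0.71.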
plot_efica_roc_curve <- function(cutoff) {
#display table
tabela_roc <- dados_predict %>%
mutate(test_result = if_else(parents_total>=cutoff,"(1)Positive","(2)Negative")) %>%
mutate(diag = if_else(diag==1,"ADHD","Typical")) %>%
group_by(test_result,diag) %>%
count() %>%
pivot_wider(names_from = diag, values_from = n) %>%
ungroup() %>%
add_row(test_result = "Sens&Spec",
"ADHD" = .$ADHD[1]/(.$ADHD[1]+.$ADHD[2]),
"Typical" = .$Typical[2]/(.$Typical[2]+.$Typical[1])) %>%
add_row(test_result = "Accuracy",
"ADHD" = (.$ADHD[1]+.$Typical[2])/(.$ADHD[1]+.$ADHD[2]+.$Typical[2]+.$Typical[1]),
"Typical" = 0) %>%
print() #print and return the table being built by this pipe
#get means for plotting
mean_typical <- dados_predict %>% filter(group == "typical") %>% summarise(mean(parents_total)) %>% pull()
sd_typical <- dados_predict %>% filter(group == "typical") %>% summarise(sd(parents_total)) %>% pull()
mean_adhd <- dados_predict %>% filter(group == "adhd") %>% summarise(mean(parents_total)) %>% pull()
sd_adhd <- dados_predict %>% filter(group == "adhd") %>% summarise(sd(parents_total)) %>% pull()
#plot
gridExtra::grid.arrange(
ggplot(data = data.frame(u = c(-20, 150)),
aes(x = u)) +
geom_density(stat = "function",
alpha=0.6,
fun = dnorm,
mapping = aes(fill='Typical'),
args = list(mean = mean_typical,
sd = sd_typical)) +
geom_density(stat = "function",
alpha=0.6,
fun = dnorm,
mapping = aes(fill='Clinical'),
args = list(mean = mean_adhd,
sd = sd_adhd)) +
geom_vline(xintercept = cutoff, size=2) +
geom_text(y = 0.02, x = cutoff-15, label = " - \n (no disease)") +
geom_text(y = 0.02, x = cutoff+15, label = " + \n (disease)") +
geom_text(y = 0.011, x = 130,
label = paste("Cut-off = ",cutoff)) + #cut-off
geom_text(y = 0.009, x = 130,
label = paste("Sensitivity = ",round(tabela_roc$ADHD[3],2))) + #that's the sensitivity
geom_text(y = 0.007, x = 130, label =
paste("Specificity =",round(tabela_roc$Typical[3],2))) + #that's the specificity
geom_text(y = 0.005, x = 130, label =
paste("Accuracy =",round(tabela_roc$ADHD[4],2))) + #that's the accuracy
scale_fill_grey() +
theme_void() +
theme(legend.position = "none") ,
#ROC CURVE plot
ggplot(dados_predict, aes(d = diag, m = parents_total)) +
geom_vline(xintercept = 1-(tabela_roc$Typical[3]), color = 'grey50') + #1-specificity (False positive rate)
geom_hline(yintercept = (tabela_roc$ADHD[3]), color = 'grey50') + #Sensitivity (True positive)
plotROC::geom_roc(cutoffs.at = cutoff,size = 0.5, size.point = 0.5) +
plotROC::style_roc() + geom_abline(slope = 1, intercept = 0, color = 'grey50')
#geom_text(y = 0.1, x = 0.90, label = paste("AUC ",round(plotROC::calc_auc(groc)[3],2))) #in case I want the area under the curve
)
}

## # A tibble: 4 x 3
## test_result ADHD Typical
## <chr> <dbl> <dbl>
## 1 (1)Positive 149 955
## 2 (2)Negative 16 2329
## 3 Sens&Spec 0.903 0.709
## 4 Accuracy 0.718 0
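The table above matches the optimal EFICA-P cut-off found earlier. Assuming that value, it can be reproduced with the call below (the original call was not echoed in the document):

#assumed call, using the cut-off suggested by cutpointr above
plot_efica_roc_curve(cutoff = 55)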
#https://rviews.rstudio.com/2019/03/01/some-r-packages-for-roc-curves/
#http://www.biosoft.hacettepe.edu.tr/easyROC/
#amazing: https://kennis-research.shinyapps.io/ROC-Curves/ and http://www.navan.name/roc/
library("OptimalCutpoints")
dados_predict <- as.data.frame(dados_predict)
#compute the best cut-off
optimal.cutpoint.Youden <- optimal.cutpoints(X = "parents_total",
status = "diag",
tag.healthy = 0,
methods = "Youden",
data = dados_predict,
pop.prev = NULL,
control = control.cutpoints(),
ci.fit = FALSE,
conf.level = 0.95,
trace = FALSE)
#recompute with the generalized Youden index and confidence intervals
optimal.cutpoint.Youden <- optimal.cutpoints(X = "parents_total", status = "diag", tag.healthy = 0,
methods = "Youden", data = dados_predict, pop.prev = NULL,
control = control.cutpoints(generalized.Youden = TRUE), ci.fit = TRUE, conf.level = 0.95,
trace = FALSE)
#get summary
summary(optimal.cutpoint.Youden)
#print
print(optimal.cutpoint.Youden)
#plot
plot(optimal.cutpoint.Youden, col = "blue")
#https://rpubs.com/thisisrg/supp_code_17 -- interesting
library(pROC)
#the next call would need dados_predict$prediction, which is only created in the caret section below
#roc_obj <- roc(response=dados_predict$diag, predictor=factor(dados_predict$prediction, ordered = TRUE), plot=TRUE)
roc_obj <- roc(response=as.numeric(dados_predict$diag), predictor=dados_predict$parents_total, plot=TRUE)
plot(roc_obj, col="red", lwd=3, main="ROC curve (EFICA-P)")
auc_qda<-auc(roc_obj)
coords(roc_obj, x="best", input="threshold", best.method="youden", transpose = TRUE) #
#library(InformationValue)
#optCutOff <- optimalCutoff(logitMod$diag, predicted)[1]
#http://r-statistics.co/Logistic-Regression-With-R.html
library(ROCR)
predictions <- prediction(dados_predict$parents_total, dados_predict$diag)
predictions
perf <- performance(predictions,"tpr","fpr")
plot(perf,
avg="threshold",
spread.estimate="boxplot")
plot(perf)
perf <- performance(predictions, "tpr", "fpr")
plot(perf,
avg= "threshold",
colorize=TRUE,
lwd= 3,
main= "With ROCR you can produce standard plots\nlike ROC curves ...")
plot(perf,
lty=3,
col="grey78",
add=TRUE)
sens <- data.frame(x=unlist(performance(predictions, "sens")@x.values),
y=unlist(performance(predictions, "sens")@y.values))
spec <- data.frame(x=unlist(performance(predictions, "spec")@x.values),
y=unlist(performance(predictions, "spec")@y.values))
sens %>% ggplot(aes(x,y)) +
geom_line() +
geom_line(data=spec, aes(x,y,col="red")) +
scale_y_continuous(sec.axis = sec_axis(~., name = "Specificity")) +
labs(x='Cutoff', y="Sensitivity") +
theme(axis.title.y.right = element_text(colour = "red"), legend.position="none") +
geom_vline(xintercept = 61)
#https://stackoverflow.com/questions/23240182/deciding-threshold-for-glm-logistic-regression-model-in-r
sens = cbind(unlist(performance(predictions, "sens")@x.values), unlist(performance(predictions, "sens")@y.values))
spec = cbind(unlist(performance(predictions, "spec")@x.values), unlist(performance(predictions, "spec")@y.values))
sens[which.min(apply(sens, 1, function(x) min(colSums(abs(t(spec) - x))))), 1]

ds_full %>%
filter(age_group == "[5,6]") %>%
select(group, parents_total) %>%
mutate(diag = factor(if_else(group == "typical",0,1))) -> dados_predict
str(dados_predict)
#https://community.rstudio.com/t/how-to-choose-best-threshold-value-automatically/12910
library(caret)
controlparameter <- trainControl(method="cv",
number=10,
savePredictions = TRUE)
##Apply Logistic Regression with K fold Cross Validation
##train
modelglm <- train(diag ~ parents_total,data = dados_predict,
method="glm",
family="binomial",
trControl=controlparameter)
modelglm
## Make prediction with mymodelglm to train data
prediction <- predict(modelglm,dados_predict)
#add to dataset
dados_predict$prediction <- prediction
##Validation of Prediction
confu_mat <- table(prediction=prediction, actual=dados_predict$diag)
confu_mat
confusionMatrix(data=dados_predict$prediction, reference=dados_predict$diag, positive = "1")