library(dplyr)
library(plotly)
library(ggplot2)
library(tidyr)
library(magrittr)
library(plotrix)
library(rgl)
library(car)
library(lubridate)
library(ggplot2)
library(GGally)
library(corrplot)
library(corrgram)
library(ppcor)
library(readxl)
# Load the IMI/IPT workbook, declaring the type of each of the 24 columns
# explicitly instead of letting readxl guess them.
IMI_e_IPT <- read_excel(
  "IMI e IPT.xlsx",
  col_types = c(
    "numeric", "text", "text",
    "text", "text", "text", "numeric",
    "text", "numeric", "text", "text",
    "text", "text", "text", "numeric",
    "numeric", "numeric", "numeric",
    "numeric", "numeric", "numeric",
    "numeric", "numeric", "numeric"
  )
)
## New names:
## * `` -> ...1

# Replace every missing value with zero. The replacement is typed per column
# (0 for numeric columns, "0" for text columns): tidyr >= 1.2.0 casts the
# replacement to the column type and rejects a numeric 0 for a character
# column, which is what mutate_all(replace_na, 0) asked for. The resulting
# values are the same as the ones older tidyr versions produced.
IMI_e_IPT <- IMI_e_IPT %>%
  mutate(
    across(where(is.numeric), ~ replace_na(.x, 0)),
    across(where(is.character), ~ replace_na(.x, "0"))
  )

dim(IMI_e_IPT)
## [1] 2858   24
# IPT values flagged as outliers by the boxplot rule (points lying beyond the
# whiskers); plot = FALSE returns the statistics without drawing anything.
outliers <- boxplot(IMI_e_IPT$IPT, plot = FALSE)$out

# Keep only the rows whose IPT is NOT an outlier. The previous filter lacked
# the negation and therefore kept *only* the outlying rows (349 of 2858), the
# opposite of what the object name and the "Sem outliers" section intend.
# Output recorded further down from `no_outliers` predates this fix and has
# to be regenerated.
no_outliers <- IMI_e_IPT[!(IMI_e_IPT$IPT %in% outliers), ]

# Data for the scatterplot matrix: columns 15 to 24 of the sheet, i.e. the
# last ten columns, all of which were read as numeric.
graf <- IMI_e_IPT[15:24]

dim(graf)
## [1] 2858   10

0.1 Coerência do modelo de análise

1 Testes de normalidade

Com seu papel no teorema central do limite, a distribuição normal é encontrada em muitos dos testes estatísticos chamados gaussianos ou assintoticamente gaussianos. O pressuposto de normalidade é feito sobre uma distribuição a priori em um teste de aderência para indicar que esta distribuição segue aproximadamente uma distribuição normal. Existem vários testes de normalidade.

# Diagonal panel for pairs(): draws a histogram of `x` inside the current
# panel, with bar heights rescaled so that the tallest bar has height 1.
#
# x   : numeric vector shown on this diagonal cell.
# ... : further graphical arguments forwarded to rect().
panel.hist <- function(x, ...)
{
  # Keep the x range of the panel but set the y range to 0..1.5, so the
  # unit-height bars fill the lower two thirds of the cell. par() returns the
  # previous value as a named list, which is restored on exit. The former
  # `on.exit(par(usr))` handed par() an unnamed numeric vector, which it does
  # not interpret as a setting, so the coordinates were never restored.
  usr <- par("usr")
  old_par <- par(usr = c(usr[1:2], 0, 1.5))
  on.exit(par(old_par), add = TRUE)

  h <- hist(x, plot = FALSE)
  breaks <- h$breaks
  nB <- length(breaks)
  # Relative bar heights: counts divided by the largest count
  y <- h$counts / max(h$counts)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}

# 1.2 by Melina de Souza Leite 
# Off-diagonal panel for pairs(): scatter plot of y against x with the
# least-squares line of y on x drawn on top.
#
# x, y         : numeric vectors for the two variables of the panel.
# col, bg, pch : point colour, fill and symbol, passed to points().
# cex          : point size multiplier, passed to points().
# col.line     : colour of the fitted line.
panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
                      cex = 1, col.line="red") {
  points(x, y, pch = pch, col = col, bg = bg, cex = cex)

  # Only pairs in which both coordinates are finite take part in the fit
  usable <- is.finite(x) & is.finite(y)
  if (!any(usable)) {
    return(invisible(NULL))
  }

  x_fit <- x[usable]
  y_fit <- y[usable]
  abline(lm(y_fit ~ x_fit), col = col.line)
}

# 1.3 help(pairs) by Melina de Souza Leite 
# Off-diagonal panel for pairs(): prints the absolute correlation between x
# and y in the middle of the panel, with a text size proportional to it.
#
# x, y    : numeric vectors for the two variables of the panel.
# digits  : number of significant digits used to format the correlation.
# prefix  : string pasted in front of the formatted value.
# cex.cor : base text size; when missing it is chosen from the width of the
#           label (0.8 / strwidth).
# ...     : accepted so pairs() can pass extra arguments; not used here.
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  # Work in unit coordinates so (0.5, 0.5) is the centre of the panel. par()
  # returns the previous value as a named list, which is restored on exit.
  # The former `on.exit(par(usr))` handed par() an unnamed numeric vector,
  # which it does not interpret as a setting, so nothing was restored.
  old_par <- par(usr = c(0, 1, 0, 1))
  on.exit(par(old_par), add = TRUE)

  r <- abs(cor(x, y))
  # Formatting r together with a long decimal fixes the number of digits shown
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if (missing(cex.cor)) cex.cor <- 0.8 / strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}



# Scatterplot matrix of `graf`: histograms on the diagonal, absolute
# correlations above it and scatter plots with a fitted line below it.
pairs(
  x = graf,
  main = "Correlação Multivariável",
  lower.panel = panel.lm,
  upper.panel = panel.cor,
  diag.panel = panel.hist
)

1.1 Dados não normalizados

# Descriptive statistics of the untransformed indices. In the recorded output
# IPT has a median (0.045) far below its mean (0.211) and a maximum of 5.15,
# whereas IMI has mean and median nearly equal.
summary(IMI_e_IPT$IPT)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.04546 0.21083 0.21212 5.15000
summary(IMI_e_IPT$IMI)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2304  0.4178  0.5025  0.5051  0.5868  1.0234
dim(IMI_e_IPT)
## [1] 2858   24
# Draw the two histograms side by side (one row, two columns). The mfrow
# setting stays in effect for later plots until it is changed again.
par(mfrow=c(1,2))
hist(IMI_e_IPT$IMI)
hist(IMI_e_IPT$IPT)

1.1.1 Shapiro-Wilk test

O método de Shapiro-Wilk é amplamente recomendado para teste de normalidade e fornece melhor potência que o K-S. É baseado na correlação entre os dados e as pontuações normais correspondentes.

A partir da saída, um valor p > 0,05 implica que a distribuição dos dados não é significativamente diferente da distribuição normal. Em outras palavras, podemos assumir a normalidade.

Neste caso o teste mostra que a distribuição não é normal nos dados não normalizados.

# Shapiro-Wilk normality test on the untransformed indices. Each test is run
# once; the same two calls used to be repeated verbatim right after, yielding
# identical output a second time. Both p-values are far below 0.05, so
# normality is rejected for IPT and for IMI.
shapiro.test(IMI_e_IPT$IPT)
## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IPT
## W = 0.52019, p-value < 2.2e-16
shapiro.test(IMI_e_IPT$IMI)
## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IMI
## W = 0.99093, p-value = 1.775e-12

1.2 Dados normalizados

# Transformed copy of the data used for the "normalised" analysis:
#   IPT_t = 0.510 + log(IPT)   (log scale, shifted by a constant)
#   IMI_t = 10 + IMI           (shifted by a constant)
# Rows with IPT == 0 give log(0) = -Inf and are dropped; is.finite() states
# that directly instead of comparing a number with the string "-Inf".
# Missing values were already replaced by zero when the sheet was loaded, so
# the NA replacement is not repeated here.
norm_MI_PT <- IMI_e_IPT %>%
  mutate(
    IPT_t = 0.510 + log(IPT),
    IMI_t = 10 + IMI
  ) %>%
  filter(is.finite(IPT_t))
  


# Descriptive statistics of the transformed indices. Only 1496 of the 2858
# rows remain, because rows whose transformed IPT is -Inf were filtered out.
summary(norm_MI_PT$IPT_t)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.8207 -1.9749 -1.0994 -1.0274 -0.1831  2.1490
summary(norm_MI_PT$IMI_t)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.23   10.42   10.50   10.50   10.58   10.94
dim(norm_MI_PT)
## [1] 1496   26
# Draw the two histograms side by side (one row, two columns)
par(mfrow=c(1,2))
hist(norm_MI_PT$IPT_t)
hist(norm_MI_PT$IMI_t)

1.2.1 Shapiro-Wilk test

# Shapiro-Wilk normality test on the transformed indices. W for IPT_t rises
# to 0.98 (from 0.52 on the raw IPT), but both p-values are still far below
# 0.05, so normality is rejected after the transformation as well.
shapiro.test(norm_MI_PT$IPT_t)
## 
##  Shapiro-Wilk normality test
## 
## data:  norm_MI_PT$IPT_t
## W = 0.98028, p-value = 1.854e-13
shapiro.test(norm_MI_PT$IMI_t)
## 
##  Shapiro-Wilk normality test
## 
## data:  norm_MI_PT$IMI_t
## W = 0.99172, p-value = 1.874e-07

1.3 Verificação de ajuste do modelo

As estimativas são os coeficientes das variáveis independentes do modelo linear (todas as porcentagens) e refletem a alteração estimada na variável dependente IPT (a qual foi avaliada em sua composição separadamente em Patentes, Produtos e Aplicativos) quando a variável independente correspondente é alterada.

Portanto, para cada aumento de 1% em uma variável independente (X), espera-se um aumento ou uma diminuição significativa (nos resultados em que aparecem asteriscos) da variável dependente “Produção de Tecnologias (IPT)” (Y), mantendo todas as outras variáveis constantes.

Variável dependente (Y)

Y = somatório(Y1, Y2, Y3)

Y1 = Quantitativo de produção de patentes por Programa (SPPP/QPPP)

Y2 = Quantitativo de produção de produtos por Programa (SPPPr/QPP)

Y3 = Quantitativo de produção de Aplicativos por Programa (SAPP/QPPP)

Variáveis independentes (X)

X = somatório(FCDo, FCDi, CC, CP)

X1 = Formação do corpo docente dos PPG (FCDo)

X2 = Formação do corpo discente dos PPG (FCDi)

X3 = Colaboração Científica (CC)

X4 = Contexto profissional (CP)

X5 = DEPENDENCIA ADM

X6 = CONCEITO

X7 = UFPROGRAMA

1.4 Produção de Tecnologia (IPT= Patentes+Produtos+Aplicativos)

1.4.1 Com outliers

1.4.1.1 Regressão linear

# Linear model on the full data set (outliers included). The response is
# sqrt(IPT); predictors are the four numeric indices, three categorical
# variables entered as factors and the FCDi x CC interaction. "- 1" removes
# the intercept, so the first factor gets one coefficient per level.
fitIPT <- lm(
  formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1,
  data = IMI_e_IPT
)
summary(fitIPT)
## 
## Call:
## lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = IMI_e_IPT)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84317 -0.26218 -0.09848  0.17089  1.83521 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## FCDo                              0.296810   0.045703   6.494 9.82e-11 ***
## FCDi                              0.035470   0.114392   0.310 0.756529    
## CC                               -0.305536   0.093859  -3.255 0.001146 ** 
## CP                                0.078916   0.066954   1.179 0.238639    
## as.factor(DEPENDENCIAADM)PRIVADA  0.005782   0.214983   0.027 0.978545    
## as.factor(DEPENDENCIAADM)PÚBLICA -0.052682   0.213397  -0.247 0.805023    
## as.factor(CONCEITO)4              0.051471   0.017363   2.964 0.003059 ** 
## as.factor(CONCEITO)5              0.078061   0.023378   3.339 0.000851 ***
## as.factor(CONCEITO)6              0.143020   0.030833   4.638 3.67e-06 ***
## as.factor(CONCEITO)7              0.162759   0.040396   4.029 5.75e-05 ***
## as.factor(UFPROGRAMA)AL           0.336929   0.212819   1.583 0.113495    
## as.factor(UFPROGRAMA)AM           0.170054   0.210222   0.809 0.418627    
## as.factor(UFPROGRAMA)AP           0.286551   0.286711   0.999 0.317666    
## as.factor(UFPROGRAMA)BA           0.251209   0.204912   1.226 0.220324    
## as.factor(UFPROGRAMA)CE           0.244529   0.206071   1.187 0.235475    
## as.factor(UFPROGRAMA)DF           0.237316   0.206512   1.149 0.250585    
## as.factor(UFPROGRAMA)ES           0.195708   0.208323   0.939 0.347582    
## as.factor(UFPROGRAMA)GO           0.200451   0.206537   0.971 0.331863    
## as.factor(UFPROGRAMA)MA           0.205383   0.209770   0.979 0.327620    
## as.factor(UFPROGRAMA)MG           0.287319   0.203846   1.409 0.158799    
## as.factor(UFPROGRAMA)MS           0.178421   0.210932   0.846 0.397698    
## as.factor(UFPROGRAMA)MT           0.147492   0.208995   0.706 0.480420    
## as.factor(UFPROGRAMA)PA           0.245011   0.206412   1.187 0.235327    
## as.factor(UFPROGRAMA)PB           0.230307   0.213089   1.081 0.279877    
## as.factor(UFPROGRAMA)PE           0.291358   0.205269   1.419 0.155894    
## as.factor(UFPROGRAMA)PI           0.356397   0.231281   1.541 0.123436    
## as.factor(UFPROGRAMA)PR           0.291522   0.204107   1.428 0.153322    
## as.factor(UFPROGRAMA)RJ           0.204828   0.203971   1.004 0.315368    
## as.factor(UFPROGRAMA)RN           0.333091   0.206613   1.612 0.107042    
## as.factor(UFPROGRAMA)RO           0.228228   0.231169   0.987 0.323591    
## as.factor(UFPROGRAMA)RR          -0.055025   0.405356  -0.136 0.892033    
## as.factor(UFPROGRAMA)RS           0.268258   0.204146   1.314 0.188937    
## as.factor(UFPROGRAMA)SC           0.230534   0.205866   1.120 0.262883    
## as.factor(UFPROGRAMA)SE           0.717449   0.225157   3.186 0.001456 ** 
## as.factor(UFPROGRAMA)SP           0.186287   0.203858   0.914 0.360896    
## as.factor(UFPROGRAMA)TO           0.275421   0.217391   1.267 0.205282    
## FCDi:CC                           0.083228   0.177392   0.469 0.638980    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.351 on 2821 degrees of freedom
## Multiple R-squared:  0.4233, Adjusted R-squared:  0.4157 
## F-statistic: 55.96 on 37 and 2821 DF,  p-value: < 2.2e-16
# Diagnostic plots of the linear model; the recorded warning reports that
# observation 1268 has leverage one and is left out of the plots.
plot(fitIPT)
## Warning: not plotting observations with leverage one:
##   1268

## Warning: not plotting observations with leverage one:
##   1268

Não possui normalidade nos dados para garantir os pressupostos da análise

1.4.1.2 Regressão de Poisson

# Poisson GLM with the same terms as the linear model, fitted on the full
# data set (outliers included).
# NOTE(review): the response sqrt(IPT) is not a count and the recorded
# summary reports "AIC: Inf" - confirm that the Poisson family is intended.
regpoisson <- glm(
  sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1,
  family = "poisson",
  data = IMI_e_IPT
)

summary(regpoisson)
## 
## Call:
## glm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4073  -0.7081  -0.2271   0.3074   2.1756  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.073340   0.245773   4.367 1.26e-05 ***
## FCDi                               0.008651   0.642063   0.013  0.98925    
## CC                                -1.359096   0.565538  -2.403  0.01625 *  
## CP                                 0.292796   0.346992   0.844  0.39877    
## as.factor(DEPENDENCIAADM)PRIVADA -14.173973 440.683395  -0.032  0.97434    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.363336 440.683374  -0.033  0.97400    
## as.factor(CONCEITO)4               0.181913   0.093508   1.945  0.05173 .  
## as.factor(CONCEITO)5               0.265754   0.123789   2.147  0.03181 *  
## as.factor(CONCEITO)6               0.473919   0.154755   3.062  0.00220 ** 
## as.factor(CONCEITO)7               0.562573   0.204935   2.745  0.00605 ** 
## as.factor(UFPROGRAMA)AL           13.178131 440.683337   0.030  0.97614    
## as.factor(UFPROGRAMA)AM           12.529444 440.683384   0.028  0.97732    
## as.factor(UFPROGRAMA)AP           13.029627 440.684315   0.030  0.97641    
## as.factor(UFPROGRAMA)BA           12.908784 440.683253   0.029  0.97663    
## as.factor(UFPROGRAMA)CE           12.876676 440.683271   0.029  0.97669    
## as.factor(UFPROGRAMA)DF           12.861083 440.683272   0.029  0.97672    
## as.factor(UFPROGRAMA)ES           12.683449 440.683319   0.029  0.97704    
## as.factor(UFPROGRAMA)GO           12.689389 440.683291   0.029  0.97703    
## as.factor(UFPROGRAMA)MA           12.705164 440.683340   0.029  0.97700    
## as.factor(UFPROGRAMA)MG           13.019330 440.683237   0.030  0.97643    
## as.factor(UFPROGRAMA)MS           12.574788 440.683379   0.029  0.97724    
## as.factor(UFPROGRAMA)MT           12.389935 440.683374   0.028  0.97757    
## as.factor(UFPROGRAMA)PA           12.887794 440.683276   0.029  0.97667    
## as.factor(UFPROGRAMA)PB           12.827642 440.683387   0.029  0.97678    
## as.factor(UFPROGRAMA)PE           13.039558 440.683254   0.030  0.97639    
## as.factor(UFPROGRAMA)PI           13.255723 440.683543   0.030  0.97600    
## as.factor(UFPROGRAMA)PR           13.038930 440.683240   0.030  0.97640    
## as.factor(UFPROGRAMA)RJ           12.740610 440.683242   0.029  0.97694    
## as.factor(UFPROGRAMA)RN           13.173844 440.683265   0.030  0.97615    
## as.factor(UFPROGRAMA)RO           12.799823 440.683760   0.029  0.97683    
## as.factor(UFPROGRAMA)RR           -0.201781 890.473538   0.000  0.99982    
## as.factor(UFPROGRAMA)RS           12.955616 440.683241   0.029  0.97655    
## as.factor(UFPROGRAMA)SC           12.835612 440.683264   0.029  0.97676    
## as.factor(UFPROGRAMA)SE           13.863189 440.683341   0.031  0.97490    
## as.factor(UFPROGRAMA)SP           12.673996 440.683240   0.029  0.97706    
## as.factor(UFPROGRAMA)TO           13.004187 440.683410   0.030  0.97646    
## FCDi:CC                            0.542113   1.045530   0.519  0.60411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3359.1  on 2858  degrees of freedom
## Residual deviance: 1226.9  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.4.1.3 Árvore de decisão

library(rpart)
library(rpart.plot)

# Regression tree for sqrt(IPT) on the full data set (outliers included),
# using the four numeric indices plus DEPENDENCIAADM and CONCEITO as factors.
# UFPROGRAMA, present in the regressions, is not part of this formula.
arvore <- rpart(
  sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO),
  data = IMI_e_IPT
)

summary(arvore)
## Call:
## rpart(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = IMI_e_IPT)
##   n= 2858 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.01420417      0 1.0000000 1.0004892 0.03933496
## 2 0.01060735      2 0.9715917 0.9823271 0.03806442
## 3 0.01000000      3 0.9609843 0.9814367 0.03816198
## 
## Variable importance
##                      FCDo                        CC                      FCDi 
##                        50                        23                        14 
##       as.factor(CONCEITO) as.factor(DEPENDENCIAADM)                        CP 
##                         7                         3                         2 
## 
## Node number 1: 2858 observations,    complexity param=0.01420417
##   mean=0.2842771, MSE=0.1300202 
##   left son=2 (1465 obs) right son=3 (1393 obs)
##   Primary splits:
##       CC                        < 0.5714653 to the right, improve=0.012976080, (0 missing)
##       FCDo                      < 0.5373357 to the left,  improve=0.012306860, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.004831066, (0 missing)
##       as.factor(CONCEITO)       splits as  LRRRR, improve=0.004750481, (0 missing)
##       CP                        < 0.4605808 to the left,  improve=0.003011676, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.3727152 to the right, agree=0.687, adj=0.358, (0 split)
##       FCDo                      < 0.4293395 to the right, agree=0.648, adj=0.277, (0 split)
##       as.factor(CONCEITO)       splits as  LRRRR, agree=0.642, adj=0.266, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  LR, agree=0.537, adj=0.050, (0 split)
##       CP                        < 0.5328175 to the left,  agree=0.526, adj=0.027, (0 split)
## 
## Node number 2: 1465 observations,    complexity param=0.01060735
##   mean=0.2442242, MSE=0.1189336 
##   left son=4 (771 obs) right son=5 (694 obs)
##   Primary splits:
##       FCDo                      < 0.570028  to the left,  improve=0.022622340, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.012423700, (0 missing)
##       FCDi                      < 0.4847926 to the left,  improve=0.008504852, (0 missing)
##       as.factor(CONCEITO)       splits as  LRRRL, improve=0.006687891, (0 missing)
##       CC                        < 1.022328  to the right, improve=0.006214590, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.4644661 to the left,  agree=0.655, adj=0.271, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.579, adj=0.111, (0 split)
##       CP                        < 0.4581944 to the left,  agree=0.576, adj=0.105, (0 split)
##       CC                        < 0.6402778 to the left,  agree=0.560, adj=0.072, (0 split)
##       as.factor(CONCEITO)       splits as  RLLLL, agree=0.552, adj=0.055, (0 split)
## 
## Node number 3: 1393 observations,    complexity param=0.01420417
##   mean=0.3264003, MSE=0.1382183 
##   left son=6 (1039 obs) right son=7 (354 obs)
##   Primary splits:
##       FCDo                      < 0.5373357 to the left,  improve=0.029784190, (0 missing)
##       CP                        < 0.4523575 to the left,  improve=0.005847186, (0 missing)
##       FCDi                      < 0.5564663 to the left,  improve=0.004290214, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.002849396, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.002368822, (0 missing)
##   Surrogate splits:
##       FCDi < 0.7564272 to the left,  agree=0.753, adj=0.028, (0 split)
##       CC   < 0.5708403 to the left,  agree=0.747, adj=0.003, (0 split)
## 
## Node number 4: 771 observations
##   mean=0.1950119, MSE=0.09876409 
## 
## Node number 5: 694 observations
##   mean=0.2988967, MSE=0.1356614 
## 
## Node number 6: 1039 observations
##   mean=0.2889487, MSE=0.1104584 
## 
## Node number 7: 354 observations
##   mean=0.4363215, MSE=0.203495
# Draw the tree fitted on the full data set
rpart.plot(arvore)

1.4.2 Sem outliers

# Linear model refitted on `no_outliers`; this overwrites the `fitIPT` object
# from the full-data fit. Same terms as before except that the FCDi x CC
# interaction is not included; "- 1" again removes the intercept.
fitIPT <- lm(
  formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO) + as.factor(UFPROGRAMA) - 1,
  data = no_outliers
)
summary(fitIPT)
## 
## Call:
## lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) - 1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48865 -0.17996 -0.05381  0.13491  1.14266 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## FCDo                             -0.032993   0.107500  -0.307   0.7591    
## FCDi                              0.087610   0.091725   0.955   0.3402    
## CC                                0.029715   0.099763   0.298   0.7660    
## CP                                0.332823   0.160761   2.070   0.0392 *  
## as.factor(DEPENDENCIAADM)PRIVADA  0.886206   0.148588   5.964 6.60e-09 ***
## as.factor(DEPENDENCIAADM)PÚBLICA  0.823164   0.138450   5.946 7.31e-09 ***
## as.factor(CONCEITO)4              0.061117   0.040729   1.501   0.1345    
## as.factor(CONCEITO)5              0.005148   0.051596   0.100   0.9206    
## as.factor(CONCEITO)6              0.067716   0.063203   1.071   0.2848    
## as.factor(CONCEITO)7              0.117540   0.089545   1.313   0.1903    
## as.factor(UFPROGRAMA)AM           0.004078   0.189628   0.022   0.9829    
## as.factor(UFPROGRAMA)AP          -0.221887   0.293099  -0.757   0.4496    
## as.factor(UFPROGRAMA)BA          -0.068495   0.123099  -0.556   0.5783    
## as.factor(UFPROGRAMA)CE          -0.102848   0.137101  -0.750   0.4537    
## as.factor(UFPROGRAMA)DF          -0.163843   0.128162  -1.278   0.2021    
## as.factor(UFPROGRAMA)ES          -0.065050   0.171513  -0.379   0.7047    
## as.factor(UFPROGRAMA)GO          -0.114053   0.147563  -0.773   0.4402    
## as.factor(UFPROGRAMA)MA           0.086344   0.172345   0.501   0.6167    
## as.factor(UFPROGRAMA)MG          -0.082697   0.113330  -0.730   0.4661    
## as.factor(UFPROGRAMA)MS          -0.302045   0.195083  -1.548   0.1226    
## as.factor(UFPROGRAMA)MT           0.023450   0.222787   0.105   0.9162    
## as.factor(UFPROGRAMA)PA          -0.048164   0.160980  -0.299   0.7650    
## as.factor(UFPROGRAMA)PB          -0.052877   0.172490  -0.307   0.7594    
## as.factor(UFPROGRAMA)PE           0.021935   0.121647   0.180   0.8570    
## as.factor(UFPROGRAMA)PI          -0.135106   0.188768  -0.716   0.4747    
## as.factor(UFPROGRAMA)PR           0.007321   0.115117   0.064   0.9493    
## as.factor(UFPROGRAMA)RJ          -0.056194   0.116527  -0.482   0.6300    
## as.factor(UFPROGRAMA)RN           0.001444   0.125321   0.012   0.9908    
## as.factor(UFPROGRAMA)RO          -0.290385   0.293091  -0.991   0.3226    
## as.factor(UFPROGRAMA)RS           0.079893   0.114530   0.698   0.4860    
## as.factor(UFPROGRAMA)SC          -0.091232   0.128947  -0.708   0.4798    
## as.factor(UFPROGRAMA)SE           0.316064   0.149289   2.117   0.0350 *  
## as.factor(UFPROGRAMA)SP          -0.101283   0.117108  -0.865   0.3878    
## as.factor(UFPROGRAMA)TO          -0.186034   0.220446  -0.844   0.3994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2727 on 315 degrees of freedom
## Multiple R-squared:  0.9412, Adjusted R-squared:  0.9348 
## F-statistic: 148.2 on 34 and 315 DF,  p-value: < 2.2e-16
# Diagnostic plots of the model fitted on `no_outliers`; the recorded warning
# reports that observations 97 and 106 have leverage one and are left out.
plot(fitIPT)
## Warning: not plotting observations with leverage one:
##   97, 106

## Warning: not plotting observations with leverage one:
##   97, 106

Próximo da normalidade, mas ainda não consegue garantir os pressupostos.

 # Spearman rank correlation between the two indices: first on the
 # transformed columns, then on the raw ones.
 print(paste(
   "Correlação normalizada",
   with(norm_MI_PT, cor(IPT_t, IMI_t, method = "spearman"))
 ))
## [1] "Correlação normalizada 0.0929039972121071"
 print(paste(
   "Correlação sem normalização",
   with(IMI_e_IPT, cor(IPT, IMI, method = "spearman"))
 ))
## [1] "Correlação sem normalização -0.000893884909316986"

1.4.2.1 Regressão de Poisson

# Poisson GLM for the "Sem outliers" section, fitted on `no_outliers`. It was
# previously fitted on IMI_e_IPT, which simply reproduced the full-data model
# of section 1.4.1.2 (the recorded output is identical). The output recorded
# below predates this change and has to be regenerated.
# NOTE(review): the response sqrt(IPT) is not a count - confirm that the
# Poisson family is intended.
regpoisson <- glm(
  sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1,
  family = "poisson",
  data = no_outliers
)

summary(regpoisson)
## 
## Call:
## glm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4073  -0.7081  -0.2271   0.3074   2.1756  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.073340   0.245773   4.367 1.26e-05 ***
## FCDi                               0.008651   0.642063   0.013  0.98925    
## CC                                -1.359096   0.565538  -2.403  0.01625 *  
## CP                                 0.292796   0.346992   0.844  0.39877    
## as.factor(DEPENDENCIAADM)PRIVADA -14.173973 440.683395  -0.032  0.97434    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.363336 440.683374  -0.033  0.97400    
## as.factor(CONCEITO)4               0.181913   0.093508   1.945  0.05173 .  
## as.factor(CONCEITO)5               0.265754   0.123789   2.147  0.03181 *  
## as.factor(CONCEITO)6               0.473919   0.154755   3.062  0.00220 ** 
## as.factor(CONCEITO)7               0.562573   0.204935   2.745  0.00605 ** 
## as.factor(UFPROGRAMA)AL           13.178131 440.683337   0.030  0.97614    
## as.factor(UFPROGRAMA)AM           12.529444 440.683384   0.028  0.97732    
## as.factor(UFPROGRAMA)AP           13.029627 440.684315   0.030  0.97641    
## as.factor(UFPROGRAMA)BA           12.908784 440.683253   0.029  0.97663    
## as.factor(UFPROGRAMA)CE           12.876676 440.683271   0.029  0.97669    
## as.factor(UFPROGRAMA)DF           12.861083 440.683272   0.029  0.97672    
## as.factor(UFPROGRAMA)ES           12.683449 440.683319   0.029  0.97704    
## as.factor(UFPROGRAMA)GO           12.689389 440.683291   0.029  0.97703    
## as.factor(UFPROGRAMA)MA           12.705164 440.683340   0.029  0.97700    
## as.factor(UFPROGRAMA)MG           13.019330 440.683237   0.030  0.97643    
## as.factor(UFPROGRAMA)MS           12.574788 440.683379   0.029  0.97724    
## as.factor(UFPROGRAMA)MT           12.389935 440.683374   0.028  0.97757    
## as.factor(UFPROGRAMA)PA           12.887794 440.683276   0.029  0.97667    
## as.factor(UFPROGRAMA)PB           12.827642 440.683387   0.029  0.97678    
## as.factor(UFPROGRAMA)PE           13.039558 440.683254   0.030  0.97639    
## as.factor(UFPROGRAMA)PI           13.255723 440.683543   0.030  0.97600    
## as.factor(UFPROGRAMA)PR           13.038930 440.683240   0.030  0.97640    
## as.factor(UFPROGRAMA)RJ           12.740610 440.683242   0.029  0.97694    
## as.factor(UFPROGRAMA)RN           13.173844 440.683265   0.030  0.97615    
## as.factor(UFPROGRAMA)RO           12.799823 440.683760   0.029  0.97683    
## as.factor(UFPROGRAMA)RR           -0.201781 890.473538   0.000  0.99982    
## as.factor(UFPROGRAMA)RS           12.955616 440.683241   0.029  0.97655    
## as.factor(UFPROGRAMA)SC           12.835612 440.683264   0.029  0.97676    
## as.factor(UFPROGRAMA)SE           13.863189 440.683341   0.031  0.97490    
## as.factor(UFPROGRAMA)SP           12.673996 440.683240   0.029  0.97706    
## as.factor(UFPROGRAMA)TO           13.004187 440.683410   0.030  0.97646    
## FCDi:CC                            0.542113   1.045530   0.519  0.60411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3359.1  on 2858  degrees of freedom
## Residual deviance: 1226.9  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.4.2.2 Árvore de decisão

# Regression tree for sqrt(IPT) fitted on `no_outliers`, with the same
# formula as the full-data tree.
arvore_nout <- rpart(
  sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) +
    as.factor(CONCEITO),
  data = no_outliers
)

summary(arvore_nout)
## Call:
## rpart(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.02282279      0 1.0000000 1.005645 0.1197302
## 2 0.01774939      3 0.9294627 1.152528 0.1290911
## 3 0.01624681      8 0.8385899 1.226711 0.1335846
## 4 0.01436838      9 0.8223431 1.245826 0.1332226
## 5 0.01114782     10 0.8079747 1.290706 0.1375681
## 6 0.01084124     14 0.7633834 1.329958 0.1431779
## 7 0.01020310     16 0.7417010 1.328174 0.1432480
## 8 0.01000000     18 0.7212948 1.325013 0.1430175
## 
## Variable importance
##                      FCDi                      FCDo                        CC 
##                        30                        21                        20 
##       as.factor(CONCEITO)                        CP as.factor(DEPENDENCIAADM) 
##                        15                         9                         5 
## 
## Node number 1: 349 observations,    complexity param=0.02282279
##   mean=1.03088, MSE=0.07822102 
##   left son=2 (270 obs) right son=3 (79 obs)
##   Primary splits:
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.018491400, (0 missing)
##       CP                        < 0.6217262 to the left,  improve=0.016534390, (0 missing)
##       CC                        < 0.9772398 to the left,  improve=0.016472820, (0 missing)
##       FCDi                      < 0.9089069 to the left,  improve=0.011495880, (0 missing)
##       FCDo                      < 0.5055856 to the left,  improve=0.009905112, (0 missing)
##   Surrogate splits:
##       FCDo < 0.7656863 to the left,  agree=0.805, adj=0.139, (0 split)
##       FCDi < 0.8198052 to the left,  agree=0.785, adj=0.051, (0 split)
##       CP   < 0.2847222 to the right, agree=0.779, adj=0.025, (0 split)
## 
## Node number 2: 270 observations,    complexity param=0.01774939
##   mean=1.010308, MSE=0.06725982 
##   left son=4 (9 obs) right son=5 (261 obs)
##   Primary splits:
##       FCDi                < 0.8153409 to the right, improve=0.018277410, (0 missing)
##       CP                  < 0.4491571 to the left,  improve=0.017833270, (0 missing)
##       FCDo                < 0.5055856 to the left,  improve=0.012773410, (0 missing)
##       CC                  < 0.5241911 to the left,  improve=0.012177220, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.005276337, (0 missing)
## 
## Node number 3: 79 observations,    complexity param=0.02282279
##   mean=1.101189, MSE=0.1092935 
##   left son=6 (72 obs) right son=7 (7 obs)
##   Primary splits:
##       FCDi                < 0.8568548 to the left,  improve=0.08585471, (0 missing)
##       as.factor(CONCEITO) splits as  LRLLL, improve=0.05595841, (0 missing)
##       CP                  < 0.5502976 to the left,  improve=0.04253452, (0 missing)
##       CC                  < 0.744621  to the left,  improve=0.02808771, (0 missing)
##       FCDo                < 0.5505051 to the right, improve=0.02545831, (0 missing)
## 
## Node number 4: 9 observations
##   mean=0.8214936, MSE=0.01130151 
## 
## Node number 5: 261 observations,    complexity param=0.01774939
##   mean=1.016818, MSE=0.06791769 
##   left son=10 (110 obs) right son=11 (151 obs)
##   Primary splits:
##       CC                  < 0.5241911 to the left,  improve=0.017179430, (0 missing)
##       FCDo                < 0.5055856 to the left,  improve=0.016499930, (0 missing)
##       CP                  < 0.4491571 to the left,  improve=0.013661420, (0 missing)
##       FCDi                < 0.305839  to the left,  improve=0.009885776, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.003872234, (0 missing)
##   Surrogate splits:
##       FCDi                < 0.2837121 to the left,  agree=0.759, adj=0.427, (0 split)
##       as.factor(CONCEITO) splits as  RRLLL, agree=0.655, adj=0.182, (0 split)
##       FCDo                < 0.4359649 to the left,  agree=0.651, adj=0.173, (0 split)
##       CP                  < 0.3026042 to the left,  agree=0.586, adj=0.018, (0 split)
## 
## Node number 6: 72 observations,    complexity param=0.02282279
##   mean=1.070985, MSE=0.08319642 
##   left son=12 (44 obs) right son=13 (28 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRLLL, improve=0.11344000, (0 missing)
##       CP                  < 0.5502976 to the left,  improve=0.10829070, (0 missing)
##       FCDo                < 0.4742063 to the right, improve=0.03983165, (0 missing)
##       CC                  < 0.4436264 to the right, improve=0.03289703, (0 missing)
##       FCDi                < 0.5931034 to the right, improve=0.03242660, (0 missing)
##   Surrogate splits:
##       FCDo < 0.4880952 to the right, agree=0.722, adj=0.286, (0 split)
##       CC   < 0.4681691 to the right, agree=0.653, adj=0.107, (0 split)
##       FCDi < 0.2265512 to the right, agree=0.625, adj=0.036, (0 split)
##       CP   < 0.5390857 to the left,  agree=0.625, adj=0.036, (0 split)
## 
## Node number 7: 7 observations
##   mean=1.411857, MSE=0.2718225 
## 
## Node number 10: 110 observations,    complexity param=0.01436838
##   mean=0.9767974, MSE=0.06031334 
##   left son=20 (99 obs) right son=21 (11 obs)
##   Primary splits:
##       FCDi                < 0.5776093 to the left,  improve=0.05912221, (0 missing)
##       CC                  < 0.3458333 to the right, improve=0.03839310, (0 missing)
##       as.factor(CONCEITO) splits as  LLLLR, improve=0.02705273, (0 missing)
##       CP                  < 0.4422619 to the left,  improve=0.01626740, (0 missing)
##       FCDo                < 0.3181515 to the left,  improve=0.01159944, (0 missing)
##   Surrogate splits:
##       FCDo < 0.6777778 to the left,  agree=0.909, adj=0.091, (0 split)
## 
## Node number 11: 151 observations,    complexity param=0.01774939
##   mean=1.045973, MSE=0.07144051 
##   left son=22 (78 obs) right son=23 (73 obs)
##   Primary splits:
##       FCDi                < 0.5116238 to the right, improve=0.049506290, (0 missing)
##       CC                  < 0.5288521 to the right, improve=0.047210890, (0 missing)
##       CP                  < 0.4922794 to the left,  improve=0.035483860, (0 missing)
##       FCDo                < 0.7064327 to the right, improve=0.013602440, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRL, improve=0.008312707, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  LRRRR, agree=0.709, adj=0.397, (0 split)
##       FCDo                < 0.53125   to the right, agree=0.689, adj=0.356, (0 split)
##       CC                  < 0.6512144 to the right, agree=0.649, adj=0.274, (0 split)
##       CP                  < 0.5114426 to the left,  agree=0.563, adj=0.096, (0 split)
## 
## Node number 12: 44 observations,    complexity param=0.0102031
##   mean=0.9934879, MSE=0.06276538 
##   left son=24 (35 obs) right son=25 (9 obs)
##   Primary splits:
##       CC                  < 0.5134056 to the right, improve=0.07949447, (0 missing)
##       FCDi                < 0.5528821 to the left,  improve=0.07729982, (0 missing)
##       CP                  < 0.4094907 to the left,  improve=0.07376566, (0 missing)
##       FCDo                < 0.8176638 to the right, improve=0.03895006, (0 missing)
##       as.factor(CONCEITO) splits as  R-LLL, improve=0.01075653, (0 missing)
## 
## Node number 13: 28 observations,    complexity param=0.01624681
##   mean=1.192767, MSE=0.09103369 
##   left son=26 (10 obs) right son=27 (18 obs)
##   Primary splits:
##       FCDi < 0.5760369 to the right, improve=0.17400310, (0 missing)
##       FCDo < 0.4742063 to the right, improve=0.08989864, (0 missing)
##       CP   < 0.4949875 to the left,  improve=0.08841769, (0 missing)
##       CC   < 0.6364286 to the right, improve=0.08680351, (0 missing)
##   Surrogate splits:
##       FCDo < 0.6237374 to the right, agree=0.679, adj=0.1, (0 split)
## 
## Node number 20: 99 observations,    complexity param=0.01114782
##   mean=0.9568925, MSE=0.04303043 
##   left son=40 (88 obs) right son=41 (11 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LLLLR, improve=0.05782292, (0 missing)
##       FCDo                < 0.6262255 to the right, improve=0.04835172, (0 missing)
##       CC                  < 0.4221709 to the left,  improve=0.03187391, (0 missing)
##       FCDi                < 0.3762626 to the right, improve=0.02416534, (0 missing)
##       CP                  < 0.3844157 to the right, improve=0.01702926, (0 missing)
## 
## Node number 21: 11 observations
##   mean=1.155942, MSE=0.180201 
## 
## Node number 22: 78 observations,    complexity param=0.01084124
##   mean=0.9884398, MSE=0.04855783 
##   left son=44 (53 obs) right son=45 (25 obs)
##   Primary splits:
##       FCDo                < 0.5555556 to the right, improve=0.05771495, (0 missing)
##       CP                  < 0.3622024 to the right, improve=0.04915763, (0 missing)
##       FCDi                < 0.6236111 to the right, improve=0.03517085, (0 missing)
##       as.factor(CONCEITO) splits as  LRRL-, improve=0.02845517, (0 missing)
##       CC                  < 0.5445374 to the right, improve=0.02243028, (0 missing)
##   Surrogate splits:
##       CC < 0.571131  to the right, agree=0.731, adj=0.16, (0 split)
##       CP < 0.3479701 to the right, agree=0.692, adj=0.04, (0 split)
## 
## Node number 23: 73 observations,    complexity param=0.01774939
##   mean=1.107446, MSE=0.08857475 
##   left son=46 (42 obs) right son=47 (31 obs)
##   Primary splits:
##       FCDo                < 0.5196759 to the left,  improve=0.11872500, (0 missing)
##       FCDi                < 0.4038198 to the left,  improve=0.08744170, (0 missing)
##       CC                  < 0.5299148 to the right, improve=0.07999502, (0 missing)
##       CP                  < 0.493595  to the left,  improve=0.06055149, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.04731229, (0 missing)
##   Surrogate splits:
##       FCDi                < 0.3636541 to the left,  agree=0.685, adj=0.258, (0 split)
##       CP                  < 0.493595  to the left,  agree=0.644, adj=0.161, (0 split)
##       as.factor(CONCEITO) splits as  RLLLL, agree=0.644, adj=0.161, (0 split)
##       CC                  < 0.5254464 to the right, agree=0.603, adj=0.065, (0 split)
## 
## Node number 24: 35 observations,    complexity param=0.0102031
##   mean=0.9576687, MSE=0.0545813 
##   left son=48 (22 obs) right son=49 (13 obs)
##   Primary splits:
##       CC                  < 0.7054784 to the left,  improve=0.17668720, (0 missing)
##       CP                  < 0.5079004 to the left,  improve=0.10888740, (0 missing)
##       FCDo                < 0.5677656 to the left,  improve=0.10299430, (0 missing)
##       FCDi                < 0.5278571 to the left,  improve=0.09328107, (0 missing)
##       as.factor(CONCEITO) splits as  R-LLL, improve=0.01092535, (0 missing)
##   Surrogate splits:
##       CP   < 0.2951389 to the right, agree=0.686, adj=0.154, (0 split)
##       FCDo < 0.4444444 to the right, agree=0.657, adj=0.077, (0 split)
## 
## Node number 25: 9 observations
##   mean=1.132785, MSE=0.07019924 
## 
## Node number 26: 10 observations
##   mean=1.023912, MSE=0.03782498 
## 
## Node number 27: 18 observations
##   mean=1.286576, MSE=0.09595387 
## 
## Node number 40: 88 observations,    complexity param=0.01114782
##   mean=0.9392568, MSE=0.03773218 
##   left son=80 (8 obs) right son=81 (80 obs)
##   Primary splits:
##       FCDo                < 0.6262255 to the right, improve=0.049166600, (0 missing)
##       CC                  < 0.3982491 to the left,  improve=0.034074560, (0 missing)
##       CP                  < 0.3844157 to the right, improve=0.018987410, (0 missing)
##       FCDi                < 0.1707642 to the left,  improve=0.018899550, (0 missing)
##       as.factor(CONCEITO) splits as  RRLL-, improve=0.005205054, (0 missing)
## 
## Node number 41: 11 observations
##   mean=1.097978, MSE=0.06302311 
## 
## Node number 44: 53 observations
##   mean=0.9520813, MSE=0.02939089 
## 
## Node number 45: 25 observations,    complexity param=0.01084124
##   mean=1.06552, MSE=0.08044789 
##   left son=90 (18 obs) right son=91 (7 obs)
##   Primary splits:
##       FCDo                < 0.5230856 to the left,  improve=0.18561910, (0 missing)
##       CC                  < 0.577178  to the left,  improve=0.07196665, (0 missing)
##       as.factor(CONCEITO) splits as  RLRL-, improve=0.07046449, (0 missing)
##       FCDi                < 0.591253  to the left,  improve=0.06953301, (0 missing)
##       CP                  < 0.399213  to the right, improve=0.06489596, (0 missing)
##   Surrogate splits:
##       FCDi < 0.5732143 to the right, agree=0.76, adj=0.143, (0 split)
##       CC   < 0.5436709 to the right, agree=0.76, adj=0.143, (0 split)
##       CP   < 0.4987637 to the left,  agree=0.76, adj=0.143, (0 split)
## 
## Node number 46: 42 observations
##   mean=1.019345, MSE=0.0388596 
## 
## Node number 47: 31 observations,    complexity param=0.01774939
##   mean=1.226809, MSE=0.1311672 
##   left son=94 (15 obs) right son=95 (16 obs)
##   Primary splits:
##       CP                  < 0.4890941 to the left,  improve=0.13343660, (0 missing)
##       FCDi                < 0.4038198 to the left,  improve=0.09019198, (0 missing)
##       as.factor(CONCEITO) splits as  RLLR-, improve=0.07319815, (0 missing)
##       CC                  < 0.5389888 to the right, improve=0.06864687, (0 missing)
##       FCDo                < 0.6306043 to the right, improve=0.04451223, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  RLRL-, agree=0.742, adj=0.467, (0 split)
##       FCDi                < 0.390628  to the left,  agree=0.677, adj=0.333, (0 split)
##       CC                  < 0.6080688 to the right, agree=0.581, adj=0.133, (0 split)
##       FCDo                < 0.5342262 to the right, agree=0.548, adj=0.067, (0 split)
## 
## Node number 48: 22 observations
##   mean=0.8821795, MSE=0.03318247 
## 
## Node number 49: 13 observations
##   mean=1.08542, MSE=0.0648306 
## 
## Node number 80: 8 observations
##   mean=0.8030524, MSE=0.002435638 
## 
## Node number 81: 80 observations,    complexity param=0.01114782
##   mean=0.9528772, MSE=0.03922116 
##   left son=162 (29 obs) right son=163 (51 obs)
##   Primary splits:
##       CC                  < 0.4221709 to the left,  improve=0.047450190, (0 missing)
##       FCDi                < 0.1707642 to the left,  improve=0.028815950, (0 missing)
##       CP                  < 0.4306469 to the left,  improve=0.021127690, (0 missing)
##       FCDo                < 0.5033333 to the left,  improve=0.013459220, (0 missing)
##       as.factor(CONCEITO) splits as  LRLL-, improve=0.008112287, (0 missing)
##   Surrogate splits:
##       FCDo                < 0.2472222 to the left,  agree=0.675, adj=0.103, (0 split)
##       FCDi                < 0.1084967 to the left,  agree=0.675, adj=0.103, (0 split)
##       CP                  < 0.3364286 to the left,  agree=0.650, adj=0.034, (0 split)
##       as.factor(CONCEITO) splits as  RRRL-, agree=0.650, adj=0.034, (0 split)
## 
## Node number 90: 18 observations
##   mean=0.9893152, MSE=0.05215333 
## 
## Node number 91: 7 observations
##   mean=1.261475, MSE=0.09987436 
## 
## Node number 94: 15 observations
##   mean=1.090174, MSE=0.05014305 
## 
## Node number 95: 16 observations
##   mean=1.354905, MSE=0.1732162 
## 
## Node number 162: 29 observations
##   mean=0.8956681, MSE=0.02279377 
## 
## Node number 163: 51 observations,    complexity param=0.01114782
##   mean=0.9854079, MSE=0.04564293 
##   left son=326 (44 obs) right son=327 (7 obs)
##   Primary splits:
##       CC                  < 0.4392778 to the right, improve=0.28303160, (0 missing)
##       FCDi                < 0.3762626 to the right, improve=0.05658093, (0 missing)
##       CP                  < 0.3914764 to the right, improve=0.02654500, (0 missing)
##       FCDo                < 0.5175926 to the right, improve=0.01990692, (0 missing)
##       as.factor(CONCEITO) splits as  LRRL-, improve=0.01343994, (0 missing)
## 
## Node number 326: 44 observations
##   mean=0.9400736, MSE=0.02508037 
## 
## Node number 327: 7 observations
##   mean=1.270366, MSE=0.08077355
# Draw the fitted tree stored in `arvore_nout` (summarised above).
rpart.plot(x = arvore_nout)

1.5 Patentes

# Linear model without intercept (`- 1`) for the square root of SPPP/QPPP,
# fitted on `no_outliers`. Predictors: FCDo, FCDi, CC, CP, the FCDi-by-CC
# interaction, and DEPENDENCIAADM, CONCEITO and UFPROGRAMA coerced to factors.
fitPatente <- lm(
  formula = sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  data = no_outliers
)

# Print residual quantiles, the coefficient table and overall fit statistics.
summary(fitPatente)
## 
## Call:
## lm(formula = sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.04262 -0.22803  0.00558  0.20992  1.50475 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## FCDo                              0.11819    0.15310   0.772 0.440695    
## FCDi                              0.47621    0.35180   1.354 0.176827    
## CC                               -0.17147    0.30760  -0.557 0.577622    
## CP                                0.07917    0.22872   0.346 0.729457    
## as.factor(DEPENDENCIAADM)PRIVADA  0.58341    0.25823   2.259 0.024551 *  
## as.factor(DEPENDENCIAADM)PÚBLICA  0.60106    0.24599   2.443 0.015100 *  
## as.factor(CONCEITO)4              0.15022    0.05795   2.592 0.009978 ** 
## as.factor(CONCEITO)5              0.18623    0.07340   2.537 0.011660 *  
## as.factor(CONCEITO)6              0.31017    0.09000   3.446 0.000646 ***
## as.factor(CONCEITO)7              0.48170    0.12816   3.759 0.000204 ***
## as.factor(UFPROGRAMA)AM          -0.51719    0.26986  -1.917 0.056209 .  
## as.factor(UFPROGRAMA)AP           0.04558    0.41682   0.109 0.912994    
## as.factor(UFPROGRAMA)BA          -0.06116    0.17511  -0.349 0.727132    
## as.factor(UFPROGRAMA)CE          -0.23068    0.19504  -1.183 0.237802    
## as.factor(UFPROGRAMA)DF          -0.17233    0.18229  -0.945 0.345197    
## as.factor(UFPROGRAMA)ES          -0.07956    0.24396  -0.326 0.744542    
## as.factor(UFPROGRAMA)GO          -0.05098    0.21036  -0.242 0.808679    
## as.factor(UFPROGRAMA)MA          -0.02890    0.24510  -0.118 0.906223    
## as.factor(UFPROGRAMA)MG          -0.16847    0.16121  -1.045 0.296810    
## as.factor(UFPROGRAMA)MS          -0.16157    0.27755  -0.582 0.560882    
## as.factor(UFPROGRAMA)MT           0.27199    0.31745   0.857 0.392206    
## as.factor(UFPROGRAMA)PA           0.06686    0.22893   0.292 0.770424    
## as.factor(UFPROGRAMA)PB          -0.18183    0.24554  -0.741 0.459532    
## as.factor(UFPROGRAMA)PE           0.07924    0.17331   0.457 0.647826    
## as.factor(UFPROGRAMA)PI          -0.14271    0.26855  -0.531 0.595502    
## as.factor(UFPROGRAMA)PR          -0.04901    0.16373  -0.299 0.764872    
## as.factor(UFPROGRAMA)RJ          -0.39161    0.16574  -2.363 0.018743 *  
## as.factor(UFPROGRAMA)RN           0.09459    0.17828   0.531 0.596092    
## as.factor(UFPROGRAMA)RO          -0.24508    0.41685  -0.588 0.557000    
## as.factor(UFPROGRAMA)RS          -0.17845    0.16294  -1.095 0.274254    
## as.factor(UFPROGRAMA)SC          -0.34405    0.18345  -1.875 0.061653 .  
## as.factor(UFPROGRAMA)SE           0.32892    0.21231   1.549 0.122344    
## as.factor(UFPROGRAMA)SP          -0.16600    0.16654  -0.997 0.319661    
## as.factor(UFPROGRAMA)TO          -0.51461    0.31350  -1.642 0.101694    
## FCDi:CC                          -0.48437    0.55907  -0.866 0.386942    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3879 on 314 degrees of freedom
## Multiple R-squared:  0.785,  Adjusted R-squared:  0.761 
## F-statistic: 32.75 on 35 and 314 DF,  p-value: < 2.2e-16

1.5.0.1 Regressão de Poisson

# GLM with family "poisson" and no intercept (`- 1`) for sqrt(SPPP/QPPP),
# fitted on the full `IMI_e_IPT` table. Predictors: FCDo, FCDi, CC, CP, the
# FCDi-by-CC interaction, and DEPENDENCIAADM, CONCEITO and UFPROGRAMA as
# factors.
# NOTE(review): sqrt(SPPP/QPPP) is in general not integer-valued, while the
# Poisson family models counts; the recorded summary reports "AIC: Inf".
# Confirm whether another family (e.g. quasipoisson) was intended.
# NOTE(review): this model uses `IMI_e_IPT`, whereas the lm and rpart fits in
# this section use `no_outliers` - confirm that the difference is deliberate.
regpoisson <- glm(
  sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  family = "poisson",
  data = IMI_e_IPT
)

# Print deviance residuals, the coefficient table and deviance statistics.
summary(regpoisson)
## 
## Call:
## glm(formula = sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2005  -0.5658  -0.4486   0.2305   2.5971  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.2155     0.3275   3.711 0.000206 ***
## FCDi                              -0.2236     0.8551  -0.262 0.793690    
## CC                                -2.6057     0.7764  -3.356 0.000790 ***
## CP                                 0.8352     0.4402   1.897 0.057779 .  
## as.factor(DEPENDENCIAADM)PRIVADA -13.9911   429.4776  -0.033 0.974012    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.0189   429.4776  -0.033 0.973960    
## as.factor(CONCEITO)4               0.2469     0.1278   1.932 0.053351 .  
## as.factor(CONCEITO)5               0.4300     0.1638   2.625 0.008670 ** 
## as.factor(CONCEITO)6               0.7350     0.1982   3.709 0.000208 ***
## as.factor(CONCEITO)7               0.9958     0.2498   3.986 6.72e-05 ***
## as.factor(UFPROGRAMA)AL           12.6915   429.4775   0.030 0.976425    
## as.factor(UFPROGRAMA)AM           11.8077   429.4776   0.027 0.978066    
## as.factor(UFPROGRAMA)AP           12.8809   429.4785   0.030 0.976073    
## as.factor(UFPROGRAMA)BA           12.3216   429.4774   0.029 0.977112    
## as.factor(UFPROGRAMA)CE           12.2236   429.4774   0.028 0.977294    
## as.factor(UFPROGRAMA)DF           12.0511   429.4774   0.028 0.977614    
## as.factor(UFPROGRAMA)ES           12.1617   429.4775   0.028 0.977409    
## as.factor(UFPROGRAMA)GO           12.0979   429.4775   0.028 0.977527    
## as.factor(UFPROGRAMA)MA           12.2703   429.4775   0.029 0.977207    
## as.factor(UFPROGRAMA)MG           12.3975   429.4774   0.029 0.976971    
## as.factor(UFPROGRAMA)MS           12.1020   429.4776   0.028 0.977520    
## as.factor(UFPROGRAMA)MT           12.0404   429.4775   0.028 0.977634    
## as.factor(UFPROGRAMA)PA           12.1447   429.4774   0.028 0.977440    
## as.factor(UFPROGRAMA)PB           12.3053   429.4776   0.029 0.977142    
## as.factor(UFPROGRAMA)PE           12.6171   429.4774   0.029 0.976563    
## as.factor(UFPROGRAMA)PI           12.8485   429.4778   0.030 0.976134    
## as.factor(UFPROGRAMA)PR           12.4447   429.4774   0.029 0.976884    
## as.factor(UFPROGRAMA)RJ           11.6693   429.4774   0.027 0.978323    
## as.factor(UFPROGRAMA)RN           12.7194   429.4774   0.030 0.976373    
## as.factor(UFPROGRAMA)RO           11.8032   429.4788   0.027 0.978075    
## as.factor(UFPROGRAMA)RR           -0.3080   884.9815   0.000 0.999722    
## as.factor(UFPROGRAMA)RS           12.1394   429.4774   0.028 0.977450    
## as.factor(UFPROGRAMA)SC           11.9645   429.4774   0.028 0.977775    
## as.factor(UFPROGRAMA)SE           13.5189   429.4775   0.031 0.974889    
## as.factor(UFPROGRAMA)SP           11.9685   429.4774   0.028 0.977768    
## as.factor(UFPROGRAMA)TO           12.2209   429.4777   0.028 0.977299    
## FCDi:CC                            1.3029     1.4374   0.906 0.364703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4275.4  on 2858  degrees of freedom
## Residual deviance: 1079.5  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.5.0.2 Árvore de decisão

# Regression tree for the raw ratio SPPP/QPPP (no square root here), grown on
# `no_outliers` with no extra control arguments. Predictors: FCDo, FCDi, CC,
# CP, and DEPENDENCIAADM and CONCEITO as factors (UFPROGRAMA is not included).
arvore_patente <- rpart(
  formula = SPPP/QPPP ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO),
  data = no_outliers
)

# Print the complexity table, variable importance and per-node split details.
summary(arvore_patente)
## Call:
## rpart(formula = SPPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.03547831      0 1.0000000 1.007428 0.1963998
## 2 0.02239911      2 0.9290434 1.106991 0.1984569
## 3 0.01844940      5 0.8618460 1.144126 0.1978482
## 4 0.01239861      6 0.8433966 1.171633 0.2045524
## 5 0.01029784      7 0.8309980 1.186228 0.2054333
## 6 0.01000000     10 0.8001045 1.185827 0.2052137
## 
## Variable importance
##                      FCDi                      FCDo                        CC 
##                        29                        25                        18 
##                        CP       as.factor(CONCEITO) as.factor(DEPENDENCIAADM) 
##                        15                        12                         1 
## 
## Node number 1: 349 observations,    complexity param=0.03547831
##   mean=0.629482, MSE=0.424635 
##   left son=2 (148 obs) right son=3 (201 obs)
##   Primary splits:
##       CP                  < 0.4476111 to the left,  improve=0.03222377, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.03009496, (0 missing)
##       CC                  < 0.3445861 to the right, improve=0.02901461, (0 missing)
##       FCDo                < 0.7421652 to the right, improve=0.01929228, (0 missing)
##       FCDi                < 0.4241932 to the right, improve=0.01206120, (0 missing)
##   Surrogate splits:
##       FCDo < 0.2593656 to the left,  agree=0.605, adj=0.068, (0 split)
##       FCDi < 0.1863082 to the left,  agree=0.605, adj=0.068, (0 split)
##       CC   < 0.8354167 to the right, agree=0.605, adj=0.068, (0 split)
## 
## Node number 2: 148 observations,    complexity param=0.0184494
##   mean=0.4931608, MSE=0.3104785 
##   left son=4 (141 obs) right son=5 (7 obs)
##   Primary splits:
##       FCDi                      < 0.9089069 to the left,  improve=0.05950184, (0 missing)
##       FCDo                      < 0.6794872 to the left,  improve=0.03972066, (0 missing)
##       CC                        < 0.8143056 to the left,  improve=0.03121239, (0 missing)
##       as.factor(CONCEITO)       splits as  LRLRR, improve=0.02188764, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.01734681, (0 missing)
## 
## Node number 3: 201 observations,    complexity param=0.03547831
##   mean=0.7298578, MSE=0.484932 
##   left son=6 (29 obs) right son=7 (172 obs)
##   Primary splits:
##       FCDo                      < 0.7421652 to the right, improve=0.05889031, (0 missing)
##       CC                        < 0.6693845 to the right, improve=0.05210645, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.04265311, (0 missing)
##       FCDi                      < 0.343002  to the right, improve=0.03777498, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.03187693, (0 missing)
## 
## Node number 4: 141 observations,    complexity param=0.01239861
##   mean=0.4628763, MSE=0.2099164 
##   left son=8 (76 obs) right son=9 (65 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRLRR, improve=0.06207956, (0 missing)
##       CC                  < 0.5957221 to the right, improve=0.02193168, (0 missing)
##       FCDi                < 0.6492599 to the right, improve=0.01779479, (0 missing)
##       FCDo                < 0.6794872 to the left,  improve=0.01572268, (0 missing)
##       CP                  < 0.3328947 to the left,  improve=0.01453018, (0 missing)
##   Surrogate splits:
##       FCDi < 0.4228551 to the right, agree=0.660, adj=0.262, (0 split)
##       CC   < 0.4529334 to the right, agree=0.603, adj=0.138, (0 split)
##       CP   < 0.4369372 to the left,  agree=0.603, adj=0.138, (0 split)
##       FCDo < 0.269697  to the right, agree=0.582, adj=0.092, (0 split)
## 
## Node number 5: 7 observations
##   mean=1.103177, MSE=1.945491 
## 
## Node number 6: 29 observations
##   mean=0.3183031, MSE=0.1521703 
## 
## Node number 7: 172 observations,    complexity param=0.02239911
##   mean=0.7992478, MSE=0.5076644 
##   left son=14 (26 obs) right son=15 (146 obs)
##   Primary splits:
##       CC                        < 0.6693845 to the right, improve=0.03664811, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.03207323, (0 missing)
##       FCDi                      < 0.189441  to the right, improve=0.02366747, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.02292318, (0 missing)
##       CP                        < 0.7083333 to the right, improve=0.01980541, (0 missing)
## 
## Node number 8: 76 observations
##   mean=0.3573047, MSE=0.1272899 
## 
## Node number 9: 65 observations,    complexity param=0.01029784
##   mean=0.586314, MSE=0.2782576 
##   left son=18 (55 obs) right son=19 (10 obs)
##   Primary splits:
##       FCDo                < 0.6427432 to the left,  improve=0.06189628, (0 missing)
##       FCDi                < 0.3582888 to the left,  improve=0.05966784, (0 missing)
##       CC                  < 0.6019898 to the right, improve=0.04629516, (0 missing)
##       CP                  < 0.4375926 to the right, improve=0.02795421, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.01444040, (0 missing)
##   Surrogate splits:
##       FCDi < 0.7519201 to the left,  agree=0.862, adj=0.1, (0 split)
## 
## Node number 14: 26 observations
##   mean=0.4760236, MSE=0.213218 
## 
## Node number 15: 146 observations,    complexity param=0.02239911
##   mean=0.8568083, MSE=0.5381819 
##   left son=30 (121 obs) right son=31 (25 obs)
##   Primary splits:
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.03076330, (0 missing)
##       CC                        < 0.350456  to the right, improve=0.02644304, (0 missing)
##       FCDi                      < 0.189441  to the right, improve=0.01612835, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.01338604, (0 missing)
##       FCDo                      < 0.5465368 to the left,  improve=0.01098118, (0 missing)
##   Surrogate splits:
##       CC   < 0.3445861 to the right, agree=0.842, adj=0.08, (0 split)
##       FCDo < 0.2344055 to the right, agree=0.836, adj=0.04, (0 split)
## 
## Node number 18: 55 observations,    complexity param=0.01029784
##   mean=0.5303544, MSE=0.2043189 
##   left son=36 (14 obs) right son=37 (41 obs)
##   Primary splits:
##       CC                  < 0.6019898 to the right, improve=0.13981370, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.06595971, (0 missing)
##       CP                  < 0.3640523 to the left,  improve=0.06510467, (0 missing)
##       FCDi                < 0.3360526 to the left,  improve=0.02538762, (0 missing)
##       FCDo                < 0.5857843 to the right, improve=0.02263407, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.6546053 to the right, agree=0.800, adj=0.214, (0 split)
##       FCDo                      < 0.5522876 to the right, agree=0.782, adj=0.143, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  LR, agree=0.782, adj=0.143, (0 split)
##       CP                        < 0.3360417 to the left,  agree=0.764, adj=0.071, (0 split)
## 
## Node number 19: 10 observations
##   mean=0.8940917, MSE=0.5729703 
## 
## Node number 30: 121 observations
##   mean=0.7983214, MSE=0.3348285 
## 
## Node number 31: 25 observations,    complexity param=0.02239911
##   mean=1.139885, MSE=1.425724 
##   left son=62 (18 obs) right son=63 (7 obs)
##   Primary splits:
##       FCDi                < 0.3356331 to the left,  improve=1.217970e-01, (0 missing)
##       FCDo                < 0.5105856 to the left,  improve=8.984864e-02, (0 missing)
##       CP                  < 0.4816682 to the right, improve=3.685829e-02, (0 missing)
##       CC                  < 0.4971226 to the right, improve=2.548008e-02, (0 missing)
##       as.factor(CONCEITO) splits as  ---RL, improve=3.728631e-05, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5022523 to the left,  agree=0.76, adj=0.143, (0 split)
## 
## Node number 36: 14 observations
##   mean=0.2411153, MSE=0.06078167 
## 
## Node number 37: 41 observations,    complexity param=0.01029784
##   mean=0.629119, MSE=0.2150106 
##   left son=74 (28 obs) right son=75 (13 obs)
##   Primary splits:
##       FCDi                < 0.336875  to the left,  improve=0.21413400, (0 missing)
##       CC                  < 0.5347393 to the left,  improve=0.11238680, (0 missing)
##       FCDo                < 0.5331439 to the left,  improve=0.08401116, (0 missing)
##       CP                  < 0.4375926 to the right, improve=0.06392309, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.05506055, (0 missing)
##   Surrogate splits:
##       CC   < 0.5197655 to the left,  agree=0.829, adj=0.462, (0 split)
##       FCDo < 0.4796296 to the left,  agree=0.805, adj=0.385, (0 split)
##       CP   < 0.342445  to the right, agree=0.707, adj=0.077, (0 split)
## 
## Node number 62: 18 observations
##   mean=0.8800191, MSE=0.4187633 
## 
## Node number 63: 7 observations
##   mean=1.808111, MSE=3.394877 
## 
## Node number 74: 28 observations
##   mean=0.4829129, MSE=0.1068154 
## 
## Node number 75: 13 observations
##   mean=0.9440244, MSE=0.3028397
# Draw the tree fitted for SPPP/QPPP (`arvore_patente`).
rpart.plot(x = arvore_patente)

1.6 Produtos

# Linear model without intercept (`- 1`) for the square root of SPPPr/QPPP,
# fitted on `no_outliers`. Predictors: FCDo, FCDi, CC, CP, the FCDi-by-CC
# interaction, and DEPENDENCIAADM, CONCEITO and UFPROGRAMA coerced to factors.
fitProduto <- lm(
  formula = sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  data = no_outliers
)

# Print residual quantiles, the coefficient table and overall fit statistics.
summary(fitProduto)
## 
## Call:
## lm(formula = sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55609 -0.24614 -0.03405  0.15980  1.50068 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## FCDo                              0.115563   0.130668   0.884   0.3772  
## FCDi                             -0.222547   0.300259  -0.741   0.4591  
## CC                               -0.250075   0.262536  -0.953   0.3416  
## CP                                0.008614   0.195205   0.044   0.9648  
## as.factor(DEPENDENCIAADM)PRIVADA  0.436698   0.220394   1.981   0.0484 *
## as.factor(DEPENDENCIAADM)PÚBLICA  0.394790   0.209951   1.880   0.0610 .
## as.factor(CONCEITO)4             -0.014193   0.049457  -0.287   0.7743  
## as.factor(CONCEITO)5             -0.084674   0.062648  -1.352   0.1775  
## as.factor(CONCEITO)6             -0.162837   0.076814  -2.120   0.0348 *
## as.factor(CONCEITO)7             -0.204283   0.109380  -1.868   0.0627 .
## as.factor(UFPROGRAMA)AM           0.234963   0.230324   1.020   0.3084  
## as.factor(UFPROGRAMA)AP          -0.008091   0.355751  -0.023   0.9819  
## as.factor(UFPROGRAMA)BA           0.026207   0.149458   0.175   0.8609  
## as.factor(UFPROGRAMA)CE           0.020598   0.166463   0.124   0.9016  
## as.factor(UFPROGRAMA)DF           0.027819   0.155581   0.179   0.8582  
## as.factor(UFPROGRAMA)ES           0.047873   0.208212   0.230   0.8183  
## as.factor(UFPROGRAMA)GO          -0.057580   0.179537  -0.321   0.7486  
## as.factor(UFPROGRAMA)MA          -0.158047   0.209187  -0.756   0.4505  
## as.factor(UFPROGRAMA)MG           0.100455   0.137588   0.730   0.4659  
## as.factor(UFPROGRAMA)MS          -0.150784   0.236881  -0.637   0.5249  
## as.factor(UFPROGRAMA)MT          -0.258484   0.270938  -0.954   0.3408  
## as.factor(UFPROGRAMA)PA           0.068426   0.195390   0.350   0.7264  
## as.factor(UFPROGRAMA)PB           0.222162   0.209568   1.060   0.2899  
## as.factor(UFPROGRAMA)PE          -0.022290   0.147913  -0.151   0.8803  
## as.factor(UFPROGRAMA)PI          -0.108230   0.229204  -0.472   0.6371  
## as.factor(UFPROGRAMA)PR           0.095577   0.139740   0.684   0.4945  
## as.factor(UFPROGRAMA)RJ           0.042602   0.141453   0.301   0.7635  
## as.factor(UFPROGRAMA)RN          -0.140447   0.152159  -0.923   0.3567  
## as.factor(UFPROGRAMA)RO           0.173559   0.355771   0.488   0.6260  
## as.factor(UFPROGRAMA)RS           0.257228   0.139063   1.850   0.0653 .
## as.factor(UFPROGRAMA)SC           0.158146   0.156568   1.010   0.3132  
## as.factor(UFPROGRAMA)SE           0.048546   0.181207   0.268   0.7890  
## as.factor(UFPROGRAMA)SP           0.124219   0.142141   0.874   0.3828  
## as.factor(UFPROGRAMA)TO          -0.056985   0.267566  -0.213   0.8315  
## FCDi:CC                           0.219510   0.477159   0.460   0.6458  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.331 on 314 degrees of freedom
## Multiple R-squared:  0.5408, Adjusted R-squared:  0.4896 
## F-statistic: 10.56 on 35 and 314 DF,  p-value: < 2.2e-16

1.6.0.1 Regressão de Poisson

# GLM with family "poisson" and no intercept (`- 1`) for sqrt(SPPPr/QPPP),
# fitted on the full `IMI_e_IPT` table. Predictors: FCDo, FCDi, CC, CP, the
# FCDi-by-CC interaction, and DEPENDENCIAADM, CONCEITO and UFPROGRAMA as
# factors. The result is stored in `regpoisson`, replacing the model that was
# assigned to that name earlier in the file.
# NOTE(review): sqrt(SPPPr/QPPP) is in general not integer-valued, while the
# Poisson family models counts - confirm whether another family
# (e.g. quasipoisson) was intended.
# NOTE(review): this model uses `IMI_e_IPT`, whereas the lm fit in this
# section uses `no_outliers` - confirm that the difference is deliberate.
regpoisson <- glm(
  sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  family = "poisson",
  data = IMI_e_IPT
)

# Print deviance residuals, the coefficient table and deviance statistics.
summary(regpoisson)
## 
## Call:
## glm(formula = sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7312  -0.4113  -0.3454  -0.2339   2.6836  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)   
## FCDo                                1.3369     0.4763   2.807    0.005 **
## FCDi                               -0.8160     1.2430  -0.656    0.512   
## CC                                 -1.9591     1.0849  -1.806    0.071 . 
## CP                                  0.3492     0.6544   0.534    0.594   
## as.factor(DEPENDENCIAADM)PRIVADA  -14.6500   726.8327  -0.020    0.984   
## as.factor(DEPENDENCIAADM)PÚBLICA  -15.0541   726.8327  -0.021    0.983   
## as.factor(CONCEITO)4                0.1528     0.1781   0.858    0.391   
## as.factor(CONCEITO)5                0.3060     0.2297   1.332    0.183   
## as.factor(CONCEITO)6                0.1166     0.3205   0.364    0.716   
## as.factor(CONCEITO)7                0.3040     0.4142   0.734    0.463   
## as.factor(UFPROGRAMA)AL            12.5308   726.8327   0.017    0.986   
## as.factor(UFPROGRAMA)AM            12.6555   726.8326   0.017    0.986   
## as.factor(UFPROGRAMA)AP            12.6343   726.8350   0.017    0.986   
## as.factor(UFPROGRAMA)BA            12.4648   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)CE            12.5366   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)DF            12.6250   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)ES            11.8993   726.8327   0.016    0.987   
## as.factor(UFPROGRAMA)GO            12.0363   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)MA            11.5489   726.8329   0.016    0.987   
## as.factor(UFPROGRAMA)MG            12.6873   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)MS            11.7967   726.8329   0.016    0.987   
## as.factor(UFPROGRAMA)MT            10.1730   726.8346   0.014    0.989   
## as.factor(UFPROGRAMA)PA            12.8953   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)PB            12.6954   726.8326   0.017    0.986   
## as.factor(UFPROGRAMA)PE            12.0827   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)PI            12.2559   726.8338   0.017    0.987   
## as.factor(UFPROGRAMA)PR            12.8170   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)RJ            12.5721   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)RN            12.2530   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)RO            13.6260   726.8327   0.019    0.985   
## as.factor(UFPROGRAMA)RR            -0.2529  1468.2756   0.000    1.000   
## as.factor(UFPROGRAMA)RS            12.8192   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)SC            12.7093   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)SE            13.3714   726.8326   0.018    0.985   
## as.factor(UFPROGRAMA)SP            12.3229   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)TO            12.3185   726.8329   0.017    0.986   
## FCDi:CC                             1.4643     2.0123   0.728    0.467   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4910.85  on 2858  degrees of freedom
## Residual deviance:  709.79  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 13

1.6.0.2 Árvore de decisão

# Regression tree (rpart) for the SPPPr / QPPP ratio using the four numeric
# indicators plus DEPENDENCIAADM and CONCEITO as factors, fitted on the
# `no_outliers` subset.
# NOTE(review): at the top of the file `no_outliers` is built by keeping the
# rows whose IPT is in the boxplot() `$out` values, i.e. the flagged
# outliers -- confirm this is the intended subset.
# NOTE(review): in the recorded complexity table the cross-validated error
# (xerror) stays above 1 and grows with every split, which suggests the
# splits do not generalise; consider pruning before interpreting the tree.
arvore_produto <- rpart(
  SPPPr / QPPP ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO),
  data = no_outliers
)

# Complexity table, variable importance and per-node split details.
summary(arvore_produto)
## Call:
## rpart(formula = SPPPr/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.03178604      0 1.0000000 1.008583 0.3188953
## 2 0.01939137      1 0.9682140 1.105910 0.3215916
## 3 0.01489306      3 0.9294312 1.180256 0.3190358
## 4 0.01000000      6 0.8847520 1.240110 0.3232844
## 
## Variable importance
##                FCDi                  CP                  CC as.factor(CONCEITO) 
##                  40                  31                  14                   8 
##                FCDo 
##                   6 
## 
## Node number 1: 349 observations,    complexity param=0.03178604
##   mean=0.214688, MSE=0.1752302 
##   left son=2 (339 obs) right son=3 (10 obs)
##   Primary splits:
##       FCDi                      < 0.9615385 to the left,  improve=0.03178604, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02574168, (0 missing)
##       CP                        < 0.4987637 to the left,  improve=0.01765411, (0 missing)
##       FCDo                      < 0.4122316 to the left,  improve=0.01376318, (0 missing)
##       as.factor(CONCEITO)       splits as  RRLLL, improve=0.01106026, (0 missing)
## 
## Node number 2: 339 observations,    complexity param=0.01939137
##   mean=0.2018699, MSE=0.1308178 
##   left son=4 (307 obs) right son=5 (32 obs)
##   Primary splits:
##       CP                        < 0.550463  to the left,  improve=0.014634150, (0 missing)
##       FCDo                      < 0.4122316 to the left,  improve=0.013348420, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.011491610, (0 missing)
##       as.factor(CONCEITO)       splits as  RRRLL, improve=0.010421320, (0 missing)
##       FCDi                      < 0.3702381 to the left,  improve=0.009293826, (0 missing)
## 
## Node number 3: 10 observations
##   mean=0.6492208, MSE=1.486423 
## 
## Node number 4: 307 observations,    complexity param=0.01489306
##   mean=0.1877438, MSE=0.09350723 
##   left son=8 (204 obs) right son=9 (103 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  RLLLL, improve=0.017920130, (0 missing)
##       FCDo                < 0.8257576 to the left,  improve=0.014202210, (0 missing)
##       FCDi                < 0.4205263 to the left,  improve=0.011737620, (0 missing)
##       CP                  < 0.5413492 to the right, improve=0.008810057, (0 missing)
##       CC                  < 0.650146  to the left,  improve=0.006771424, (0 missing)
##   Surrogate splits:
##       FCDi < 0.5732143 to the left,  agree=0.785, adj=0.359, (0 split)
##       FCDo < 0.6376263 to the left,  agree=0.707, adj=0.126, (0 split)
##       CP   < 0.3026042 to the right, agree=0.678, adj=0.039, (0 split)
## 
## Node number 5: 32 observations,    complexity param=0.01939137
##   mean=0.3373924, MSE=0.4684851 
##   left son=10 (24 obs) right son=11 (8 obs)
##   Primary splits:
##       CP                        < 0.5688095 to the right, improve=0.11491750, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.09985498, (0 missing)
##       as.factor(CONCEITO)       splits as  LRLL-, improve=0.09578208, (0 missing)
##       FCDi                      < 0.5811404 to the right, improve=0.04275088, (0 missing)
##       CC                        < 0.5961445 to the right, improve=0.04187705, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  LLRL-, agree=0.781, adj=0.125, (0 split)
## 
## Node number 8: 204 observations
##   mean=0.1586569, MSE=0.05966425 
## 
## Node number 9: 103 observations,    complexity param=0.01489306
##   mean=0.2453527, MSE=0.1555416 
##   left son=18 (78 obs) right son=19 (25 obs)
##   Primary splits:
##       CC                        < 0.5289366 to the right, improve=0.0571028000, (0 missing)
##       FCDi                      < 0.5732143 to the right, improve=0.0382717800, (0 missing)
##       FCDo                      < 0.6171498 to the right, improve=0.0280340700, (0 missing)
##       CP                        < 0.5246181 to the right, improve=0.0181320300, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.0006905795, (0 missing)
##   Surrogate splits:
##       FCDi < 0.251179  to the right, agree=0.777, adj=0.08, (0 split)
##       FCDo < 0.3444444 to the right, agree=0.767, adj=0.04, (0 split)
## 
## Node number 10: 24 observations
##   mean=0.2034308, MSE=0.1841712 
## 
## Node number 11: 8 observations
##   mean=0.7392771, MSE=1.106078 
## 
## Node number 18: 78 observations
##   mean=0.1919977, MSE=0.1060473 
## 
## Node number 19: 25 observations,    complexity param=0.01489306
##   mean=0.4118201, MSE=0.2733707 
##   left son=38 (12 obs) right son=39 (13 obs)
##   Primary splits:
##       FCDi < 0.5668241 to the right, improve=0.19067320, (0 missing)
##       FCDo < 0.5358974 to the left,  improve=0.10093880, (0 missing)
##       CC   < 0.5011218 to the left,  improve=0.09637133, (0 missing)
##       CP   < 0.479386  to the left,  improve=0.03544710, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5694444 to the right, agree=0.68, adj=0.333, (0 split)
##       CC   < 0.5011218 to the left,  agree=0.64, adj=0.250, (0 split)
##       CP   < 0.4449653 to the right, agree=0.64, adj=0.250, (0 split)
## 
## Node number 38: 12 observations
##   mean=0.1741898, MSE=0.03724166 
## 
## Node number 39: 13 observations
##   mean=0.6311712, MSE=0.3910966
# Draw the fitted tree for the SPPPr / QPPP ratio (rpart.plot defaults).
rpart.plot(arvore_produto)

1.7 Aplicativos

# Linear model without intercept (`- 1`) for the square root of the
# SAPP / QPPP ratio, fitted on the `no_outliers` subset. `FCDi * CC` only
# contributes the FCDi:CC interaction, because both main effects are
# already listed explicitly.
# NOTE(review): at the top of the file `no_outliers` is built by keeping the
# rows whose IPT is in the boxplot() `$out` values, i.e. the flagged
# outliers -- confirm this is the intended subset.
fitApp <- lm(
  formula = sqrt(SAPP / QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  data = no_outliers
)

# Coefficient table, residual standard error and R-squared of the fit.
summary(fitApp)
## 
## Call:
## lm(formula = sqrt(SAPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70947 -0.26896 -0.08895  0.24785  1.38975 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## FCDo                             -0.331245   0.152323  -2.175   0.0304 *
## FCDi                              0.217738   0.350020   0.622   0.5343  
## CC                                0.750816   0.306045   2.453   0.0147 *
## CP                                0.052085   0.227556   0.229   0.8191  
## as.factor(DEPENDENCIAADM)PRIVADA  0.096309   0.256919   0.375   0.7080  
## as.factor(DEPENDENCIAADM)PÚBLICA  0.011137   0.244746   0.046   0.9637  
## as.factor(CONCEITO)4             -0.026093   0.057654  -0.453   0.6512  
## as.factor(CONCEITO)5             -0.130041   0.073031  -1.781   0.0759 .
## as.factor(CONCEITO)6             -0.130877   0.089544  -1.462   0.1449  
## as.factor(CONCEITO)7             -0.302464   0.127508  -2.372   0.0183 *
## as.factor(UFPROGRAMA)AM           0.360370   0.268495   1.342   0.1805  
## as.factor(UFPROGRAMA)AP          -0.265171   0.414709  -0.639   0.5230  
## as.factor(UFPROGRAMA)BA           0.004714   0.174227   0.027   0.9784  
## as.factor(UFPROGRAMA)CE           0.132524   0.194050   0.683   0.4952  
## as.factor(UFPROGRAMA)DF           0.069679   0.181366   0.384   0.7011  
## as.factor(UFPROGRAMA)ES           0.131945   0.242719   0.544   0.5871  
## as.factor(UFPROGRAMA)GO          -0.012691   0.209291  -0.061   0.9517  
## as.factor(UFPROGRAMA)MA           0.323108   0.243855   1.325   0.1861  
## as.factor(UFPROGRAMA)MG           0.141139   0.160390   0.880   0.3795  
## as.factor(UFPROGRAMA)MS           0.091445   0.276138   0.331   0.7407  
## as.factor(UFPROGRAMA)MT          -0.224905   0.315840  -0.712   0.4769  
## as.factor(UFPROGRAMA)PA          -0.091803   0.227771  -0.403   0.6872  
## as.factor(UFPROGRAMA)PB          -0.124886   0.244299  -0.511   0.6096  
## as.factor(UFPROGRAMA)PE          -0.005428   0.172426  -0.031   0.9749  
## as.factor(UFPROGRAMA)PI          -0.030504   0.267189  -0.114   0.9092  
## as.factor(UFPROGRAMA)PR           0.081857   0.162899   0.502   0.6157  
## as.factor(UFPROGRAMA)RJ           0.373183   0.164896   2.263   0.0243 *
## as.factor(UFPROGRAMA)RN           0.031056   0.177376   0.175   0.8611  
## as.factor(UFPROGRAMA)RO           0.038910   0.414732   0.094   0.9253  
## as.factor(UFPROGRAMA)RS           0.150616   0.162110   0.929   0.3536  
## as.factor(UFPROGRAMA)SC           0.205903   0.182516   1.128   0.2601  
## as.factor(UFPROGRAMA)SE           0.022686   0.211238   0.107   0.9145  
## as.factor(UFPROGRAMA)SP           0.110609   0.165697   0.668   0.5049  
## as.factor(UFPROGRAMA)TO           0.445293   0.311909   1.428   0.1544  
## FCDi:CC                          -0.387726   0.556237  -0.697   0.4863  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3859 on 314 degrees of freedom
## Multiple R-squared:  0.5485, Adjusted R-squared:  0.4982 
## F-statistic:  10.9 on 35 and 314 DF,  p-value: < 2.2e-16

1.7.0.1 Regressão de Poisson

# Poisson GLM without intercept (`- 1`) for the square root of the
# SAPP / QPPP ratio, fitted on the full IMI_e_IPT data set. This overwrites
# the `regpoisson` object created in the previous section.
# NOTE(review): the response is a square root of a ratio, so it is generally
# not an integer count; the recorded summary reports `AIC: Inf` and standard
# errors of ~729 on the factor terms. Confirm whether a quasi-Poisson or
# Gaussian family was intended.
# NOTE(review): `fitApp` and the decision tree in this section are fitted on
# `no_outliers` rather than IMI_e_IPT, so the fits do not use the same rows.
regpoisson <- glm(
  sqrt(SAPP / QPPP) ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO) +
    as.factor(UFPROGRAMA) + FCDi * CC - 1,
  family = "poisson",
  data = IMI_e_IPT
)

# Coefficient table, deviances and AIC of the fitted model.
summary(regpoisson)
## 
## Call:
## glm(formula = sqrt(SAPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8264  -0.4847  -0.4213   0.1997   2.6427  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)  
## FCDo                              8.209e-01  3.857e-01   2.128   0.0333 *
## FCDi                              1.134e+00  9.870e-01   1.149   0.2505  
## CC                                5.941e-01  8.224e-01   0.722   0.4701  
## CP                               -8.244e-01  6.099e-01  -1.352   0.1765  
## as.factor(DEPENDENCIAADM)PRIVADA -1.570e+01  7.290e+02  -0.022   0.9828  
## as.factor(DEPENDENCIAADM)PÚBLICA -1.598e+01  7.290e+02  -0.022   0.9825  
## as.factor(CONCEITO)4              1.992e-01  1.436e-01   1.387   0.1654  
## as.factor(CONCEITO)5              8.424e-02  2.023e-01   0.416   0.6771  
## as.factor(CONCEITO)6              3.620e-01  2.511e-01   1.441   0.1494  
## as.factor(CONCEITO)7              8.261e-02  3.805e-01   0.217   0.8281  
## as.factor(UFPROGRAMA)AL           1.319e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)AM           1.251e+01  7.290e+02   0.017   0.9863  
## as.factor(UFPROGRAMA)AP          -2.363e-02  1.036e+03   0.000   1.0000  
## as.factor(UFPROGRAMA)BA           1.302e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)CE           1.297e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)DF           1.319e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)ES           1.280e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)GO           1.281e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)MA           1.286e+01  7.290e+02   0.018   0.9859  
## as.factor(UFPROGRAMA)MG           1.321e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)MS           1.256e+01  7.290e+02   0.017   0.9863  
## as.factor(UFPROGRAMA)MT           1.223e+01  7.290e+02   0.017   0.9866  
## as.factor(UFPROGRAMA)PA           1.277e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)PB           1.232e+01  7.290e+02   0.017   0.9865  
## as.factor(UFPROGRAMA)PE           1.293e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)PI           1.305e+01  7.290e+02   0.018   0.9857  
## as.factor(UFPROGRAMA)PR           1.311e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)RJ           1.324e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)RN           1.323e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)RO           1.163e+01  7.290e+02   0.016   0.9873  
## as.factor(UFPROGRAMA)RR           1.688e-03  1.469e+03   0.000   1.0000  
## as.factor(UFPROGRAMA)RS           1.322e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)SC           1.320e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)SE           1.349e+01  7.290e+02   0.019   0.9852  
## as.factor(UFPROGRAMA)SP           1.293e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)TO           1.351e+01  7.290e+02   0.019   0.9852  
## FCDi:CC                          -1.298e+00  1.548e+00  -0.839   0.4017  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4605.88  on 2858  degrees of freedom
## Residual deviance:  886.23  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 13

1.7.0.2 Árvore de decisão

# Regression tree (rpart) for the SAPP / QPPP ratio using the four numeric
# indicators plus DEPENDENCIAADM and CONCEITO as factors, fitted on the
# `no_outliers` subset.
# NOTE(review): at the top of the file `no_outliers` is built by keeping the
# rows whose IPT is in the boxplot() `$out` values, i.e. the flagged
# outliers -- confirm this is the intended subset.
# NOTE(review): in the recorded complexity table the cross-validated error
# (xerror) is above 1 for every tree size, which suggests the splits do not
# generalise; consider pruning before interpreting the tree.
arvore_aplicativo <- rpart(
  SAPP / QPPP ~ FCDo + FCDi + CC + CP +
    as.factor(DEPENDENCIAADM) + as.factor(CONCEITO),
  data = no_outliers
)

# Complexity table, variable importance and per-node split details.
summary(arvore_aplicativo)
## Call:
## rpart(formula = SAPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.06860452      0 1.0000000 1.005998 0.1579283
## 2 0.04033968      1 0.9313955 1.072299 0.1546801
## 3 0.02909134      2 0.8910558 1.050254 0.1533450
## 4 0.02413469      4 0.8328731 1.094798 0.1607488
## 5 0.02033243      5 0.8087384 1.111962 0.1607320
## 6 0.01369760      7 0.7680736 1.132793 0.1569157
## 7 0.01356819      9 0.7406784 1.116188 0.1527953
## 8 0.01055298     12 0.6999738 1.133826 0.1538961
## 9 0.01000000     13 0.6894208 1.144857 0.1546434
## 
## Variable importance
##                        CC                        CP                      FCDo 
##                        29                        29                        18 
##       as.factor(CONCEITO)                      FCDi as.factor(DEPENDENCIAADM) 
##                        12                        11                         2 
## 
## Node number 1: 349 observations,    complexity param=0.06860452
##   mean=0.2967639, MSE=0.2019442 
##   left son=2 (229 obs) right son=3 (120 obs)
##   Primary splits:
##       CC                        < 0.6032122 to the left,  improve=0.06860452, (0 missing)
##       CP                        < 0.78125   to the left,  improve=0.06359468, (0 missing)
##       FCDi                      < 0.4027376 to the left,  improve=0.03325919, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02282198, (0 missing)
##       FCDo                      < 0.739418  to the left,  improve=0.02128091, (0 missing)
##   Surrogate splits:
##       FCDo                      < 0.6933761 to the left,  agree=0.728, adj=0.208, (0 split)
##       FCDi                      < 0.6287683 to the left,  agree=0.688, adj=0.092, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.676, adj=0.058, (0 split)
##       CP                        < 0.78125   to the left,  agree=0.665, adj=0.025, (0 split)
## 
## Node number 2: 229 observations,    complexity param=0.02413469
##   mean=0.211559, MSE=0.118884 
##   left son=4 (166 obs) right son=5 (63 obs)
##   Primary splits:
##       CP                  < 0.4212456 to the right, improve=0.06247979, (0 missing)
##       as.factor(CONCEITO) splits as  RRLLL, improve=0.04016652, (0 missing)
##       CC                  < 0.4374782 to the left,  improve=0.03282737, (0 missing)
##       FCDi                < 0.2736185 to the left,  improve=0.02454798, (0 missing)
##       FCDo                < 0.739418  to the left,  improve=0.01621318, (0 missing)
##   Surrogate splits:
##       FCDo < 0.2384259 to the right, agree=0.747, adj=0.079, (0 split)
##       FCDi < 1.046875  to the left,  agree=0.729, adj=0.016, (0 split)
## 
## Node number 3: 120 observations,    complexity param=0.04033968
##   mean=0.4593634, MSE=0.3201578 
##   left son=6 (107 obs) right son=7 (13 obs)
##   Primary splits:
##       CP                        < 0.5533399 to the left,  improve=0.074002070, (0 missing)
##       FCDi                      < 0.729021  to the right, improve=0.036014390, (0 missing)
##       CC                        < 0.6384672 to the right, improve=0.025727530, (0 missing)
##       FCDo                      < 0.6399573 to the right, improve=0.025400750, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.008366206, (0 missing)
## 
## Node number 4: 166 observations,    complexity param=0.01055298
##   mean=0.1584647, MSE=0.08188266 
##   left son=8 (154 obs) right son=9 (12 obs)
##   Primary splits:
##       FCDo                      < 0.739418  to the left,  improve=0.05471820, (0 missing)
##       as.factor(CONCEITO)       splits as  RRLLL, improve=0.05190366, (0 missing)
##       FCDi                      < 0.3964286 to the left,  improve=0.03873325, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.03476798, (0 missing)
##       CP                        < 0.4971769 to the right, improve=0.03366370, (0 missing)
## 
## Node number 5: 63 observations,    complexity param=0.02033243
##   mean=0.3514582, MSE=0.1893799 
##   left son=10 (42 obs) right son=11 (21 obs)
##   Primary splits:
##       CC                  < 0.5442195 to the left,  improve=0.09792120, (0 missing)
##       FCDi                < 0.2716161 to the left,  improve=0.08624086, (0 missing)
##       as.factor(CONCEITO) splits as  RRRLL, improve=0.03867725, (0 missing)
##       CP                  < 0.3452536 to the left,  improve=0.01296667, (0 missing)
##       FCDo                < 0.6386946 to the right, improve=0.01108625, (0 missing)
## 
## Node number 6: 107 observations,    complexity param=0.02909134
##   mean=0.4057117, MSE=0.2309145 
##   left son=12 (82 obs) right son=13 (25 obs)
##   Primary splits:
##       CP                  < 0.4050694 to the right, improve=0.08159447, (0 missing)
##       CC                  < 0.6247379 to the right, improve=0.03772789, (0 missing)
##       as.factor(CONCEITO) splits as  LLRRL, improve=0.03368993, (0 missing)
##       FCDi                < 0.6636905 to the right, improve=0.03148115, (0 missing)
##       FCDo                < 0.6125541 to the right, improve=0.01568586, (0 missing)
##   Surrogate splits:
##       FCDo < 0.925     to the left,  agree=0.785, adj=0.08, (0 split)
## 
## Node number 7: 13 observations
##   mean=0.9009584, MSE=0.8360003 
## 
## Node number 8: 154 observations
##   mean=0.1397797, MSE=0.07479888 
## 
## Node number 9: 12 observations
##   mean=0.398255, MSE=0.1108114 
## 
## Node number 10: 42 observations
##   mean=0.2551661, MSE=0.09444689 
## 
## Node number 11: 21 observations,    complexity param=0.02033243
##   mean=0.5440422, MSE=0.323613 
##   left son=22 (11 obs) right son=23 (10 obs)
##   Primary splits:
##       CC   < 0.5671054 to the right, improve=0.24981450, (0 missing)
##       FCDi < 0.4063129 to the right, improve=0.14230470, (0 missing)
##       CP   < 0.3773148 to the right, improve=0.09521619, (0 missing)
##       FCDo < 0.4768519 to the right, improve=0.05262435, (0 missing)
##   Surrogate splits:
##       CP                        < 0.3773148 to the right, agree=0.714, adj=0.4, (0 split)
##       FCDo                      < 0.4768519 to the right, agree=0.667, adj=0.3, (0 split)
##       as.factor(CONCEITO)       splits as  RLLL-, agree=0.667, adj=0.3, (0 split)
##       FCDi                      < 0.3301574 to the right, agree=0.619, adj=0.2, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.571, adj=0.1, (0 split)
## 
## Node number 12: 82 observations,    complexity param=0.0136976
##   mean=0.3299204, MSE=0.1778742 
##   left son=24 (59 obs) right son=25 (23 obs)
##   Primary splits:
##       FCDo                      < 0.737037  to the left,  improve=0.06593106, (0 missing)
##       CP                        < 0.4119817 to the left,  improve=0.05386413, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02959628, (0 missing)
##       FCDi                      < 0.7236364 to the right, improve=0.02461049, (0 missing)
##       CC                        < 0.6184748 to the right, improve=0.01630720, (0 missing)
##   Surrogate splits:
##       CP   < 0.533637  to the left,  agree=0.780, adj=0.217, (0 split)
##       FCDi < 0.7703297 to the left,  agree=0.744, adj=0.087, (0 split)
##       CC   < 0.6044909 to the right, agree=0.732, adj=0.043, (0 split)
## 
## Node number 13: 25 observations,    complexity param=0.02909134
##   mean=0.6543069, MSE=0.3242456 
##   left son=26 (14 obs) right son=27 (11 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRRR-, improve=0.25716370, (0 missing)
##       CC                  < 0.749     to the right, improve=0.18471290, (0 missing)
##       FCDo                < 0.5798319 to the right, improve=0.16045070, (0 missing)
##       CP                  < 0.3686075 to the left,  improve=0.06953281, (0 missing)
##       FCDi                < 0.5083333 to the right, improve=0.06574935, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5634921 to the right, agree=0.76, adj=0.455, (0 split)
##       FCDi < 0.5083333 to the right, agree=0.76, adj=0.455, (0 split)
##       CP   < 0.3657617 to the right, agree=0.64, adj=0.182, (0 split)
##       CC   < 0.6129171 to the right, agree=0.60, adj=0.091, (0 split)
## 
## Node number 22: 11 observations
##   mean=0.2729448, MSE=0.09709803 
## 
## Node number 23: 10 observations
##   mean=0.8422494, MSE=0.4030088 
## 
## Node number 24: 59 observations,    complexity param=0.01356819
##   mean=0.262306, MSE=0.1268679 
##   left son=48 (16 obs) right son=49 (43 obs)
##   Primary splits:
##       FCDo                < 0.6742424 to the right, improve=0.11466020, (0 missing)
##       CC                  < 0.722433  to the left,  improve=0.05258454, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.04397665, (0 missing)
##       FCDi                < 0.7236364 to the right, improve=0.04356117, (0 missing)
##       CP                  < 0.4240385 to the right, improve=0.01656819, (0 missing)
## 
## Node number 25: 23 observations,    complexity param=0.0136976
##   mean=0.5033662, MSE=0.2669057 
##   left son=50 (10 obs) right son=51 (13 obs)
##   Primary splits:
##       as.factor(CONCEITO)       splits as  LRR--, improve=0.15786770, (0 missing)
##       FCDi                      < 0.5418752 to the right, improve=0.13674940, (0 missing)
##       CP                        < 0.4524184 to the left,  improve=0.10071030, (0 missing)
##       FCDo                      < 0.781746  to the right, improve=0.06780701, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02786219, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.5965812 to the right, agree=0.739, adj=0.4, (0 split)
##       CP                        < 0.424625  to the left,  agree=0.696, adj=0.3, (0 split)
##       FCDo                      < 0.7541478 to the left,  agree=0.652, adj=0.2, (0 split)
##       CC                        < 0.639881  to the right, agree=0.609, adj=0.1, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.609, adj=0.1, (0 split)
## 
## Node number 26: 14 observations
##   mean=0.3983457, MSE=0.130211 
## 
## Node number 27: 11 observations
##   mean=0.9800758, MSE=0.3816892 
## 
## Node number 48: 16 observations
##   mean=0.06458333, MSE=0.02259983 
## 
## Node number 49: 43 observations,    complexity param=0.01356819
##   mean=0.3358772, MSE=0.1457059 
##   left son=98 (16 obs) right son=99 (27 obs)
##   Primary splits:
##       CC                  < 0.6647727 to the left,  improve=0.14452500, (0 missing)
##       FCDo                < 0.5962963 to the left,  improve=0.11837030, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.09244643, (0 missing)
##       FCDi                < 0.7253846 to the right, improve=0.05993227, (0 missing)
##       CP                  < 0.4830791 to the left,  improve=0.04589235, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  RRLRL, agree=0.721, adj=0.250, (0 split)
##       FCDi                < 0.3690172 to the left,  agree=0.674, adj=0.125, (0 split)
##       CP                  < 0.4094742 to the left,  agree=0.651, adj=0.063, (0 split)
## 
## Node number 50: 10 observations
##   mean=0.2693223, MSE=0.09933087 
## 
## Node number 51: 13 observations
##   mean=0.6833999, MSE=0.3212614 
## 
## Node number 98: 16 observations
##   mean=0.1473683, MSE=0.0727452 
## 
## Node number 99: 27 observations,    complexity param=0.01356819
##   mean=0.4475862, MSE=0.1554048 
##   left son=198 (7 obs) right son=199 (20 obs)
##   Primary splits:
##       FCDi                      < 0.7253846 to the right, improve=0.26336040, (0 missing)
##       FCDo                      < 0.5962963 to the left,  improve=0.16101660, (0 missing)
##       CC                        < 0.8583423 to the right, improve=0.08094265, (0 missing)
##       as.factor(CONCEITO)       splits as  RLLR-, improve=0.05372785, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.02282374, (0 missing)
##   Surrogate splits:
##       CP < 0.4170718 to the left,  agree=0.889, adj=0.571, (0 split)
## 
## Node number 198: 7 observations
##   mean=0.1056277, MSE=0.01153202 
## 
## Node number 199: 20 observations
##   mean=0.5672717, MSE=0.1505082
# Draw the fitted tree for the SAPP / QPPP ratio (rpart.plot defaults).
rpart.plot(arvore_aplicativo)

1.8 Conclusões

As árvores de decisão parecem ser melhores para representar as relações entre os indicadores do modelo, pois não seguem a lógica linear e apresentam particularidades mais detalhadas. A regressão de Poisson provê um modelo mais ajustado.

2 Agradecimentos

Ao Instituto Stela, à UFSC, à CAPES e ao CNPq.

3 Referências

Checking normality for parametric tests in R https://www.sheffield.ac.uk/polopoly_fs/1.579191!/file/stcp-karadimitriou-normalR.pdf

Normality Test in R https://www.datanovia.com/en/lessons/normality-test-in-r/

Como realizar teste de normalidade no R ? https://rpubs.com/paternogbc/46768

Fazendo os testes de Kolmogorov-Smirnov e de Shapiro-Wilk para normalidade http://www.dpi.ufv.br/~peternelli/tutoriaisR/tutoriaisRempdf/tutorial.KS.SW.normalidade.11112004.pdf

BIOESTATÍSTICA USANDO R https://cran.r-project.org/doc/contrib/Beasley-BioestatisticaUsandoR.pdf

Delineamentos Experimentais https://smolski.github.io/livroavancado/analisf.html

Regression Models in R Multicollinearity in R https://datascienceplus.com/multicollinearity-in-r/

https://ibape-nacional.com.br/biblioteca/wp-content/uploads/2020/02/AO-27-Aplica%C3%A7%C3%A3o-do-Teste-de-Farrar-Glauber-para-An%C3%A1lise.pdf

Multicollinearity in R https://www.rpubs.com/dudubiologico/545528

Ajuste de Modelos Não Lineares http://www.leg.ufpr.br/~walmes/cursoR/mgest/3reg-nao-linear.html

Tutorial — Ajuste e Interpretação de Regressão Linear com R https://medium.com/data-hackers/tutorial-ajuste-e-interpreta%C3%A7%C3%A3o-de-regress%C3%A3o-linear-com-r-5b23c4ddb72

CURSO - Modelos de regressão não linear https://www.ime.unicamp.br/~cnaber/cursomodelosnaolinearesR.pdf

Aplicação de modelos de regressão linear e não linear em ciências agrárias http://www.leg.ufpr.br/~walmes/cursoR/cnpaf3/cnpaf02trailer.html

Recursos Computacionais Utilizando R http://www.dex.ufla.br/~danielff/meusarquivospdf/RRC0.pdf

Modelos Não Lineares e suas Aplicações https://www.ufjf.br/cursoestatistica/files/2014/04/Modelos-N%c3%a3o-Lineares-e-suas-Aplica%c3%a7%c3%b5es.pdf

Modelagem - Aprendizado Estatístico http://material.curso-r.com/modelos/

MODELOS DE REGRESSÃO - com apoio computacional https://www.ime.unicamp.br/~cnaber/Livro_MLG.pdf

MODELOS DE REGRESSÃO LINEARES PARA ESTIMATIVA DE PRODUTIVIDADE DA SOJA NO OESTE DO PARANÁ, UTILIZANDO DADOS ESPECTRAIS https://www.scielo.br/pdf/eagri/v30n3/14.pdf

Aplicação do Teste de Farrar-Glauber para Análise de Multicolinearidade Em Regressões Lineares https://ibape-nacional.com.br/biblioteca/wp-content/uploads/2020/02/AO-27-Aplica%C3%A7%C3%A3o-do-Teste-de-Farrar-Glauber-para-An%C3%A1lise.pdf

Regressão Logística: O método estatístico mais utilizado para modelar variáveis categóricas. https://matheusfacure.github.io/2017/02/25/regr-log/

Linear Regression http://rstudio-pubs-static.s3.amazonaws.com/428179_4d1959eb7bda4ed1b9ae5bb86004eae3.html

Regression http://www.mat.ufrgs.br/~giacomo/Softwares/R/Crawley/Crawley%20-%20The%20Book%20R/ch10.pdf

Regressão de Poisson https://smolski.github.io/livroavancado/regressao-de-poisson.html

Tutorial — Ajuste e Interpretação de Regressão Linear com R https://medium.com/data-hackers/tutorial-ajuste-e-interpreta%C3%A7%C3%A3o-de-regress%C3%A3o-linear-com-r-5b23c4ddb72

Estatística Prática para Docentes e Pós-Graduandos de Geraldo Maia Campos 11. Aditividade e homogeneidade http://www.forp.usp.br/restauradora/gmc/gmc_livro/gmc_livro_cap11.html

TESTES DE NORMALIDADE EM ANÁLISES ESTATÍSTICAS: UMA ORIENTAÇÃO PARA PRATICANTES EM CIÊNCIAS DA SAÚDE E ATIVIDADE FÍSICA file:///C:/Users/Jacob/Documents/R/6583-Texto%20do%20artigo-43438-1-10-20171008.pdf

regressão logística https://www.rpubs.com/dudubiologico/545528

Teste para normalidade e homocedasticidade https://biostatistics-uem.github.io/Bio/aula8/teste_normalidade_homocedasticidade.html#:~:text=Em%20an%C3%A1lise%20de%20vari%C3%A2ncia(ANOVA,que%20a%20ANOVA%20tenha%20validade.