library(dplyr)
library(plotly)
library(ggplot2)
library(tidyr)
library(magrittr)
library(plotrix)
library(rgl)
library(car)
library(lubridate)
library(ggplot2)
library(GGally)
library(corrplot)
library(corrgram)
library(ppcor)

library(readxl)
IMI_e_IPT <- read_excel("IMI e IPT.xlsx", 
    col_types = c("numeric", "text", "text", 
        "text", "text", "text", "numeric", 
        "text", "numeric", "text", "text", 
        "text", "text", "text", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric", 
        "numeric", "numeric", "numeric"))

## New names:
## * `` -> ...1

IMI_e_IPT<- IMI_e_IPT %>% 
  mutate_all(replace_na, 0)

dim(IMI_e_IPT)

## [1] 2858   24

outliers <- boxplot(IMI_e_IPT$IPT, plot=FALSE)$out



no_outliers <- IMI_e_IPT 
no_outliers <-no_outliers[which(no_outliers$IPT %in% outliers),]

graf <- IMI_e_IPT[, 15:24]

dim(graf)

## [1] 2858   10

0.1 Coerência do modelo de análise

1 Testes de normalidade

Com seu papel no teorema central do limite, a distribuição normal é encontrada em muitos dos testes estatísticos chamados gaussianos ou assintoticamente gaussianos. O pressuposto de normalidade é feito sobre uma distribuição a priori em um teste de aderência para indicar que esta distribuição segue aproximadamente uma distribuição normal. Existem vários testes de normalidade.

panel.hist <- function(x, ...)
{
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col = "cyan", ...)
}

# 1.2 by Melina de Souza Leite 
panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"), 
                      cex = 1, col.line="red") {
  points(x, y, pch = pch, col = col, bg = bg, cex = cex)
  ok <- is.finite(x) & is.finite(y)
  if (any(ok)) {
    abline(lm(y[ok]~x[ok]), col = col.line)
  }
}

# 1.3 help(pairs) by Melina de Souza Leite 
panel.cor <- function(x, y, digits = 2, prefix = "", cex.cor, ...)
{
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(0, 1, 0, 1))
  r <- abs(cor(x, y))
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste0(prefix, txt)
  if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
  text(0.5, 0.5, txt, cex = cex.cor * r)
}



pairs(graf,  
      diag.panel = panel.hist,
      upper.panel = panel.cor,
      lower.panel = panel.lm,
      main="Correlação Multivariável")

1.1 Dados não normalizados

summary(IMI_e_IPT$IPT)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.00000 0.04546 0.21083 0.21212 5.15000

summary(IMI_e_IPT$IMI)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2304  0.4178  0.5025  0.5051  0.5868  1.0234

dim(IMI_e_IPT)

## [1] 2858   24

par(mfrow=c(1,2))
hist(IMI_e_IPT$IMI)
hist(IMI_e_IPT$IPT)

1.1.1 Shapiro-Wilk test

O método de Shapiro-Wilk é amplamente recomendado para teste de normalidade e fornece melhor potência que o K-S. É baseado na correlação entre os dados e as pontuações normais correspondentes.

A partir da saída, o valor p> 0,05 implica que a distribuição dos dados não é significativamente diferente da distribuição normal. Em outras palavras, podemos assumir a normalidade.

Neste caso o teste mostra que a distribuição não é normal nos dados não normalizados.

shapiro.test(IMI_e_IPT$IPT)

## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IPT
## W = 0.52019, p-value < 2.2e-16

shapiro.test(IMI_e_IPT$IMI)

## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IMI
## W = 0.99093, p-value = 1.775e-12

shapiro.test(IMI_e_IPT$IPT)

## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IPT
## W = 0.52019, p-value < 2.2e-16

shapiro.test(IMI_e_IPT$IMI)

## 
##  Shapiro-Wilk normality test
## 
## data:  IMI_e_IPT$IMI
## W = 0.99093, p-value = 1.775e-12

1.2 Dados normalizados

norm_MI_PT <- IMI_e_IPT %>%
   mutate_all(replace_na, 0) %>%
   mutate(IPT_t = 0.510 + log(IPT)) %>%
   mutate(IMI_t = 10 + IMI)  %>%
   filter(IPT_t != "-Inf")
  


summary(norm_MI_PT$IPT_t)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -3.8207 -1.9749 -1.0994 -1.0274 -0.1831  2.1490

summary(norm_MI_PT$IMI_t)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   10.23   10.42   10.50   10.50   10.58   10.94

dim(norm_MI_PT)

## [1] 1496   26

par(mfrow=c(1,2))
hist(norm_MI_PT$IPT_t)
hist(norm_MI_PT$IMI_t)

1.2.1 Shapiro-Wilk test

shapiro.test(norm_MI_PT$IPT_t)

## 
##  Shapiro-Wilk normality test
## 
## data:  norm_MI_PT$IPT_t
## W = 0.98028, p-value = 1.854e-13

shapiro.test(norm_MI_PT$IMI_t)

## 
##  Shapiro-Wilk normality test
## 
## data:  norm_MI_PT$IMI_t
## W = 0.99172, p-value = 1.874e-07

1.3 Verificação de ajuste do modelo

As estimativas são os coeficientes das variáveis independentes do modelo linear (todas as porcentagens) e refletem uma alteração estimada na variável IPT dependente (a qual foi avaliada em sua composição separadamente em Patentes, Produtos e Aplicativos), quando a variável independente correspondente é alterada.

Portanto, para cada aumento de 1% em porcentagem “Produção de Tecnologias (IPT)”, espera-se um aumento ou diminuição significativo (nos resultados em aparecem asteriscos) das variáveis independentes (Y), mantendo todas as outras variáveis constantes.

Variáveis dependente (Y)

Y = somatório(Y1, Y2, Y3)

Y1 = Quantitativo de produção de patentes por Programa (SPPP/QPPP)

Y2 = Quantitativo de produção de produtos por Programa (SPPPr/QPP)

Y3 = Quantitativo de produção de Aplicativos por Programa (SAPP/QPPP)

Variáveis independente (X)

X = somatório(FCDo, FCDi, CC, CP)

X1 = Formação do corpo docentes dos PPG (FCDo)

X2 = Formação do corpo discente dos PPG (FCDi)

X3 = Colaboração Científica (CC)

X4 = Contexto profissional (CP)

X5 = DEPENDENCIA ADM

X6 = CONCEITO

X7 = UFPROGRAMA

1.4 Produção de Tecnologia (IPT= Patentes+Produtos+Aplicativos)

1.4.1 Com outliers

1.4.1.1 Regressão linear

fitIPT<- lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1, data = IMI_e_IPT)
summary(fitIPT)

## 
## Call:
## lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = IMI_e_IPT)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.84317 -0.26218 -0.09848  0.17089  1.83521 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## FCDo                              0.296810   0.045703   6.494 9.82e-11 ***
## FCDi                              0.035470   0.114392   0.310 0.756529    
## CC                               -0.305536   0.093859  -3.255 0.001146 ** 
## CP                                0.078916   0.066954   1.179 0.238639    
## as.factor(DEPENDENCIAADM)PRIVADA  0.005782   0.214983   0.027 0.978545    
## as.factor(DEPENDENCIAADM)PÚBLICA -0.052682   0.213397  -0.247 0.805023    
## as.factor(CONCEITO)4              0.051471   0.017363   2.964 0.003059 ** 
## as.factor(CONCEITO)5              0.078061   0.023378   3.339 0.000851 ***
## as.factor(CONCEITO)6              0.143020   0.030833   4.638 3.67e-06 ***
## as.factor(CONCEITO)7              0.162759   0.040396   4.029 5.75e-05 ***
## as.factor(UFPROGRAMA)AL           0.336929   0.212819   1.583 0.113495    
## as.factor(UFPROGRAMA)AM           0.170054   0.210222   0.809 0.418627    
## as.factor(UFPROGRAMA)AP           0.286551   0.286711   0.999 0.317666    
## as.factor(UFPROGRAMA)BA           0.251209   0.204912   1.226 0.220324    
## as.factor(UFPROGRAMA)CE           0.244529   0.206071   1.187 0.235475    
## as.factor(UFPROGRAMA)DF           0.237316   0.206512   1.149 0.250585    
## as.factor(UFPROGRAMA)ES           0.195708   0.208323   0.939 0.347582    
## as.factor(UFPROGRAMA)GO           0.200451   0.206537   0.971 0.331863    
## as.factor(UFPROGRAMA)MA           0.205383   0.209770   0.979 0.327620    
## as.factor(UFPROGRAMA)MG           0.287319   0.203846   1.409 0.158799    
## as.factor(UFPROGRAMA)MS           0.178421   0.210932   0.846 0.397698    
## as.factor(UFPROGRAMA)MT           0.147492   0.208995   0.706 0.480420    
## as.factor(UFPROGRAMA)PA           0.245011   0.206412   1.187 0.235327    
## as.factor(UFPROGRAMA)PB           0.230307   0.213089   1.081 0.279877    
## as.factor(UFPROGRAMA)PE           0.291358   0.205269   1.419 0.155894    
## as.factor(UFPROGRAMA)PI           0.356397   0.231281   1.541 0.123436    
## as.factor(UFPROGRAMA)PR           0.291522   0.204107   1.428 0.153322    
## as.factor(UFPROGRAMA)RJ           0.204828   0.203971   1.004 0.315368    
## as.factor(UFPROGRAMA)RN           0.333091   0.206613   1.612 0.107042    
## as.factor(UFPROGRAMA)RO           0.228228   0.231169   0.987 0.323591    
## as.factor(UFPROGRAMA)RR          -0.055025   0.405356  -0.136 0.892033    
## as.factor(UFPROGRAMA)RS           0.268258   0.204146   1.314 0.188937    
## as.factor(UFPROGRAMA)SC           0.230534   0.205866   1.120 0.262883    
## as.factor(UFPROGRAMA)SE           0.717449   0.225157   3.186 0.001456 ** 
## as.factor(UFPROGRAMA)SP           0.186287   0.203858   0.914 0.360896    
## as.factor(UFPROGRAMA)TO           0.275421   0.217391   1.267 0.205282    
## FCDi:CC                           0.083228   0.177392   0.469 0.638980    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.351 on 2821 degrees of freedom
## Multiple R-squared:  0.4233, Adjusted R-squared:  0.4157 
## F-statistic: 55.96 on 37 and 2821 DF,  p-value: < 2.2e-16

plot(fitIPT)

## Warning: not plotting observations with leverage one:
##   1268

## Warning: not plotting observations with leverage one:
##   1268

Não possui normalidade nos dados para garantir os pressupostos da análise

1.4.1.2 Regressão de Poisson

regpoisson=glm(sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC -1, family="poisson", data=IMI_e_IPT)

summary(regpoisson)

## 
## Call:
## glm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4073  -0.7081  -0.2271   0.3074   2.1756  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.073340   0.245773   4.367 1.26e-05 ***
## FCDi                               0.008651   0.642063   0.013  0.98925    
## CC                                -1.359096   0.565538  -2.403  0.01625 *  
## CP                                 0.292796   0.346992   0.844  0.39877    
## as.factor(DEPENDENCIAADM)PRIVADA -14.173973 440.683395  -0.032  0.97434    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.363336 440.683374  -0.033  0.97400    
## as.factor(CONCEITO)4               0.181913   0.093508   1.945  0.05173 .  
## as.factor(CONCEITO)5               0.265754   0.123789   2.147  0.03181 *  
## as.factor(CONCEITO)6               0.473919   0.154755   3.062  0.00220 ** 
## as.factor(CONCEITO)7               0.562573   0.204935   2.745  0.00605 ** 
## as.factor(UFPROGRAMA)AL           13.178131 440.683337   0.030  0.97614    
## as.factor(UFPROGRAMA)AM           12.529444 440.683384   0.028  0.97732    
## as.factor(UFPROGRAMA)AP           13.029627 440.684315   0.030  0.97641    
## as.factor(UFPROGRAMA)BA           12.908784 440.683253   0.029  0.97663    
## as.factor(UFPROGRAMA)CE           12.876676 440.683271   0.029  0.97669    
## as.factor(UFPROGRAMA)DF           12.861083 440.683272   0.029  0.97672    
## as.factor(UFPROGRAMA)ES           12.683449 440.683319   0.029  0.97704    
## as.factor(UFPROGRAMA)GO           12.689389 440.683291   0.029  0.97703    
## as.factor(UFPROGRAMA)MA           12.705164 440.683340   0.029  0.97700    
## as.factor(UFPROGRAMA)MG           13.019330 440.683237   0.030  0.97643    
## as.factor(UFPROGRAMA)MS           12.574788 440.683379   0.029  0.97724    
## as.factor(UFPROGRAMA)MT           12.389935 440.683374   0.028  0.97757    
## as.factor(UFPROGRAMA)PA           12.887794 440.683276   0.029  0.97667    
## as.factor(UFPROGRAMA)PB           12.827642 440.683387   0.029  0.97678    
## as.factor(UFPROGRAMA)PE           13.039558 440.683254   0.030  0.97639    
## as.factor(UFPROGRAMA)PI           13.255723 440.683543   0.030  0.97600    
## as.factor(UFPROGRAMA)PR           13.038930 440.683240   0.030  0.97640    
## as.factor(UFPROGRAMA)RJ           12.740610 440.683242   0.029  0.97694    
## as.factor(UFPROGRAMA)RN           13.173844 440.683265   0.030  0.97615    
## as.factor(UFPROGRAMA)RO           12.799823 440.683760   0.029  0.97683    
## as.factor(UFPROGRAMA)RR           -0.201781 890.473538   0.000  0.99982    
## as.factor(UFPROGRAMA)RS           12.955616 440.683241   0.029  0.97655    
## as.factor(UFPROGRAMA)SC           12.835612 440.683264   0.029  0.97676    
## as.factor(UFPROGRAMA)SE           13.863189 440.683341   0.031  0.97490    
## as.factor(UFPROGRAMA)SP           12.673996 440.683240   0.029  0.97706    
## as.factor(UFPROGRAMA)TO           13.004187 440.683410   0.030  0.97646    
## FCDi:CC                            0.542113   1.045530   0.519  0.60411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3359.1  on 2858  degrees of freedom
## Residual deviance: 1226.9  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.4.1.3 Árvore de decisão

library(rpart)
library(rpart.plot)

arvore <- rpart(sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO), data = IMI_e_IPT)

summary(arvore)

## Call:
## rpart(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = IMI_e_IPT)
##   n= 2858 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.01420417      0 1.0000000 1.0004892 0.03933496
## 2 0.01060735      2 0.9715917 0.9823271 0.03806442
## 3 0.01000000      3 0.9609843 0.9814367 0.03816198
## 
## Variable importance
##                      FCDo                        CC                      FCDi 
##                        50                        23                        14 
##       as.factor(CONCEITO) as.factor(DEPENDENCIAADM)                        CP 
##                         7                         3                         2 
## 
## Node number 1: 2858 observations,    complexity param=0.01420417
##   mean=0.2842771, MSE=0.1300202 
##   left son=2 (1465 obs) right son=3 (1393 obs)
##   Primary splits:
##       CC                        < 0.5714653 to the right, improve=0.012976080, (0 missing)
##       FCDo                      < 0.5373357 to the left,  improve=0.012306860, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.004831066, (0 missing)
##       as.factor(CONCEITO)       splits as  LRRRR, improve=0.004750481, (0 missing)
##       CP                        < 0.4605808 to the left,  improve=0.003011676, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.3727152 to the right, agree=0.687, adj=0.358, (0 split)
##       FCDo                      < 0.4293395 to the right, agree=0.648, adj=0.277, (0 split)
##       as.factor(CONCEITO)       splits as  LRRRR, agree=0.642, adj=0.266, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  LR, agree=0.537, adj=0.050, (0 split)
##       CP                        < 0.5328175 to the left,  agree=0.526, adj=0.027, (0 split)
## 
## Node number 2: 1465 observations,    complexity param=0.01060735
##   mean=0.2442242, MSE=0.1189336 
##   left son=4 (771 obs) right son=5 (694 obs)
##   Primary splits:
##       FCDo                      < 0.570028  to the left,  improve=0.022622340, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.012423700, (0 missing)
##       FCDi                      < 0.4847926 to the left,  improve=0.008504852, (0 missing)
##       as.factor(CONCEITO)       splits as  LRRRL, improve=0.006687891, (0 missing)
##       CC                        < 1.022328  to the right, improve=0.006214590, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.4644661 to the left,  agree=0.655, adj=0.271, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.579, adj=0.111, (0 split)
##       CP                        < 0.4581944 to the left,  agree=0.576, adj=0.105, (0 split)
##       CC                        < 0.6402778 to the left,  agree=0.560, adj=0.072, (0 split)
##       as.factor(CONCEITO)       splits as  RLLLL, agree=0.552, adj=0.055, (0 split)
## 
## Node number 3: 1393 observations,    complexity param=0.01420417
##   mean=0.3264003, MSE=0.1382183 
##   left son=6 (1039 obs) right son=7 (354 obs)
##   Primary splits:
##       FCDo                      < 0.5373357 to the left,  improve=0.029784190, (0 missing)
##       CP                        < 0.4523575 to the left,  improve=0.005847186, (0 missing)
##       FCDi                      < 0.5564663 to the left,  improve=0.004290214, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.002849396, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.002368822, (0 missing)
##   Surrogate splits:
##       FCDi < 0.7564272 to the left,  agree=0.753, adj=0.028, (0 split)
##       CC   < 0.5708403 to the left,  agree=0.747, adj=0.003, (0 split)
## 
## Node number 4: 771 observations
##   mean=0.1950119, MSE=0.09876409 
## 
## Node number 5: 694 observations
##   mean=0.2988967, MSE=0.1356614 
## 
## Node number 6: 1039 observations
##   mean=0.2889487, MSE=0.1104584 
## 
## Node number 7: 354 observations
##   mean=0.4363215, MSE=0.203495

rpart.plot(arvore)

1.4.2 Sem outliers

fitIPT<- lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) - 1, data = no_outliers)
summary(fitIPT)

## 
## Call:
## lm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) - 1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.48865 -0.17996 -0.05381  0.13491  1.14266 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)    
## FCDo                             -0.032993   0.107500  -0.307   0.7591    
## FCDi                              0.087610   0.091725   0.955   0.3402    
## CC                                0.029715   0.099763   0.298   0.7660    
## CP                                0.332823   0.160761   2.070   0.0392 *  
## as.factor(DEPENDENCIAADM)PRIVADA  0.886206   0.148588   5.964 6.60e-09 ***
## as.factor(DEPENDENCIAADM)PÚBLICA  0.823164   0.138450   5.946 7.31e-09 ***
## as.factor(CONCEITO)4              0.061117   0.040729   1.501   0.1345    
## as.factor(CONCEITO)5              0.005148   0.051596   0.100   0.9206    
## as.factor(CONCEITO)6              0.067716   0.063203   1.071   0.2848    
## as.factor(CONCEITO)7              0.117540   0.089545   1.313   0.1903    
## as.factor(UFPROGRAMA)AM           0.004078   0.189628   0.022   0.9829    
## as.factor(UFPROGRAMA)AP          -0.221887   0.293099  -0.757   0.4496    
## as.factor(UFPROGRAMA)BA          -0.068495   0.123099  -0.556   0.5783    
## as.factor(UFPROGRAMA)CE          -0.102848   0.137101  -0.750   0.4537    
## as.factor(UFPROGRAMA)DF          -0.163843   0.128162  -1.278   0.2021    
## as.factor(UFPROGRAMA)ES          -0.065050   0.171513  -0.379   0.7047    
## as.factor(UFPROGRAMA)GO          -0.114053   0.147563  -0.773   0.4402    
## as.factor(UFPROGRAMA)MA           0.086344   0.172345   0.501   0.6167    
## as.factor(UFPROGRAMA)MG          -0.082697   0.113330  -0.730   0.4661    
## as.factor(UFPROGRAMA)MS          -0.302045   0.195083  -1.548   0.1226    
## as.factor(UFPROGRAMA)MT           0.023450   0.222787   0.105   0.9162    
## as.factor(UFPROGRAMA)PA          -0.048164   0.160980  -0.299   0.7650    
## as.factor(UFPROGRAMA)PB          -0.052877   0.172490  -0.307   0.7594    
## as.factor(UFPROGRAMA)PE           0.021935   0.121647   0.180   0.8570    
## as.factor(UFPROGRAMA)PI          -0.135106   0.188768  -0.716   0.4747    
## as.factor(UFPROGRAMA)PR           0.007321   0.115117   0.064   0.9493    
## as.factor(UFPROGRAMA)RJ          -0.056194   0.116527  -0.482   0.6300    
## as.factor(UFPROGRAMA)RN           0.001444   0.125321   0.012   0.9908    
## as.factor(UFPROGRAMA)RO          -0.290385   0.293091  -0.991   0.3226    
## as.factor(UFPROGRAMA)RS           0.079893   0.114530   0.698   0.4860    
## as.factor(UFPROGRAMA)SC          -0.091232   0.128947  -0.708   0.4798    
## as.factor(UFPROGRAMA)SE           0.316064   0.149289   2.117   0.0350 *  
## as.factor(UFPROGRAMA)SP          -0.101283   0.117108  -0.865   0.3878    
## as.factor(UFPROGRAMA)TO          -0.186034   0.220446  -0.844   0.3994    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2727 on 315 degrees of freedom
## Multiple R-squared:  0.9412, Adjusted R-squared:  0.9348 
## F-statistic: 148.2 on 34 and 315 DF,  p-value: < 2.2e-16

plot(fitIPT)

## Warning: not plotting observations with leverage one:
##   97, 106

## Warning: not plotting observations with leverage one:
##   97, 106

Próximo da normalidade mas ainda não consegue garantir os pressupostos

 print(paste("Correlação normalizada", cor(norm_MI_PT$IPT_t,norm_MI_PT$IMI_t, method = "spearman")))

## [1] "Correlação normalizada 0.0929039972121071"

 print(paste("Correlação sem normalização", cor(IMI_e_IPT$IPT,IMI_e_IPT$IMI, method = "spearman")))

## [1] "Correlação sem normalização -0.000893884909316986"

1.4.2.1 Regressão de Poisson

regpoisson=glm(sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC -1, family="poisson", data= IMI_e_IPT)

summary(regpoisson)

## 
## Call:
## glm(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4073  -0.7081  -0.2271   0.3074   2.1756  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.073340   0.245773   4.367 1.26e-05 ***
## FCDi                               0.008651   0.642063   0.013  0.98925    
## CC                                -1.359096   0.565538  -2.403  0.01625 *  
## CP                                 0.292796   0.346992   0.844  0.39877    
## as.factor(DEPENDENCIAADM)PRIVADA -14.173973 440.683395  -0.032  0.97434    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.363336 440.683374  -0.033  0.97400    
## as.factor(CONCEITO)4               0.181913   0.093508   1.945  0.05173 .  
## as.factor(CONCEITO)5               0.265754   0.123789   2.147  0.03181 *  
## as.factor(CONCEITO)6               0.473919   0.154755   3.062  0.00220 ** 
## as.factor(CONCEITO)7               0.562573   0.204935   2.745  0.00605 ** 
## as.factor(UFPROGRAMA)AL           13.178131 440.683337   0.030  0.97614    
## as.factor(UFPROGRAMA)AM           12.529444 440.683384   0.028  0.97732    
## as.factor(UFPROGRAMA)AP           13.029627 440.684315   0.030  0.97641    
## as.factor(UFPROGRAMA)BA           12.908784 440.683253   0.029  0.97663    
## as.factor(UFPROGRAMA)CE           12.876676 440.683271   0.029  0.97669    
## as.factor(UFPROGRAMA)DF           12.861083 440.683272   0.029  0.97672    
## as.factor(UFPROGRAMA)ES           12.683449 440.683319   0.029  0.97704    
## as.factor(UFPROGRAMA)GO           12.689389 440.683291   0.029  0.97703    
## as.factor(UFPROGRAMA)MA           12.705164 440.683340   0.029  0.97700    
## as.factor(UFPROGRAMA)MG           13.019330 440.683237   0.030  0.97643    
## as.factor(UFPROGRAMA)MS           12.574788 440.683379   0.029  0.97724    
## as.factor(UFPROGRAMA)MT           12.389935 440.683374   0.028  0.97757    
## as.factor(UFPROGRAMA)PA           12.887794 440.683276   0.029  0.97667    
## as.factor(UFPROGRAMA)PB           12.827642 440.683387   0.029  0.97678    
## as.factor(UFPROGRAMA)PE           13.039558 440.683254   0.030  0.97639    
## as.factor(UFPROGRAMA)PI           13.255723 440.683543   0.030  0.97600    
## as.factor(UFPROGRAMA)PR           13.038930 440.683240   0.030  0.97640    
## as.factor(UFPROGRAMA)RJ           12.740610 440.683242   0.029  0.97694    
## as.factor(UFPROGRAMA)RN           13.173844 440.683265   0.030  0.97615    
## as.factor(UFPROGRAMA)RO           12.799823 440.683760   0.029  0.97683    
## as.factor(UFPROGRAMA)RR           -0.201781 890.473538   0.000  0.99982    
## as.factor(UFPROGRAMA)RS           12.955616 440.683241   0.029  0.97655    
## as.factor(UFPROGRAMA)SC           12.835612 440.683264   0.029  0.97676    
## as.factor(UFPROGRAMA)SE           13.863189 440.683341   0.031  0.97490    
## as.factor(UFPROGRAMA)SP           12.673996 440.683240   0.029  0.97706    
## as.factor(UFPROGRAMA)TO           13.004187 440.683410   0.030  0.97646    
## FCDi:CC                            0.542113   1.045530   0.519  0.60411    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 3359.1  on 2858  degrees of freedom
## Residual deviance: 1226.9  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.4.2.2 Árvore de decisão

arvore_nout <- rpart(sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO), data = no_outliers)

summary(arvore_nout)

## Call:
## rpart(formula = sqrt(IPT) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.02282279      0 1.0000000 1.005645 0.1197302
## 2 0.01774939      3 0.9294627 1.152528 0.1290911
## 3 0.01624681      8 0.8385899 1.226711 0.1335846
## 4 0.01436838      9 0.8223431 1.245826 0.1332226
## 5 0.01114782     10 0.8079747 1.290706 0.1375681
## 6 0.01084124     14 0.7633834 1.329958 0.1431779
## 7 0.01020310     16 0.7417010 1.328174 0.1432480
## 8 0.01000000     18 0.7212948 1.325013 0.1430175
## 
## Variable importance
##                      FCDi                      FCDo                        CC 
##                        30                        21                        20 
##       as.factor(CONCEITO)                        CP as.factor(DEPENDENCIAADM) 
##                        15                         9                         5 
## 
## Node number 1: 349 observations,    complexity param=0.02282279
##   mean=1.03088, MSE=0.07822102 
##   left son=2 (270 obs) right son=3 (79 obs)
##   Primary splits:
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.018491400, (0 missing)
##       CP                        < 0.6217262 to the left,  improve=0.016534390, (0 missing)
##       CC                        < 0.9772398 to the left,  improve=0.016472820, (0 missing)
##       FCDi                      < 0.9089069 to the left,  improve=0.011495880, (0 missing)
##       FCDo                      < 0.5055856 to the left,  improve=0.009905112, (0 missing)
##   Surrogate splits:
##       FCDo < 0.7656863 to the left,  agree=0.805, adj=0.139, (0 split)
##       FCDi < 0.8198052 to the left,  agree=0.785, adj=0.051, (0 split)
##       CP   < 0.2847222 to the right, agree=0.779, adj=0.025, (0 split)
## 
## Node number 2: 270 observations,    complexity param=0.01774939
##   mean=1.010308, MSE=0.06725982 
##   left son=4 (9 obs) right son=5 (261 obs)
##   Primary splits:
##       FCDi                < 0.8153409 to the right, improve=0.018277410, (0 missing)
##       CP                  < 0.4491571 to the left,  improve=0.017833270, (0 missing)
##       FCDo                < 0.5055856 to the left,  improve=0.012773410, (0 missing)
##       CC                  < 0.5241911 to the left,  improve=0.012177220, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.005276337, (0 missing)
## 
## Node number 3: 79 observations,    complexity param=0.02282279
##   mean=1.101189, MSE=0.1092935 
##   left son=6 (72 obs) right son=7 (7 obs)
##   Primary splits:
##       FCDi                < 0.8568548 to the left,  improve=0.08585471, (0 missing)
##       as.factor(CONCEITO) splits as  LRLLL, improve=0.05595841, (0 missing)
##       CP                  < 0.5502976 to the left,  improve=0.04253452, (0 missing)
##       CC                  < 0.744621  to the left,  improve=0.02808771, (0 missing)
##       FCDo                < 0.5505051 to the right, improve=0.02545831, (0 missing)
## 
## Node number 4: 9 observations
##   mean=0.8214936, MSE=0.01130151 
## 
## Node number 5: 261 observations,    complexity param=0.01774939
##   mean=1.016818, MSE=0.06791769 
##   left son=10 (110 obs) right son=11 (151 obs)
##   Primary splits:
##       CC                  < 0.5241911 to the left,  improve=0.017179430, (0 missing)
##       FCDo                < 0.5055856 to the left,  improve=0.016499930, (0 missing)
##       CP                  < 0.4491571 to the left,  improve=0.013661420, (0 missing)
##       FCDi                < 0.305839  to the left,  improve=0.009885776, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.003872234, (0 missing)
##   Surrogate splits:
##       FCDi                < 0.2837121 to the left,  agree=0.759, adj=0.427, (0 split)
##       as.factor(CONCEITO) splits as  RRLLL, agree=0.655, adj=0.182, (0 split)
##       FCDo                < 0.4359649 to the left,  agree=0.651, adj=0.173, (0 split)
##       CP                  < 0.3026042 to the left,  agree=0.586, adj=0.018, (0 split)
## 
## Node number 6: 72 observations,    complexity param=0.02282279
##   mean=1.070985, MSE=0.08319642 
##   left son=12 (44 obs) right son=13 (28 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRLLL, improve=0.11344000, (0 missing)
##       CP                  < 0.5502976 to the left,  improve=0.10829070, (0 missing)
##       FCDo                < 0.4742063 to the right, improve=0.03983165, (0 missing)
##       CC                  < 0.4436264 to the right, improve=0.03289703, (0 missing)
##       FCDi                < 0.5931034 to the right, improve=0.03242660, (0 missing)
##   Surrogate splits:
##       FCDo < 0.4880952 to the right, agree=0.722, adj=0.286, (0 split)
##       CC   < 0.4681691 to the right, agree=0.653, adj=0.107, (0 split)
##       FCDi < 0.2265512 to the right, agree=0.625, adj=0.036, (0 split)
##       CP   < 0.5390857 to the left,  agree=0.625, adj=0.036, (0 split)
## 
## Node number 7: 7 observations
##   mean=1.411857, MSE=0.2718225 
## 
## Node number 10: 110 observations,    complexity param=0.01436838
##   mean=0.9767974, MSE=0.06031334 
##   left son=20 (99 obs) right son=21 (11 obs)
##   Primary splits:
##       FCDi                < 0.5776093 to the left,  improve=0.05912221, (0 missing)
##       CC                  < 0.3458333 to the right, improve=0.03839310, (0 missing)
##       as.factor(CONCEITO) splits as  LLLLR, improve=0.02705273, (0 missing)
##       CP                  < 0.4422619 to the left,  improve=0.01626740, (0 missing)
##       FCDo                < 0.3181515 to the left,  improve=0.01159944, (0 missing)
##   Surrogate splits:
##       FCDo < 0.6777778 to the left,  agree=0.909, adj=0.091, (0 split)
## 
## Node number 11: 151 observations,    complexity param=0.01774939
##   mean=1.045973, MSE=0.07144051 
##   left son=22 (78 obs) right son=23 (73 obs)
##   Primary splits:
##       FCDi                < 0.5116238 to the right, improve=0.049506290, (0 missing)
##       CC                  < 0.5288521 to the right, improve=0.047210890, (0 missing)
##       CP                  < 0.4922794 to the left,  improve=0.035483860, (0 missing)
##       FCDo                < 0.7064327 to the right, improve=0.013602440, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRL, improve=0.008312707, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  LRRRR, agree=0.709, adj=0.397, (0 split)
##       FCDo                < 0.53125   to the right, agree=0.689, adj=0.356, (0 split)
##       CC                  < 0.6512144 to the right, agree=0.649, adj=0.274, (0 split)
##       CP                  < 0.5114426 to the left,  agree=0.563, adj=0.096, (0 split)
## 
## Node number 12: 44 observations,    complexity param=0.0102031
##   mean=0.9934879, MSE=0.06276538 
##   left son=24 (35 obs) right son=25 (9 obs)
##   Primary splits:
##       CC                  < 0.5134056 to the right, improve=0.07949447, (0 missing)
##       FCDi                < 0.5528821 to the left,  improve=0.07729982, (0 missing)
##       CP                  < 0.4094907 to the left,  improve=0.07376566, (0 missing)
##       FCDo                < 0.8176638 to the right, improve=0.03895006, (0 missing)
##       as.factor(CONCEITO) splits as  R-LLL, improve=0.01075653, (0 missing)
## 
## Node number 13: 28 observations,    complexity param=0.01624681
##   mean=1.192767, MSE=0.09103369 
##   left son=26 (10 obs) right son=27 (18 obs)
##   Primary splits:
##       FCDi < 0.5760369 to the right, improve=0.17400310, (0 missing)
##       FCDo < 0.4742063 to the right, improve=0.08989864, (0 missing)
##       CP   < 0.4949875 to the left,  improve=0.08841769, (0 missing)
##       CC   < 0.6364286 to the right, improve=0.08680351, (0 missing)
##   Surrogate splits:
##       FCDo < 0.6237374 to the right, agree=0.679, adj=0.1, (0 split)
## 
## Node number 20: 99 observations,    complexity param=0.01114782
##   mean=0.9568925, MSE=0.04303043 
##   left son=40 (88 obs) right son=41 (11 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LLLLR, improve=0.05782292, (0 missing)
##       FCDo                < 0.6262255 to the right, improve=0.04835172, (0 missing)
##       CC                  < 0.4221709 to the left,  improve=0.03187391, (0 missing)
##       FCDi                < 0.3762626 to the right, improve=0.02416534, (0 missing)
##       CP                  < 0.3844157 to the right, improve=0.01702926, (0 missing)
## 
## Node number 21: 11 observations
##   mean=1.155942, MSE=0.180201 
## 
## Node number 22: 78 observations,    complexity param=0.01084124
##   mean=0.9884398, MSE=0.04855783 
##   left son=44 (53 obs) right son=45 (25 obs)
##   Primary splits:
##       FCDo                < 0.5555556 to the right, improve=0.05771495, (0 missing)
##       CP                  < 0.3622024 to the right, improve=0.04915763, (0 missing)
##       FCDi                < 0.6236111 to the right, improve=0.03517085, (0 missing)
##       as.factor(CONCEITO) splits as  LRRL-, improve=0.02845517, (0 missing)
##       CC                  < 0.5445374 to the right, improve=0.02243028, (0 missing)
##   Surrogate splits:
##       CC < 0.571131  to the right, agree=0.731, adj=0.16, (0 split)
##       CP < 0.3479701 to the right, agree=0.692, adj=0.04, (0 split)
## 
## Node number 23: 73 observations,    complexity param=0.01774939
##   mean=1.107446, MSE=0.08857475 
##   left son=46 (42 obs) right son=47 (31 obs)
##   Primary splits:
##       FCDo                < 0.5196759 to the left,  improve=0.11872500, (0 missing)
##       FCDi                < 0.4038198 to the left,  improve=0.08744170, (0 missing)
##       CC                  < 0.5299148 to the right, improve=0.07999502, (0 missing)
##       CP                  < 0.493595  to the left,  improve=0.06055149, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.04731229, (0 missing)
##   Surrogate splits:
##       FCDi                < 0.3636541 to the left,  agree=0.685, adj=0.258, (0 split)
##       CP                  < 0.493595  to the left,  agree=0.644, adj=0.161, (0 split)
##       as.factor(CONCEITO) splits as  RLLLL, agree=0.644, adj=0.161, (0 split)
##       CC                  < 0.5254464 to the right, agree=0.603, adj=0.065, (0 split)
## 
## Node number 24: 35 observations,    complexity param=0.0102031
##   mean=0.9576687, MSE=0.0545813 
##   left son=48 (22 obs) right son=49 (13 obs)
##   Primary splits:
##       CC                  < 0.7054784 to the left,  improve=0.17668720, (0 missing)
##       CP                  < 0.5079004 to the left,  improve=0.10888740, (0 missing)
##       FCDo                < 0.5677656 to the left,  improve=0.10299430, (0 missing)
##       FCDi                < 0.5278571 to the left,  improve=0.09328107, (0 missing)
##       as.factor(CONCEITO) splits as  R-LLL, improve=0.01092535, (0 missing)
##   Surrogate splits:
##       CP   < 0.2951389 to the right, agree=0.686, adj=0.154, (0 split)
##       FCDo < 0.4444444 to the right, agree=0.657, adj=0.077, (0 split)
## 
## Node number 25: 9 observations
##   mean=1.132785, MSE=0.07019924 
## 
## Node number 26: 10 observations
##   mean=1.023912, MSE=0.03782498 
## 
## Node number 27: 18 observations
##   mean=1.286576, MSE=0.09595387 
## 
## Node number 40: 88 observations,    complexity param=0.01114782
##   mean=0.9392568, MSE=0.03773218 
##   left son=80 (8 obs) right son=81 (80 obs)
##   Primary splits:
##       FCDo                < 0.6262255 to the right, improve=0.049166600, (0 missing)
##       CC                  < 0.3982491 to the left,  improve=0.034074560, (0 missing)
##       CP                  < 0.3844157 to the right, improve=0.018987410, (0 missing)
##       FCDi                < 0.1707642 to the left,  improve=0.018899550, (0 missing)
##       as.factor(CONCEITO) splits as  RRLL-, improve=0.005205054, (0 missing)
## 
## Node number 41: 11 observations
##   mean=1.097978, MSE=0.06302311 
## 
## Node number 44: 53 observations
##   mean=0.9520813, MSE=0.02939089 
## 
## Node number 45: 25 observations,    complexity param=0.01084124
##   mean=1.06552, MSE=0.08044789 
##   left son=90 (18 obs) right son=91 (7 obs)
##   Primary splits:
##       FCDo                < 0.5230856 to the left,  improve=0.18561910, (0 missing)
##       CC                  < 0.577178  to the left,  improve=0.07196665, (0 missing)
##       as.factor(CONCEITO) splits as  RLRL-, improve=0.07046449, (0 missing)
##       FCDi                < 0.591253  to the left,  improve=0.06953301, (0 missing)
##       CP                  < 0.399213  to the right, improve=0.06489596, (0 missing)
##   Surrogate splits:
##       FCDi < 0.5732143 to the right, agree=0.76, adj=0.143, (0 split)
##       CC   < 0.5436709 to the right, agree=0.76, adj=0.143, (0 split)
##       CP   < 0.4987637 to the left,  agree=0.76, adj=0.143, (0 split)
## 
## Node number 46: 42 observations
##   mean=1.019345, MSE=0.0388596 
## 
## Node number 47: 31 observations,    complexity param=0.01774939
##   mean=1.226809, MSE=0.1311672 
##   left son=94 (15 obs) right son=95 (16 obs)
##   Primary splits:
##       CP                  < 0.4890941 to the left,  improve=0.13343660, (0 missing)
##       FCDi                < 0.4038198 to the left,  improve=0.09019198, (0 missing)
##       as.factor(CONCEITO) splits as  RLLR-, improve=0.07319815, (0 missing)
##       CC                  < 0.5389888 to the right, improve=0.06864687, (0 missing)
##       FCDo                < 0.6306043 to the right, improve=0.04451223, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  RLRL-, agree=0.742, adj=0.467, (0 split)
##       FCDi                < 0.390628  to the left,  agree=0.677, adj=0.333, (0 split)
##       CC                  < 0.6080688 to the right, agree=0.581, adj=0.133, (0 split)
##       FCDo                < 0.5342262 to the right, agree=0.548, adj=0.067, (0 split)
## 
## Node number 48: 22 observations
##   mean=0.8821795, MSE=0.03318247 
## 
## Node number 49: 13 observations
##   mean=1.08542, MSE=0.0648306 
## 
## Node number 80: 8 observations
##   mean=0.8030524, MSE=0.002435638 
## 
## Node number 81: 80 observations,    complexity param=0.01114782
##   mean=0.9528772, MSE=0.03922116 
##   left son=162 (29 obs) right son=163 (51 obs)
##   Primary splits:
##       CC                  < 0.4221709 to the left,  improve=0.047450190, (0 missing)
##       FCDi                < 0.1707642 to the left,  improve=0.028815950, (0 missing)
##       CP                  < 0.4306469 to the left,  improve=0.021127690, (0 missing)
##       FCDo                < 0.5033333 to the left,  improve=0.013459220, (0 missing)
##       as.factor(CONCEITO) splits as  LRLL-, improve=0.008112287, (0 missing)
##   Surrogate splits:
##       FCDo                < 0.2472222 to the left,  agree=0.675, adj=0.103, (0 split)
##       FCDi                < 0.1084967 to the left,  agree=0.675, adj=0.103, (0 split)
##       CP                  < 0.3364286 to the left,  agree=0.650, adj=0.034, (0 split)
##       as.factor(CONCEITO) splits as  RRRL-, agree=0.650, adj=0.034, (0 split)
## 
## Node number 90: 18 observations
##   mean=0.9893152, MSE=0.05215333 
## 
## Node number 91: 7 observations
##   mean=1.261475, MSE=0.09987436 
## 
## Node number 94: 15 observations
##   mean=1.090174, MSE=0.05014305 
## 
## Node number 95: 16 observations
##   mean=1.354905, MSE=0.1732162 
## 
## Node number 162: 29 observations
##   mean=0.8956681, MSE=0.02279377 
## 
## Node number 163: 51 observations,    complexity param=0.01114782
##   mean=0.9854079, MSE=0.04564293 
##   left son=326 (44 obs) right son=327 (7 obs)
##   Primary splits:
##       CC                  < 0.4392778 to the right, improve=0.28303160, (0 missing)
##       FCDi                < 0.3762626 to the right, improve=0.05658093, (0 missing)
##       CP                  < 0.3914764 to the right, improve=0.02654500, (0 missing)
##       FCDo                < 0.5175926 to the right, improve=0.01990692, (0 missing)
##       as.factor(CONCEITO) splits as  LRRL-, improve=0.01343994, (0 missing)
## 
## Node number 326: 44 observations
##   mean=0.9400736, MSE=0.02508037 
## 
## Node number 327: 7 observations
##   mean=1.270366, MSE=0.08077355

rpart.plot(arvore_nout)

1.5 Patentes

fitPatente<- lm(formula = sqrt(SPPP/QPPP) ~  FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1, data = no_outliers)

summary(fitPatente)

## 
## Call:
## lm(formula = sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.04262 -0.22803  0.00558  0.20992  1.50475 
## 
## Coefficients:
##                                  Estimate Std. Error t value Pr(>|t|)    
## FCDo                              0.11819    0.15310   0.772 0.440695    
## FCDi                              0.47621    0.35180   1.354 0.176827    
## CC                               -0.17147    0.30760  -0.557 0.577622    
## CP                                0.07917    0.22872   0.346 0.729457    
## as.factor(DEPENDENCIAADM)PRIVADA  0.58341    0.25823   2.259 0.024551 *  
## as.factor(DEPENDENCIAADM)PÚBLICA  0.60106    0.24599   2.443 0.015100 *  
## as.factor(CONCEITO)4              0.15022    0.05795   2.592 0.009978 ** 
## as.factor(CONCEITO)5              0.18623    0.07340   2.537 0.011660 *  
## as.factor(CONCEITO)6              0.31017    0.09000   3.446 0.000646 ***
## as.factor(CONCEITO)7              0.48170    0.12816   3.759 0.000204 ***
## as.factor(UFPROGRAMA)AM          -0.51719    0.26986  -1.917 0.056209 .  
## as.factor(UFPROGRAMA)AP           0.04558    0.41682   0.109 0.912994    
## as.factor(UFPROGRAMA)BA          -0.06116    0.17511  -0.349 0.727132    
## as.factor(UFPROGRAMA)CE          -0.23068    0.19504  -1.183 0.237802    
## as.factor(UFPROGRAMA)DF          -0.17233    0.18229  -0.945 0.345197    
## as.factor(UFPROGRAMA)ES          -0.07956    0.24396  -0.326 0.744542    
## as.factor(UFPROGRAMA)GO          -0.05098    0.21036  -0.242 0.808679    
## as.factor(UFPROGRAMA)MA          -0.02890    0.24510  -0.118 0.906223    
## as.factor(UFPROGRAMA)MG          -0.16847    0.16121  -1.045 0.296810    
## as.factor(UFPROGRAMA)MS          -0.16157    0.27755  -0.582 0.560882    
## as.factor(UFPROGRAMA)MT           0.27199    0.31745   0.857 0.392206    
## as.factor(UFPROGRAMA)PA           0.06686    0.22893   0.292 0.770424    
## as.factor(UFPROGRAMA)PB          -0.18183    0.24554  -0.741 0.459532    
## as.factor(UFPROGRAMA)PE           0.07924    0.17331   0.457 0.647826    
## as.factor(UFPROGRAMA)PI          -0.14271    0.26855  -0.531 0.595502    
## as.factor(UFPROGRAMA)PR          -0.04901    0.16373  -0.299 0.764872    
## as.factor(UFPROGRAMA)RJ          -0.39161    0.16574  -2.363 0.018743 *  
## as.factor(UFPROGRAMA)RN           0.09459    0.17828   0.531 0.596092    
## as.factor(UFPROGRAMA)RO          -0.24508    0.41685  -0.588 0.557000    
## as.factor(UFPROGRAMA)RS          -0.17845    0.16294  -1.095 0.274254    
## as.factor(UFPROGRAMA)SC          -0.34405    0.18345  -1.875 0.061653 .  
## as.factor(UFPROGRAMA)SE           0.32892    0.21231   1.549 0.122344    
## as.factor(UFPROGRAMA)SP          -0.16600    0.16654  -0.997 0.319661    
## as.factor(UFPROGRAMA)TO          -0.51461    0.31350  -1.642 0.101694    
## FCDi:CC                          -0.48437    0.55907  -0.866 0.386942    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3879 on 314 degrees of freedom
## Multiple R-squared:  0.785,  Adjusted R-squared:  0.761 
## F-statistic: 32.75 on 35 and 314 DF,  p-value: < 2.2e-16

1.5.0.1 Regressão de Poisson

regpoisson=glm(sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC -1, family="poisson", data= IMI_e_IPT)

summary(regpoisson)

## 
## Call:
## glm(formula = sqrt(SPPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2005  -0.5658  -0.4486   0.2305   2.5971  
## 
## Coefficients:
##                                  Estimate Std. Error z value Pr(>|z|)    
## FCDo                               1.2155     0.3275   3.711 0.000206 ***
## FCDi                              -0.2236     0.8551  -0.262 0.793690    
## CC                                -2.6057     0.7764  -3.356 0.000790 ***
## CP                                 0.8352     0.4402   1.897 0.057779 .  
## as.factor(DEPENDENCIAADM)PRIVADA -13.9911   429.4776  -0.033 0.974012    
## as.factor(DEPENDENCIAADM)PÚBLICA -14.0189   429.4776  -0.033 0.973960    
## as.factor(CONCEITO)4               0.2469     0.1278   1.932 0.053351 .  
## as.factor(CONCEITO)5               0.4300     0.1638   2.625 0.008670 ** 
## as.factor(CONCEITO)6               0.7350     0.1982   3.709 0.000208 ***
## as.factor(CONCEITO)7               0.9958     0.2498   3.986 6.72e-05 ***
## as.factor(UFPROGRAMA)AL           12.6915   429.4775   0.030 0.976425    
## as.factor(UFPROGRAMA)AM           11.8077   429.4776   0.027 0.978066    
## as.factor(UFPROGRAMA)AP           12.8809   429.4785   0.030 0.976073    
## as.factor(UFPROGRAMA)BA           12.3216   429.4774   0.029 0.977112    
## as.factor(UFPROGRAMA)CE           12.2236   429.4774   0.028 0.977294    
## as.factor(UFPROGRAMA)DF           12.0511   429.4774   0.028 0.977614    
## as.factor(UFPROGRAMA)ES           12.1617   429.4775   0.028 0.977409    
## as.factor(UFPROGRAMA)GO           12.0979   429.4775   0.028 0.977527    
## as.factor(UFPROGRAMA)MA           12.2703   429.4775   0.029 0.977207    
## as.factor(UFPROGRAMA)MG           12.3975   429.4774   0.029 0.976971    
## as.factor(UFPROGRAMA)MS           12.1020   429.4776   0.028 0.977520    
## as.factor(UFPROGRAMA)MT           12.0404   429.4775   0.028 0.977634    
## as.factor(UFPROGRAMA)PA           12.1447   429.4774   0.028 0.977440    
## as.factor(UFPROGRAMA)PB           12.3053   429.4776   0.029 0.977142    
## as.factor(UFPROGRAMA)PE           12.6171   429.4774   0.029 0.976563    
## as.factor(UFPROGRAMA)PI           12.8485   429.4778   0.030 0.976134    
## as.factor(UFPROGRAMA)PR           12.4447   429.4774   0.029 0.976884    
## as.factor(UFPROGRAMA)RJ           11.6693   429.4774   0.027 0.978323    
## as.factor(UFPROGRAMA)RN           12.7194   429.4774   0.030 0.976373    
## as.factor(UFPROGRAMA)RO           11.8032   429.4788   0.027 0.978075    
## as.factor(UFPROGRAMA)RR           -0.3080   884.9815   0.000 0.999722    
## as.factor(UFPROGRAMA)RS           12.1394   429.4774   0.028 0.977450    
## as.factor(UFPROGRAMA)SC           11.9645   429.4774   0.028 0.977775    
## as.factor(UFPROGRAMA)SE           13.5189   429.4775   0.031 0.974889    
## as.factor(UFPROGRAMA)SP           11.9685   429.4774   0.028 0.977768    
## as.factor(UFPROGRAMA)TO           12.2209   429.4777   0.028 0.977299    
## FCDi:CC                            1.3029     1.4374   0.906 0.364703    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4275.4  on 2858  degrees of freedom
## Residual deviance: 1079.5  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 12

1.5.0.2 Árvore de decisão

arvore_patente <- rpart(SPPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO), data = no_outliers)

summary(arvore_patente)

## Call:
## rpart(formula = SPPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.03547831      0 1.0000000 1.007428 0.1963998
## 2 0.02239911      2 0.9290434 1.106991 0.1984569
## 3 0.01844940      5 0.8618460 1.144126 0.1978482
## 4 0.01239861      6 0.8433966 1.171633 0.2045524
## 5 0.01029784      7 0.8309980 1.186228 0.2054333
## 6 0.01000000     10 0.8001045 1.185827 0.2052137
## 
## Variable importance
##                      FCDi                      FCDo                        CC 
##                        29                        25                        18 
##                        CP       as.factor(CONCEITO) as.factor(DEPENDENCIAADM) 
##                        15                        12                         1 
## 
## Node number 1: 349 observations,    complexity param=0.03547831
##   mean=0.629482, MSE=0.424635 
##   left son=2 (148 obs) right son=3 (201 obs)
##   Primary splits:
##       CP                  < 0.4476111 to the left,  improve=0.03222377, (0 missing)
##       as.factor(CONCEITO) splits as  LLLRR, improve=0.03009496, (0 missing)
##       CC                  < 0.3445861 to the right, improve=0.02901461, (0 missing)
##       FCDo                < 0.7421652 to the right, improve=0.01929228, (0 missing)
##       FCDi                < 0.4241932 to the right, improve=0.01206120, (0 missing)
##   Surrogate splits:
##       FCDo < 0.2593656 to the left,  agree=0.605, adj=0.068, (0 split)
##       FCDi < 0.1863082 to the left,  agree=0.605, adj=0.068, (0 split)
##       CC   < 0.8354167 to the right, agree=0.605, adj=0.068, (0 split)
## 
## Node number 2: 148 observations,    complexity param=0.0184494
##   mean=0.4931608, MSE=0.3104785 
##   left son=4 (141 obs) right son=5 (7 obs)
##   Primary splits:
##       FCDi                      < 0.9089069 to the left,  improve=0.05950184, (0 missing)
##       FCDo                      < 0.6794872 to the left,  improve=0.03972066, (0 missing)
##       CC                        < 0.8143056 to the left,  improve=0.03121239, (0 missing)
##       as.factor(CONCEITO)       splits as  LRLRR, improve=0.02188764, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.01734681, (0 missing)
## 
## Node number 3: 201 observations,    complexity param=0.03547831
##   mean=0.7298578, MSE=0.484932 
##   left son=6 (29 obs) right son=7 (172 obs)
##   Primary splits:
##       FCDo                      < 0.7421652 to the right, improve=0.05889031, (0 missing)
##       CC                        < 0.6693845 to the right, improve=0.05210645, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.04265311, (0 missing)
##       FCDi                      < 0.343002  to the right, improve=0.03777498, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.03187693, (0 missing)
## 
## Node number 4: 141 observations,    complexity param=0.01239861
##   mean=0.4628763, MSE=0.2099164 
##   left son=8 (76 obs) right son=9 (65 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRLRR, improve=0.06207956, (0 missing)
##       CC                  < 0.5957221 to the right, improve=0.02193168, (0 missing)
##       FCDi                < 0.6492599 to the right, improve=0.01779479, (0 missing)
##       FCDo                < 0.6794872 to the left,  improve=0.01572268, (0 missing)
##       CP                  < 0.3328947 to the left,  improve=0.01453018, (0 missing)
##   Surrogate splits:
##       FCDi < 0.4228551 to the right, agree=0.660, adj=0.262, (0 split)
##       CC   < 0.4529334 to the right, agree=0.603, adj=0.138, (0 split)
##       CP   < 0.4369372 to the left,  agree=0.603, adj=0.138, (0 split)
##       FCDo < 0.269697  to the right, agree=0.582, adj=0.092, (0 split)
## 
## Node number 5: 7 observations
##   mean=1.103177, MSE=1.945491 
## 
## Node number 6: 29 observations
##   mean=0.3183031, MSE=0.1521703 
## 
## Node number 7: 172 observations,    complexity param=0.02239911
##   mean=0.7992478, MSE=0.5076644 
##   left son=14 (26 obs) right son=15 (146 obs)
##   Primary splits:
##       CC                        < 0.6693845 to the right, improve=0.03664811, (0 missing)
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.03207323, (0 missing)
##       FCDi                      < 0.189441  to the right, improve=0.02366747, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.02292318, (0 missing)
##       CP                        < 0.7083333 to the right, improve=0.01980541, (0 missing)
## 
## Node number 8: 76 observations
##   mean=0.3573047, MSE=0.1272899 
## 
## Node number 9: 65 observations,    complexity param=0.01029784
##   mean=0.586314, MSE=0.2782576 
##   left son=18 (55 obs) right son=19 (10 obs)
##   Primary splits:
##       FCDo                < 0.6427432 to the left,  improve=0.06189628, (0 missing)
##       FCDi                < 0.3582888 to the left,  improve=0.05966784, (0 missing)
##       CC                  < 0.6019898 to the right, improve=0.04629516, (0 missing)
##       CP                  < 0.4375926 to the right, improve=0.02795421, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.01444040, (0 missing)
##   Surrogate splits:
##       FCDi < 0.7519201 to the left,  agree=0.862, adj=0.1, (0 split)
## 
## Node number 14: 26 observations
##   mean=0.4760236, MSE=0.213218 
## 
## Node number 15: 146 observations,    complexity param=0.02239911
##   mean=0.8568083, MSE=0.5381819 
##   left son=30 (121 obs) right son=31 (25 obs)
##   Primary splits:
##       as.factor(CONCEITO)       splits as  LLLRR, improve=0.03076330, (0 missing)
##       CC                        < 0.350456  to the right, improve=0.02644304, (0 missing)
##       FCDi                      < 0.189441  to the right, improve=0.01612835, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.01338604, (0 missing)
##       FCDo                      < 0.5465368 to the left,  improve=0.01098118, (0 missing)
##   Surrogate splits:
##       CC   < 0.3445861 to the right, agree=0.842, adj=0.08, (0 split)
##       FCDo < 0.2344055 to the right, agree=0.836, adj=0.04, (0 split)
## 
## Node number 18: 55 observations,    complexity param=0.01029784
##   mean=0.5303544, MSE=0.2043189 
##   left son=36 (14 obs) right son=37 (41 obs)
##   Primary splits:
##       CC                  < 0.6019898 to the right, improve=0.13981370, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.06595971, (0 missing)
##       CP                  < 0.3640523 to the left,  improve=0.06510467, (0 missing)
##       FCDi                < 0.3360526 to the left,  improve=0.02538762, (0 missing)
##       FCDo                < 0.5857843 to the right, improve=0.02263407, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.6546053 to the right, agree=0.800, adj=0.214, (0 split)
##       FCDo                      < 0.5522876 to the right, agree=0.782, adj=0.143, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  LR, agree=0.782, adj=0.143, (0 split)
##       CP                        < 0.3360417 to the left,  agree=0.764, adj=0.071, (0 split)
## 
## Node number 19: 10 observations
##   mean=0.8940917, MSE=0.5729703 
## 
## Node number 30: 121 observations
##   mean=0.7983214, MSE=0.3348285 
## 
## Node number 31: 25 observations,    complexity param=0.02239911
##   mean=1.139885, MSE=1.425724 
##   left son=62 (18 obs) right son=63 (7 obs)
##   Primary splits:
##       FCDi                < 0.3356331 to the left,  improve=1.217970e-01, (0 missing)
##       FCDo                < 0.5105856 to the left,  improve=8.984864e-02, (0 missing)
##       CP                  < 0.4816682 to the right, improve=3.685829e-02, (0 missing)
##       CC                  < 0.4971226 to the right, improve=2.548008e-02, (0 missing)
##       as.factor(CONCEITO) splits as  ---RL, improve=3.728631e-05, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5022523 to the left,  agree=0.76, adj=0.143, (0 split)
## 
## Node number 36: 14 observations
##   mean=0.2411153, MSE=0.06078167 
## 
## Node number 37: 41 observations,    complexity param=0.01029784
##   mean=0.629119, MSE=0.2150106 
##   left son=74 (28 obs) right son=75 (13 obs)
##   Primary splits:
##       FCDi                < 0.336875  to the left,  improve=0.21413400, (0 missing)
##       CC                  < 0.5347393 to the left,  improve=0.11238680, (0 missing)
##       FCDo                < 0.5331439 to the left,  improve=0.08401116, (0 missing)
##       CP                  < 0.4375926 to the right, improve=0.06392309, (0 missing)
##       as.factor(CONCEITO) splits as  -L-RR, improve=0.05506055, (0 missing)
##   Surrogate splits:
##       CC   < 0.5197655 to the left,  agree=0.829, adj=0.462, (0 split)
##       FCDo < 0.4796296 to the left,  agree=0.805, adj=0.385, (0 split)
##       CP   < 0.342445  to the right, agree=0.707, adj=0.077, (0 split)
## 
## Node number 62: 18 observations
##   mean=0.8800191, MSE=0.4187633 
## 
## Node number 63: 7 observations
##   mean=1.808111, MSE=3.394877 
## 
## Node number 74: 28 observations
##   mean=0.4829129, MSE=0.1068154 
## 
## Node number 75: 13 observations
##   mean=0.9440244, MSE=0.3028397

rpart.plot(arvore_patente)

1.6 Produtos

fitProduto<- lm(formula = sqrt(SPPPr/QPPP) ~  FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1, data = no_outliers)

summary(fitProduto)

## 
## Call:
## lm(formula = sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.55609 -0.24614 -0.03405  0.15980  1.50068 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## FCDo                              0.115563   0.130668   0.884   0.3772  
## FCDi                             -0.222547   0.300259  -0.741   0.4591  
## CC                               -0.250075   0.262536  -0.953   0.3416  
## CP                                0.008614   0.195205   0.044   0.9648  
## as.factor(DEPENDENCIAADM)PRIVADA  0.436698   0.220394   1.981   0.0484 *
## as.factor(DEPENDENCIAADM)PÚBLICA  0.394790   0.209951   1.880   0.0610 .
## as.factor(CONCEITO)4             -0.014193   0.049457  -0.287   0.7743  
## as.factor(CONCEITO)5             -0.084674   0.062648  -1.352   0.1775  
## as.factor(CONCEITO)6             -0.162837   0.076814  -2.120   0.0348 *
## as.factor(CONCEITO)7             -0.204283   0.109380  -1.868   0.0627 .
## as.factor(UFPROGRAMA)AM           0.234963   0.230324   1.020   0.3084  
## as.factor(UFPROGRAMA)AP          -0.008091   0.355751  -0.023   0.9819  
## as.factor(UFPROGRAMA)BA           0.026207   0.149458   0.175   0.8609  
## as.factor(UFPROGRAMA)CE           0.020598   0.166463   0.124   0.9016  
## as.factor(UFPROGRAMA)DF           0.027819   0.155581   0.179   0.8582  
## as.factor(UFPROGRAMA)ES           0.047873   0.208212   0.230   0.8183  
## as.factor(UFPROGRAMA)GO          -0.057580   0.179537  -0.321   0.7486  
## as.factor(UFPROGRAMA)MA          -0.158047   0.209187  -0.756   0.4505  
## as.factor(UFPROGRAMA)MG           0.100455   0.137588   0.730   0.4659  
## as.factor(UFPROGRAMA)MS          -0.150784   0.236881  -0.637   0.5249  
## as.factor(UFPROGRAMA)MT          -0.258484   0.270938  -0.954   0.3408  
## as.factor(UFPROGRAMA)PA           0.068426   0.195390   0.350   0.7264  
## as.factor(UFPROGRAMA)PB           0.222162   0.209568   1.060   0.2899  
## as.factor(UFPROGRAMA)PE          -0.022290   0.147913  -0.151   0.8803  
## as.factor(UFPROGRAMA)PI          -0.108230   0.229204  -0.472   0.6371  
## as.factor(UFPROGRAMA)PR           0.095577   0.139740   0.684   0.4945  
## as.factor(UFPROGRAMA)RJ           0.042602   0.141453   0.301   0.7635  
## as.factor(UFPROGRAMA)RN          -0.140447   0.152159  -0.923   0.3567  
## as.factor(UFPROGRAMA)RO           0.173559   0.355771   0.488   0.6260  
## as.factor(UFPROGRAMA)RS           0.257228   0.139063   1.850   0.0653 .
## as.factor(UFPROGRAMA)SC           0.158146   0.156568   1.010   0.3132  
## as.factor(UFPROGRAMA)SE           0.048546   0.181207   0.268   0.7890  
## as.factor(UFPROGRAMA)SP           0.124219   0.142141   0.874   0.3828  
## as.factor(UFPROGRAMA)TO          -0.056985   0.267566  -0.213   0.8315  
## FCDi:CC                           0.219510   0.477159   0.460   0.6458  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.331 on 314 degrees of freedom
## Multiple R-squared:  0.5408, Adjusted R-squared:  0.4896 
## F-statistic: 10.56 on 35 and 314 DF,  p-value: < 2.2e-16

1.6.0.1 Regressão de Poisson

regpoisson=glm(sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC -1, family="poisson", data= IMI_e_IPT)

summary(regpoisson)

## 
## Call:
## glm(formula = sqrt(SPPPr/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.7312  -0.4113  -0.3454  -0.2339   2.6836  
## 
## Coefficients:
##                                   Estimate Std. Error z value Pr(>|z|)   
## FCDo                                1.3369     0.4763   2.807    0.005 **
## FCDi                               -0.8160     1.2430  -0.656    0.512   
## CC                                 -1.9591     1.0849  -1.806    0.071 . 
## CP                                  0.3492     0.6544   0.534    0.594   
## as.factor(DEPENDENCIAADM)PRIVADA  -14.6500   726.8327  -0.020    0.984   
## as.factor(DEPENDENCIAADM)PÚBLICA  -15.0541   726.8327  -0.021    0.983   
## as.factor(CONCEITO)4                0.1528     0.1781   0.858    0.391   
## as.factor(CONCEITO)5                0.3060     0.2297   1.332    0.183   
## as.factor(CONCEITO)6                0.1166     0.3205   0.364    0.716   
## as.factor(CONCEITO)7                0.3040     0.4142   0.734    0.463   
## as.factor(UFPROGRAMA)AL            12.5308   726.8327   0.017    0.986   
## as.factor(UFPROGRAMA)AM            12.6555   726.8326   0.017    0.986   
## as.factor(UFPROGRAMA)AP            12.6343   726.8350   0.017    0.986   
## as.factor(UFPROGRAMA)BA            12.4648   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)CE            12.5366   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)DF            12.6250   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)ES            11.8993   726.8327   0.016    0.987   
## as.factor(UFPROGRAMA)GO            12.0363   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)MA            11.5489   726.8329   0.016    0.987   
## as.factor(UFPROGRAMA)MG            12.6873   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)MS            11.7967   726.8329   0.016    0.987   
## as.factor(UFPROGRAMA)MT            10.1730   726.8346   0.014    0.989   
## as.factor(UFPROGRAMA)PA            12.8953   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)PB            12.6954   726.8326   0.017    0.986   
## as.factor(UFPROGRAMA)PE            12.0827   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)PI            12.2559   726.8338   0.017    0.987   
## as.factor(UFPROGRAMA)PR            12.8170   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)RJ            12.5721   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)RN            12.2530   726.8325   0.017    0.987   
## as.factor(UFPROGRAMA)RO            13.6260   726.8327   0.019    0.985   
## as.factor(UFPROGRAMA)RR            -0.2529  1468.2756   0.000    1.000   
## as.factor(UFPROGRAMA)RS            12.8192   726.8324   0.018    0.986   
## as.factor(UFPROGRAMA)SC            12.7093   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)SE            13.3714   726.8326   0.018    0.985   
## as.factor(UFPROGRAMA)SP            12.3229   726.8324   0.017    0.986   
## as.factor(UFPROGRAMA)TO            12.3185   726.8329   0.017    0.986   
## FCDi:CC                             1.4643     2.0123   0.728    0.467   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4910.85  on 2858  degrees of freedom
## Residual deviance:  709.79  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 13

1.6.0.2 Árvore de decisão

arvore_produto <- rpart(SPPPr/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO), data = no_outliers)

summary(arvore_produto)

## Call:
## rpart(formula = SPPPr/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.03178604      0 1.0000000 1.008583 0.3188953
## 2 0.01939137      1 0.9682140 1.105910 0.3215916
## 3 0.01489306      3 0.9294312 1.180256 0.3190358
## 4 0.01000000      6 0.8847520 1.240110 0.3232844
## 
## Variable importance
##                FCDi                  CP                  CC as.factor(CONCEITO) 
##                  40                  31                  14                   8 
##                FCDo 
##                   6 
## 
## Node number 1: 349 observations,    complexity param=0.03178604
##   mean=0.214688, MSE=0.1752302 
##   left son=2 (339 obs) right son=3 (10 obs)
##   Primary splits:
##       FCDi                      < 0.9615385 to the left,  improve=0.03178604, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02574168, (0 missing)
##       CP                        < 0.4987637 to the left,  improve=0.01765411, (0 missing)
##       FCDo                      < 0.4122316 to the left,  improve=0.01376318, (0 missing)
##       as.factor(CONCEITO)       splits as  RRLLL, improve=0.01106026, (0 missing)
## 
## Node number 2: 339 observations,    complexity param=0.01939137
##   mean=0.2018699, MSE=0.1308178 
##   left son=4 (307 obs) right son=5 (32 obs)
##   Primary splits:
##       CP                        < 0.550463  to the left,  improve=0.014634150, (0 missing)
##       FCDo                      < 0.4122316 to the left,  improve=0.013348420, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.011491610, (0 missing)
##       as.factor(CONCEITO)       splits as  RRRLL, improve=0.010421320, (0 missing)
##       FCDi                      < 0.3702381 to the left,  improve=0.009293826, (0 missing)
## 
## Node number 3: 10 observations
##   mean=0.6492208, MSE=1.486423 
## 
## Node number 4: 307 observations,    complexity param=0.01489306
##   mean=0.1877438, MSE=0.09350723 
##   left son=8 (204 obs) right son=9 (103 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  RLLLL, improve=0.017920130, (0 missing)
##       FCDo                < 0.8257576 to the left,  improve=0.014202210, (0 missing)
##       FCDi                < 0.4205263 to the left,  improve=0.011737620, (0 missing)
##       CP                  < 0.5413492 to the right, improve=0.008810057, (0 missing)
##       CC                  < 0.650146  to the left,  improve=0.006771424, (0 missing)
##   Surrogate splits:
##       FCDi < 0.5732143 to the left,  agree=0.785, adj=0.359, (0 split)
##       FCDo < 0.6376263 to the left,  agree=0.707, adj=0.126, (0 split)
##       CP   < 0.3026042 to the right, agree=0.678, adj=0.039, (0 split)
## 
## Node number 5: 32 observations,    complexity param=0.01939137
##   mean=0.3373924, MSE=0.4684851 
##   left son=10 (24 obs) right son=11 (8 obs)
##   Primary splits:
##       CP                        < 0.5688095 to the right, improve=0.11491750, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.09985498, (0 missing)
##       as.factor(CONCEITO)       splits as  LRLL-, improve=0.09578208, (0 missing)
##       FCDi                      < 0.5811404 to the right, improve=0.04275088, (0 missing)
##       CC                        < 0.5961445 to the right, improve=0.04187705, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  LLRL-, agree=0.781, adj=0.125, (0 split)
## 
## Node number 8: 204 observations
##   mean=0.1586569, MSE=0.05966425 
## 
## Node number 9: 103 observations,    complexity param=0.01489306
##   mean=0.2453527, MSE=0.1555416 
##   left son=18 (78 obs) right son=19 (25 obs)
##   Primary splits:
##       CC                        < 0.5289366 to the right, improve=0.0571028000, (0 missing)
##       FCDi                      < 0.5732143 to the right, improve=0.0382717800, (0 missing)
##       FCDo                      < 0.6171498 to the right, improve=0.0280340700, (0 missing)
##       CP                        < 0.5246181 to the right, improve=0.0181320300, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.0006905795, (0 missing)
##   Surrogate splits:
##       FCDi < 0.251179  to the right, agree=0.777, adj=0.08, (0 split)
##       FCDo < 0.3444444 to the right, agree=0.767, adj=0.04, (0 split)
## 
## Node number 10: 24 observations
##   mean=0.2034308, MSE=0.1841712 
## 
## Node number 11: 8 observations
##   mean=0.7392771, MSE=1.106078 
## 
## Node number 18: 78 observations
##   mean=0.1919977, MSE=0.1060473 
## 
## Node number 19: 25 observations,    complexity param=0.01489306
##   mean=0.4118201, MSE=0.2733707 
##   left son=38 (12 obs) right son=39 (13 obs)
##   Primary splits:
##       FCDi < 0.5668241 to the right, improve=0.19067320, (0 missing)
##       FCDo < 0.5358974 to the left,  improve=0.10093880, (0 missing)
##       CC   < 0.5011218 to the left,  improve=0.09637133, (0 missing)
##       CP   < 0.479386  to the left,  improve=0.03544710, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5694444 to the right, agree=0.68, adj=0.333, (0 split)
##       CC   < 0.5011218 to the left,  agree=0.64, adj=0.250, (0 split)
##       CP   < 0.4449653 to the right, agree=0.64, adj=0.250, (0 split)
## 
## Node number 38: 12 observations
##   mean=0.1741898, MSE=0.03724166 
## 
## Node number 39: 13 observations
##   mean=0.6311712, MSE=0.3910966

rpart.plot(arvore_produto)

1.7 Aplicativos

fitApp<- lm(formula = sqrt(SAPP / QPPP) ~  FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 1, data = no_outliers)

summary(fitApp)

## 
## Call:
## lm(formula = sqrt(SAPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, data = no_outliers)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70947 -0.26896 -0.08895  0.24785  1.38975 
## 
## Coefficients:
##                                   Estimate Std. Error t value Pr(>|t|)  
## FCDo                             -0.331245   0.152323  -2.175   0.0304 *
## FCDi                              0.217738   0.350020   0.622   0.5343  
## CC                                0.750816   0.306045   2.453   0.0147 *
## CP                                0.052085   0.227556   0.229   0.8191  
## as.factor(DEPENDENCIAADM)PRIVADA  0.096309   0.256919   0.375   0.7080  
## as.factor(DEPENDENCIAADM)PÚBLICA  0.011137   0.244746   0.046   0.9637  
## as.factor(CONCEITO)4             -0.026093   0.057654  -0.453   0.6512  
## as.factor(CONCEITO)5             -0.130041   0.073031  -1.781   0.0759 .
## as.factor(CONCEITO)6             -0.130877   0.089544  -1.462   0.1449  
## as.factor(CONCEITO)7             -0.302464   0.127508  -2.372   0.0183 *
## as.factor(UFPROGRAMA)AM           0.360370   0.268495   1.342   0.1805  
## as.factor(UFPROGRAMA)AP          -0.265171   0.414709  -0.639   0.5230  
## as.factor(UFPROGRAMA)BA           0.004714   0.174227   0.027   0.9784  
## as.factor(UFPROGRAMA)CE           0.132524   0.194050   0.683   0.4952  
## as.factor(UFPROGRAMA)DF           0.069679   0.181366   0.384   0.7011  
## as.factor(UFPROGRAMA)ES           0.131945   0.242719   0.544   0.5871  
## as.factor(UFPROGRAMA)GO          -0.012691   0.209291  -0.061   0.9517  
## as.factor(UFPROGRAMA)MA           0.323108   0.243855   1.325   0.1861  
## as.factor(UFPROGRAMA)MG           0.141139   0.160390   0.880   0.3795  
## as.factor(UFPROGRAMA)MS           0.091445   0.276138   0.331   0.7407  
## as.factor(UFPROGRAMA)MT          -0.224905   0.315840  -0.712   0.4769  
## as.factor(UFPROGRAMA)PA          -0.091803   0.227771  -0.403   0.6872  
## as.factor(UFPROGRAMA)PB          -0.124886   0.244299  -0.511   0.6096  
## as.factor(UFPROGRAMA)PE          -0.005428   0.172426  -0.031   0.9749  
## as.factor(UFPROGRAMA)PI          -0.030504   0.267189  -0.114   0.9092  
## as.factor(UFPROGRAMA)PR           0.081857   0.162899   0.502   0.6157  
## as.factor(UFPROGRAMA)RJ           0.373183   0.164896   2.263   0.0243 *
## as.factor(UFPROGRAMA)RN           0.031056   0.177376   0.175   0.8611  
## as.factor(UFPROGRAMA)RO           0.038910   0.414732   0.094   0.9253  
## as.factor(UFPROGRAMA)RS           0.150616   0.162110   0.929   0.3536  
## as.factor(UFPROGRAMA)SC           0.205903   0.182516   1.128   0.2601  
## as.factor(UFPROGRAMA)SE           0.022686   0.211238   0.107   0.9145  
## as.factor(UFPROGRAMA)SP           0.110609   0.165697   0.668   0.5049  
## as.factor(UFPROGRAMA)TO           0.445293   0.311909   1.428   0.1544  
## FCDi:CC                          -0.387726   0.556237  -0.697   0.4863  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3859 on 314 degrees of freedom
## Multiple R-squared:  0.5485, Adjusted R-squared:  0.4982 
## F-statistic:  10.9 on 35 and 314 DF,  p-value: < 2.2e-16

1.7.0.1 Regressão de Poisson

regpoisson=glm(sqrt(SAPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC -1, family="poisson", data= IMI_e_IPT)

summary(regpoisson)

## 
## Call:
## glm(formula = sqrt(SAPP/QPPP) ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO) + as.factor(UFPROGRAMA) + FCDi * CC - 
##     1, family = "poisson", data = IMI_e_IPT)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8264  -0.4847  -0.4213   0.1997   2.6427  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)  
## FCDo                              8.209e-01  3.857e-01   2.128   0.0333 *
## FCDi                              1.134e+00  9.870e-01   1.149   0.2505  
## CC                                5.941e-01  8.224e-01   0.722   0.4701  
## CP                               -8.244e-01  6.099e-01  -1.352   0.1765  
## as.factor(DEPENDENCIAADM)PRIVADA -1.570e+01  7.290e+02  -0.022   0.9828  
## as.factor(DEPENDENCIAADM)PÚBLICA -1.598e+01  7.290e+02  -0.022   0.9825  
## as.factor(CONCEITO)4              1.992e-01  1.436e-01   1.387   0.1654  
## as.factor(CONCEITO)5              8.424e-02  2.023e-01   0.416   0.6771  
## as.factor(CONCEITO)6              3.620e-01  2.511e-01   1.441   0.1494  
## as.factor(CONCEITO)7              8.261e-02  3.805e-01   0.217   0.8281  
## as.factor(UFPROGRAMA)AL           1.319e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)AM           1.251e+01  7.290e+02   0.017   0.9863  
## as.factor(UFPROGRAMA)AP          -2.363e-02  1.036e+03   0.000   1.0000  
## as.factor(UFPROGRAMA)BA           1.302e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)CE           1.297e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)DF           1.319e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)ES           1.280e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)GO           1.281e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)MA           1.286e+01  7.290e+02   0.018   0.9859  
## as.factor(UFPROGRAMA)MG           1.321e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)MS           1.256e+01  7.290e+02   0.017   0.9863  
## as.factor(UFPROGRAMA)MT           1.223e+01  7.290e+02   0.017   0.9866  
## as.factor(UFPROGRAMA)PA           1.277e+01  7.290e+02   0.018   0.9860  
## as.factor(UFPROGRAMA)PB           1.232e+01  7.290e+02   0.017   0.9865  
## as.factor(UFPROGRAMA)PE           1.293e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)PI           1.305e+01  7.290e+02   0.018   0.9857  
## as.factor(UFPROGRAMA)PR           1.311e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)RJ           1.324e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)RN           1.323e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)RO           1.163e+01  7.290e+02   0.016   0.9873  
## as.factor(UFPROGRAMA)RR           1.688e-03  1.469e+03   0.000   1.0000  
## as.factor(UFPROGRAMA)RS           1.322e+01  7.290e+02   0.018   0.9855  
## as.factor(UFPROGRAMA)SC           1.320e+01  7.290e+02   0.018   0.9856  
## as.factor(UFPROGRAMA)SE           1.349e+01  7.290e+02   0.019   0.9852  
## as.factor(UFPROGRAMA)SP           1.293e+01  7.290e+02   0.018   0.9858  
## as.factor(UFPROGRAMA)TO           1.351e+01  7.290e+02   0.019   0.9852  
## FCDi:CC                          -1.298e+00  1.548e+00  -0.839   0.4017  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 4605.88  on 2858  degrees of freedom
## Residual deviance:  886.23  on 2821  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 13

1.7.0.2 Árvore de decisão

arvore_aplicativo <- rpart(SAPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM)  + as.factor(CONCEITO), data = no_outliers)

summary(arvore_aplicativo)

## Call:
## rpart(formula = SAPP/QPPP ~ FCDo + FCDi + CC + CP + as.factor(DEPENDENCIAADM) + 
##     as.factor(CONCEITO), data = no_outliers)
##   n= 349 
## 
##           CP nsplit rel error   xerror      xstd
## 1 0.06860452      0 1.0000000 1.005998 0.1579283
## 2 0.04033968      1 0.9313955 1.072299 0.1546801
## 3 0.02909134      2 0.8910558 1.050254 0.1533450
## 4 0.02413469      4 0.8328731 1.094798 0.1607488
## 5 0.02033243      5 0.8087384 1.111962 0.1607320
## 6 0.01369760      7 0.7680736 1.132793 0.1569157
## 7 0.01356819      9 0.7406784 1.116188 0.1527953
## 8 0.01055298     12 0.6999738 1.133826 0.1538961
## 9 0.01000000     13 0.6894208 1.144857 0.1546434
## 
## Variable importance
##                        CC                        CP                      FCDo 
##                        29                        29                        18 
##       as.factor(CONCEITO)                      FCDi as.factor(DEPENDENCIAADM) 
##                        12                        11                         2 
## 
## Node number 1: 349 observations,    complexity param=0.06860452
##   mean=0.2967639, MSE=0.2019442 
##   left son=2 (229 obs) right son=3 (120 obs)
##   Primary splits:
##       CC                        < 0.6032122 to the left,  improve=0.06860452, (0 missing)
##       CP                        < 0.78125   to the left,  improve=0.06359468, (0 missing)
##       FCDi                      < 0.4027376 to the left,  improve=0.03325919, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02282198, (0 missing)
##       FCDo                      < 0.739418  to the left,  improve=0.02128091, (0 missing)
##   Surrogate splits:
##       FCDo                      < 0.6933761 to the left,  agree=0.728, adj=0.208, (0 split)
##       FCDi                      < 0.6287683 to the left,  agree=0.688, adj=0.092, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.676, adj=0.058, (0 split)
##       CP                        < 0.78125   to the left,  agree=0.665, adj=0.025, (0 split)
## 
## Node number 2: 229 observations,    complexity param=0.02413469
##   mean=0.211559, MSE=0.118884 
##   left son=4 (166 obs) right son=5 (63 obs)
##   Primary splits:
##       CP                  < 0.4212456 to the right, improve=0.06247979, (0 missing)
##       as.factor(CONCEITO) splits as  RRLLL, improve=0.04016652, (0 missing)
##       CC                  < 0.4374782 to the left,  improve=0.03282737, (0 missing)
##       FCDi                < 0.2736185 to the left,  improve=0.02454798, (0 missing)
##       FCDo                < 0.739418  to the left,  improve=0.01621318, (0 missing)
##   Surrogate splits:
##       FCDo < 0.2384259 to the right, agree=0.747, adj=0.079, (0 split)
##       FCDi < 1.046875  to the left,  agree=0.729, adj=0.016, (0 split)
## 
## Node number 3: 120 observations,    complexity param=0.04033968
##   mean=0.4593634, MSE=0.3201578 
##   left son=6 (107 obs) right son=7 (13 obs)
##   Primary splits:
##       CP                        < 0.5533399 to the left,  improve=0.074002070, (0 missing)
##       FCDi                      < 0.729021  to the right, improve=0.036014390, (0 missing)
##       CC                        < 0.6384672 to the right, improve=0.025727530, (0 missing)
##       FCDo                      < 0.6399573 to the right, improve=0.025400750, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.008366206, (0 missing)
## 
## Node number 4: 166 observations,    complexity param=0.01055298
##   mean=0.1584647, MSE=0.08188266 
##   left son=8 (154 obs) right son=9 (12 obs)
##   Primary splits:
##       FCDo                      < 0.739418  to the left,  improve=0.05471820, (0 missing)
##       as.factor(CONCEITO)       splits as  RRLLL, improve=0.05190366, (0 missing)
##       FCDi                      < 0.3964286 to the left,  improve=0.03873325, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.03476798, (0 missing)
##       CP                        < 0.4971769 to the right, improve=0.03366370, (0 missing)
## 
## Node number 5: 63 observations,    complexity param=0.02033243
##   mean=0.3514582, MSE=0.1893799 
##   left son=10 (42 obs) right son=11 (21 obs)
##   Primary splits:
##       CC                  < 0.5442195 to the left,  improve=0.09792120, (0 missing)
##       FCDi                < 0.2716161 to the left,  improve=0.08624086, (0 missing)
##       as.factor(CONCEITO) splits as  RRRLL, improve=0.03867725, (0 missing)
##       CP                  < 0.3452536 to the left,  improve=0.01296667, (0 missing)
##       FCDo                < 0.6386946 to the right, improve=0.01108625, (0 missing)
## 
## Node number 6: 107 observations,    complexity param=0.02909134
##   mean=0.4057117, MSE=0.2309145 
##   left son=12 (82 obs) right son=13 (25 obs)
##   Primary splits:
##       CP                  < 0.4050694 to the right, improve=0.08159447, (0 missing)
##       CC                  < 0.6247379 to the right, improve=0.03772789, (0 missing)
##       as.factor(CONCEITO) splits as  LLRRL, improve=0.03368993, (0 missing)
##       FCDi                < 0.6636905 to the right, improve=0.03148115, (0 missing)
##       FCDo                < 0.6125541 to the right, improve=0.01568586, (0 missing)
##   Surrogate splits:
##       FCDo < 0.925     to the left,  agree=0.785, adj=0.08, (0 split)
## 
## Node number 7: 13 observations
##   mean=0.9009584, MSE=0.8360003 
## 
## Node number 8: 154 observations
##   mean=0.1397797, MSE=0.07479888 
## 
## Node number 9: 12 observations
##   mean=0.398255, MSE=0.1108114 
## 
## Node number 10: 42 observations
##   mean=0.2551661, MSE=0.09444689 
## 
## Node number 11: 21 observations,    complexity param=0.02033243
##   mean=0.5440422, MSE=0.323613 
##   left son=22 (11 obs) right son=23 (10 obs)
##   Primary splits:
##       CC   < 0.5671054 to the right, improve=0.24981450, (0 missing)
##       FCDi < 0.4063129 to the right, improve=0.14230470, (0 missing)
##       CP   < 0.3773148 to the right, improve=0.09521619, (0 missing)
##       FCDo < 0.4768519 to the right, improve=0.05262435, (0 missing)
##   Surrogate splits:
##       CP                        < 0.3773148 to the right, agree=0.714, adj=0.4, (0 split)
##       FCDo                      < 0.4768519 to the right, agree=0.667, adj=0.3, (0 split)
##       as.factor(CONCEITO)       splits as  RLLL-, agree=0.667, adj=0.3, (0 split)
##       FCDi                      < 0.3301574 to the right, agree=0.619, adj=0.2, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.571, adj=0.1, (0 split)
## 
## Node number 12: 82 observations,    complexity param=0.0136976
##   mean=0.3299204, MSE=0.1778742 
##   left son=24 (59 obs) right son=25 (23 obs)
##   Primary splits:
##       FCDo                      < 0.737037  to the left,  improve=0.06593106, (0 missing)
##       CP                        < 0.4119817 to the left,  improve=0.05386413, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02959628, (0 missing)
##       FCDi                      < 0.7236364 to the right, improve=0.02461049, (0 missing)
##       CC                        < 0.6184748 to the right, improve=0.01630720, (0 missing)
##   Surrogate splits:
##       CP   < 0.533637  to the left,  agree=0.780, adj=0.217, (0 split)
##       FCDi < 0.7703297 to the left,  agree=0.744, adj=0.087, (0 split)
##       CC   < 0.6044909 to the right, agree=0.732, adj=0.043, (0 split)
## 
## Node number 13: 25 observations,    complexity param=0.02909134
##   mean=0.6543069, MSE=0.3242456 
##   left son=26 (14 obs) right son=27 (11 obs)
##   Primary splits:
##       as.factor(CONCEITO) splits as  LRRR-, improve=0.25716370, (0 missing)
##       CC                  < 0.749     to the right, improve=0.18471290, (0 missing)
##       FCDo                < 0.5798319 to the right, improve=0.16045070, (0 missing)
##       CP                  < 0.3686075 to the left,  improve=0.06953281, (0 missing)
##       FCDi                < 0.5083333 to the right, improve=0.06574935, (0 missing)
##   Surrogate splits:
##       FCDo < 0.5634921 to the right, agree=0.76, adj=0.455, (0 split)
##       FCDi < 0.5083333 to the right, agree=0.76, adj=0.455, (0 split)
##       CP   < 0.3657617 to the right, agree=0.64, adj=0.182, (0 split)
##       CC   < 0.6129171 to the right, agree=0.60, adj=0.091, (0 split)
## 
## Node number 22: 11 observations
##   mean=0.2729448, MSE=0.09709803 
## 
## Node number 23: 10 observations
##   mean=0.8422494, MSE=0.4030088 
## 
## Node number 24: 59 observations,    complexity param=0.01356819
##   mean=0.262306, MSE=0.1268679 
##   left son=48 (16 obs) right son=49 (43 obs)
##   Primary splits:
##       FCDo                < 0.6742424 to the right, improve=0.11466020, (0 missing)
##       CC                  < 0.722433  to the left,  improve=0.05258454, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.04397665, (0 missing)
##       FCDi                < 0.7236364 to the right, improve=0.04356117, (0 missing)
##       CP                  < 0.4240385 to the right, improve=0.01656819, (0 missing)
## 
## Node number 25: 23 observations,    complexity param=0.0136976
##   mean=0.5033662, MSE=0.2669057 
##   left son=50 (10 obs) right son=51 (13 obs)
##   Primary splits:
##       as.factor(CONCEITO)       splits as  LRR--, improve=0.15786770, (0 missing)
##       FCDi                      < 0.5418752 to the right, improve=0.13674940, (0 missing)
##       CP                        < 0.4524184 to the left,  improve=0.10071030, (0 missing)
##       FCDo                      < 0.781746  to the right, improve=0.06780701, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  RL, improve=0.02786219, (0 missing)
##   Surrogate splits:
##       FCDi                      < 0.5965812 to the right, agree=0.739, adj=0.4, (0 split)
##       CP                        < 0.424625  to the left,  agree=0.696, adj=0.3, (0 split)
##       FCDo                      < 0.7541478 to the left,  agree=0.652, adj=0.2, (0 split)
##       CC                        < 0.639881  to the right, agree=0.609, adj=0.1, (0 split)
##       as.factor(DEPENDENCIAADM) splits as  RL, agree=0.609, adj=0.1, (0 split)
## 
## Node number 26: 14 observations
##   mean=0.3983457, MSE=0.130211 
## 
## Node number 27: 11 observations
##   mean=0.9800758, MSE=0.3816892 
## 
## Node number 48: 16 observations
##   mean=0.06458333, MSE=0.02259983 
## 
## Node number 49: 43 observations,    complexity param=0.01356819
##   mean=0.3358772, MSE=0.1457059 
##   left son=98 (16 obs) right son=99 (27 obs)
##   Primary splits:
##       CC                  < 0.6647727 to the left,  improve=0.14452500, (0 missing)
##       FCDo                < 0.5962963 to the left,  improve=0.11837030, (0 missing)
##       as.factor(CONCEITO) splits as  RLLRL, improve=0.09244643, (0 missing)
##       FCDi                < 0.7253846 to the right, improve=0.05993227, (0 missing)
##       CP                  < 0.4830791 to the left,  improve=0.04589235, (0 missing)
##   Surrogate splits:
##       as.factor(CONCEITO) splits as  RRLRL, agree=0.721, adj=0.250, (0 split)
##       FCDi                < 0.3690172 to the left,  agree=0.674, adj=0.125, (0 split)
##       CP                  < 0.4094742 to the left,  agree=0.651, adj=0.063, (0 split)
## 
## Node number 50: 10 observations
##   mean=0.2693223, MSE=0.09933087 
## 
## Node number 51: 13 observations
##   mean=0.6833999, MSE=0.3212614 
## 
## Node number 98: 16 observations
##   mean=0.1473683, MSE=0.0727452 
## 
## Node number 99: 27 observations,    complexity param=0.01356819
##   mean=0.4475862, MSE=0.1554048 
##   left son=198 (7 obs) right son=199 (20 obs)
##   Primary splits:
##       FCDi                      < 0.7253846 to the right, improve=0.26336040, (0 missing)
##       FCDo                      < 0.5962963 to the left,  improve=0.16101660, (0 missing)
##       CC                        < 0.8583423 to the right, improve=0.08094265, (0 missing)
##       as.factor(CONCEITO)       splits as  RLLR-, improve=0.05372785, (0 missing)
##       as.factor(DEPENDENCIAADM) splits as  LR, improve=0.02282374, (0 missing)
##   Surrogate splits:
##       CP < 0.4170718 to the left,  agree=0.889, adj=0.571, (0 split)
## 
## Node number 198: 7 observations
##   mean=0.1056277, MSE=0.01153202 
## 
## Node number 199: 20 observations
##   mean=0.5672717, MSE=0.1505082

rpart.plot(arvore_aplicativo)

1.8 Conclusões

As arvores de decisão paracem ser melhor para representar as relações entre os indicadores do modelo, pois não seguem a lógica linear e apresentam particularidades mais detalhadas.A regressão de Poisson provê um modelo mais ajustado.

2 Agradecimentos

Ao Instituto Stela, à UFSC, à CAPES e ao CNPq.

3 Referências

Checking normality for parametric tests in R https://www.sheffield.ac.uk/polopoly_fs/1.579191!/file/stcp-karadimitriou-normalR.pdf

Normality Test in R https://www.datanovia.com/en/lessons/normality-test-in-r/

Como realizar teste de normalidade no R ? https://rpubs.com/paternogbc/46768

Fazendo os testes de Kolmogorov-Smirnov e de Shapiro-Wilk para normalidade http://www.dpi.ufv.br/~peternelli/tutoriaisR/tutoriaisRempdf/tutorial.KS.SW.normalidade.11112004.pdf

BIOESTATÍSTICA USANDO R https://cran.r-project.org/doc/contrib/Beasley-BioestatisticaUsandoR.pdf

Delineamentos Experimentais https://smolski.github.io/livroavancado/analisf.html

Regression Models in R Multicollinearity in R https://datascienceplus.com/multicollinearity-in-r/

https://ibape-nacional.com.br/biblioteca/wp-content/uploads/2020/02/AO-27-Aplica%C3%A7%C3%A3o-do-Teste-de-Farrar-Glauber-para-An%C3%A1lise.pdf

Multicollinearity in R https://www.rpubs.com/dudubiologico/545528

Ajuste de Modelos Não Lineares http://www.leg.ufpr.br/~walmes/cursoR/mgest/3reg-nao-linear.html

Tutorial — Ajuste e Interpretação de Regressão Linear com R https://medium.com/data-hackers/tutorial-ajuste-e-interpreta%C3%A7%C3%A3o-de-regress%C3%A3o-linear-com-r-5b23c4ddb72

CURSO - Modelos de regressão não linear https://www.ime.unicamp.br/~cnaber/cursomodelosnaolinearesR.pdf

Aplicação de modelos de regressão linear e não linear em ciências agrárias http://www.leg.ufpr.br/~walmes/cursoR/cnpaf3/cnpaf02trailer.html

Recursos Computacionais Utilizando R http://www.dex.ufla.br/~danielff/meusarquivospdf/RRC0.pdf

Modelos Não Lineares e suas Aplicações https://www.ufjf.br/cursoestatistica/files/2014/04/Modelos-N%c3%a3o-Lineares-e-suas-Aplica%c3%a7%c3%b5es.pdf

Modeloagem - Aprendizado Estatístico http://material.curso-r.com/modelos/

MODELOS DE REGRESSÃO- com apoio computacional https://www.ime.unicamp.br/~cnaber/Livro_MLG.pdf

MODELOS DE REGRESSÃO LINEARES PARA ESTIMATIVA DE PRODUTIVIDADE DA SOJA NO OESTE DO PARANÁ, UTILIZANDO DADOS ESPECTRAIS https://www.scielo.br/pdf/eagri/v30n3/14.pdf

Aplicação do Teste de Farrar-Glauber para Análise de Multicolinearidade Em Regressões Lineares https://ibape-nacional.com.br/biblioteca/wp-content/uploads/2020/02/AO-27-Aplica%C3%A7%C3%A3o-do-Teste-de-Farrar-Glauber-para-An%C3%A1lise.pdf

Regressão Logística: O método estatístico mais utilizado para modelar variáveis categóricas. https://matheusfacure.github.io/2017/02/25/regr-log/

Linear Regression http://rstudio-pubs-static.s3.amazonaws.com/428179_4d1959eb7bda4ed1b9ae5bb86004eae3.html

Regression http://www.mat.ufrgs.br/~giacomo/Softwares/R/Crawley/Crawley%20-%20The%20Book%20R/ch10.pdf

Regressão de Poisson https://smolski.github.io/livroavancado/regressao-de-poisson.html

Tutorial — Ajuste e Interpretação de Regressão Linear com R https://medium.com/data-hackers/tutorial-ajuste-e-interpreta%C3%A7%C3%A3o-de-regress%C3%A3o-linear-com-r-5b23c4ddb72

Estatística Prática para Docentes e Pós-Graduandos de Geraldo Maia Campos 11. Aditividade e homogeneidade http://www.forp.usp.br/restauradora/gmc/gmc_livro/gmc_livro_cap11.html

TESTES DE NORMALIDADE EM ANÁLISES ESTATÍSTICAS: UMA ORIENTAÇÃO PARA PRATICANTES EM CIÊNCIAS DA SAÚDE E ATIVIDADE FÍSICA file:///C:/Users/Jacob/Documents/R/6583-Texto%20do%20artigo-43438-1-10-20171008.pdf

regressão logística https://www.rpubs.com/dudubiologico/545528

Teste para normalidade e homocedasticidade https://biostatistics-uem.github.io/Bio/aula8/teste_normalidade_homocedasticidade.html#:~:text=Em%20an%C3%A1lise%20de%20vari%C3%A2ncia(ANOVA,que%20a%20ANOVA%20tenha%20validade.

A contribuição dos programas de pós-graduação multi e interdisciplinares para a produção de tecnologias no Brasil: uma análise exploratória e preditiva

Métricas e modelo: Vivian Alves (tese UFSC), apoio Viviane Schneider. Análise estatística e Código fonte: Viviane Schneider

inicio em 24/7/2020. última atualização em: Versão 4. Data:04/8/2020.

0.1 Coerência do modelo de análise

1 Testes de normalidade

1.1 Dados não normalizados

1.1.1 Shapiro-Wilk test

1.2 Dados normalizados

1.2.1 Shapiro-Wilk test

1.3 Verificação de ajuste do modelo

1.4 Produção de Tecnologia (IPT= Patentes+Produtos+Aplicativos)

1.4.1 Com outliers

1.4.1.1 Regressão linear

1.4.1.2 Regressão de Poisson

1.4.1.3 Árvore de decisão

1.4.2 Sem outliers

1.4.2.1 Regressão de Poisson

1.4.2.2 Árvore de decisão

1.5 Patentes

1.5.0.1 Regressão de Poisson

1.5.0.2 Árvore de decisão

1.6 Produtos

1.6.0.1 Regressão de Poisson

1.6.0.2 Árvore de decisão

1.7 Aplicativos

1.7.0.1 Regressão de Poisson

1.7.0.2 Árvore de decisão

1.8 Conclusões

2 Agradecimentos

3 Referências