library(tidymodels)
library(nnet)
library(vip)
library(recipes)
library(rsample)
library(parsnip)
library(workflows)
library(dials)
library(tune)
library(yardstick)
library(readr)
library(tidyverse)
library(nnet)
library(vip)

Juntar dados



#SSEH44 <- readRDS("D:/danilo.dantas/Weeberb/Dataframes/dados/SSEH/2_RDS/SSEH44.rds")
#SSEH44$CODIGO_ESCOLA<-as.character(SSEH44$CODIGO_ESCOLA)
#SSEH44$Ano <-SSEH44$Ano %>% as.numeric()
#df2 %>% group_by(NU_ANO,CO_ESCOLA) %>% 
  summarise(Nota_Red=mean(Nota_Red),
            Nota_objetiva=mean(Nota_objetiva))->t
#t<-inner_join(SSEH44,t,by=c("Ano"="NU_ANO","CODIGO_ESCOLA"="CO_ESCOLA"))
# Instale se necessário
#rm(df2,SSEH44)
#write.csv(t,"test_nn.csv")
#dados <- read_csv("test_nn.csv")

1) Limpar dados

t$homicidio %>% str_replace(",",".") %>% as.numeric()->t$homicidio
t$idade_90_94 %>% str_replace(",",".") %>% as.numeric()->t$idade_90_94
t$idade_90_94[is.na(t$idade_90_94)]<-0
t$idade_95_99 %>% str_replace(",",".") %>% as.numeric()->t$idade_95_99
t$idade_95_99[is.na(t$idade_95_99)]<-0
t$idade_100 %>% str_replace(",",".") %>% as.numeric()->t$idade_100
t$idade_100[is.na(t$idade_100)]<-0
homem_90_94
t$homem_85_89 %>% str_replace(",",".") %>% as.numeric()->t$homem_85_89
t$homem_85_89[is.na(t$homem_85_89)]#<-0
t$homem_90_94 %>% str_replace(",",".") %>% as.numeric()->t$homem_90_94
t$homem_90_94[is.na(t$homem_90_94)]<-0
t$homem_95_99 %>% str_replace(",",".") %>% as.numeric()->t$homem_95_99
t$homem_95_99[is.na(t$homem_95_99)]<-0
t$mulher_85_89 %>% str_replace(",",".") %>% as.numeric()->t$mulher_85_89
t$mulher_85_89[is.na(t$mulher_85_89)]<-0
t$mulher_90_94 %>% str_replace(",",".") %>% as.numeric()->t$mulher_90_94
t$mulher_90_94[is.na(t$mulher_90_94)]<-0
         
t$homem_100 %>% str_replace(",",".") %>% as.numeric()->t$homem_100
t$homem_100[is.na(t$homem_100)]<-0

t$mulher_95_99 %>% str_replace(",",".") %>% as.numeric()->t$mulher_95_99
t$mulher_95_99[is.na(t$mulher_95_99)]<-0
t$mulher_100 %>% str_replace(",",".") %>% as.numeric()->t$mulher_100
t$mulher_100[is.na(t$mulher_100)]<-0
t$pop_amarela %>% str_replace(",",".") %>% as.numeric()->t$pop_amarela
t$pop_amarela[is.na(t$pop_amarela)]<-0
t$pop_indigena %>% str_replace(",",".") %>% as.numeric()->t$pop_indigena
t$pop_indigena[is.na(t$pop_indigena)]<-0






t$CODIGO_ESCOLA
dados_limpos <- t %>%
  mutate()
  mutate(across(where(is.character), as.factor)) %>%
  mutate(across(where(is.factor), ~fct_na_value_to_level(.))) %>%
  mutate(across(where(is.numeric), ~ifelse(is.na(.), mean(., na.rm = TRUE), .))) %>% 
  select(-CODIGO_ESCOLA,-id_municipio)

2) Padronizar numéricos com scale (e dummies para categorias?)

dummy não deu muito certo, preciso melhorar a limpeza

receita <- recipe(Nota_objetiva ~ ., data = dados_limpos) %>%
  #update_role(...1, CODIGO_ESCOLA, id_municipio, new_role = "ID") %>%
  #step_rm(...1, CODIGO_ESCOLA, id_municipio) %>%
  step_zv(all_predictors()) %>%      # remove colunas com variância zero
  step_impute_mean(all_numeric(), -all_outcomes()) %>%
  #step_dummy(all_nominal(), one_hot = TRUE) %>%
  step_normalize(all_numeric(), -all_outcomes())
# Separar treino/teste
set.seed(123)
split <- initial_split(dados_limpos, prop = 0.8, strata = Nota_objetiva)
train <- training(split)
test <- testing(split)

5) Ranking das variáveis



#library(DALEX)

# Preparar os dados de entrada
#dados_juice <- juice(prep(recipe(Nota_objetiva ~ ., data = dados_limpos)))

# Criar o explicador para o modelo
#explainer_nn <- explain(
#  model = modelo_final$fit$fit,         # objeto de classe nnet
#  data = select(dados_juice, -Nota_objetiva),
#  y = dados_juice$Nota_objetiva,
#  label = "Rede Neural nnet"
#)
#importancia <- model_parts(explainer_nn, loss_function = loss_root_mean_square)

# Visualizar
#importancia %>% data.frame %>% arrange(dropout_loss) %>% filter(!permutation==0) %>%  plot() 
vip(modelo_final$fit$fit, num_features = 40)

NA
NA

6) Cross-validação final

LS0tDQp0aXRsZTogIlJlZGUgTmV1cmFsIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQpkYXRlOiAiMjAyNS0wNS0xNSINCi0tLQ0KYGBge3J9DQpsaWJyYXJ5KHRpZHltb2RlbHMpDQpsaWJyYXJ5KG5uZXQpDQpsaWJyYXJ5KHZpcCkNCmxpYnJhcnkocmVjaXBlcykNCmxpYnJhcnkocnNhbXBsZSkNCmxpYnJhcnkocGFyc25pcCkNCmxpYnJhcnkod29ya2Zsb3dzKQ0KbGlicmFyeShkaWFscykNCmxpYnJhcnkodHVuZSkNCmxpYnJhcnkoeWFyZHN0aWNrKQ0KbGlicmFyeShyZWFkcikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShubmV0KQ0KbGlicmFyeSh2aXApDQpgYGANCg0KSnVudGFyIGRhZG9zDQpgYGB7cn0NCg0KDQojU1NFSDQ0IDwtIHJlYWRSRFMoIkQ6L2Rhbmlsby5kYW50YXMvV2VlYmVyYi9EYXRhZnJhbWVzL2RhZG9zL1NTRUgvMl9SRFMvU1NFSDQ0LnJkcyIpDQojU1NFSDQ0JENPRElHT19FU0NPTEE8LWFzLmNoYXJhY3RlcihTU0VINDQkQ09ESUdPX0VTQ09MQSkNCiNTU0VINDQkQW5vIDwtU1NFSDQ0JEFubyAlPiUgYXMubnVtZXJpYygpDQojZGYyICU+JSBncm91cF9ieShOVV9BTk8sQ09fRVNDT0xBKSAlPiUgDQogIHN1bW1hcmlzZShOb3RhX1JlZD1tZWFuKE5vdGFfUmVkKSwNCiAgICAgICAgICAgIE5vdGFfb2JqZXRpdmE9bWVhbihOb3RhX29iamV0aXZhKSktPnQNCiN0PC1pbm5lcl9qb2luKFNTRUg0NCx0LGJ5PWMoIkFubyI9Ik5VX0FOTyIsIkNPRElHT19FU0NPTEEiPSJDT19FU0NPTEEiKSkNCiMgSW5zdGFsZSBzZSBuZWNlc3PDoXJpbw0KI3JtKGRmMixTU0VINDQpDQojd3JpdGUuY3N2KHQsInRlc3Rfbm4uY3N2IikNCiNkYWRvcyA8LSByZWFkX2NzdigidGVzdF9ubi5jc3YiKQ0KDQpgYGANCiMgMSkgTGltcGFyIGRhZG9zDQpgYGB7cn0NCnQkaG9taWNpZGlvICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JGhvbWljaWRpbw0KdCRpZGFkZV85MF85NCAlPiUgc3RyX3JlcGxhY2UoIiwiLCIuIikgJT4lIGFzLm51bWVyaWMoKS0+dCRpZGFkZV85MF85NA0KdCRpZGFkZV85MF85NFtpcy5uYSh0JGlkYWRlXzkwXzk0KV08LTANCnQkaWRhZGVfOTVfOTkgJT4lIHN0cl9yZXBsYWNlKCIsIiwiLiIpICU+JSBhcy5udW1lcmljKCktPnQkaWRhZGVfOTVfOTkNCnQkaWRhZGVfOTVfOTlbaXMubmEodCRpZGFkZV85NV85OSldPC0wDQp0JGlkYWRlXzEwMCAlPiUgc3RyX3JlcGxhY2UoIiwiLCIuIikgJT4lIGFzLm51bWVyaWMoKS0+dCRpZGFkZV8xMDANCnQkaWRhZGVfMTAwW2lzLm5hKHQkaWRhZGVfMTAwKV08LTANCmhvbWVtXzkwXzk0DQp0JGhvbWVtXzg1Xzg5ICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JGhvbWVtXzg1Xzg5DQp0JGhvbWVtXzg1Xzg5W2lzLm5hKHQkaG9tZW1fODVfODkpXSM8LTANCnQkaG9tZW1fOTBfOTQgJT4lIHN0cl9yZXBsYWNlKCIsIiwiLiIpICU+JSBhcy5udW1lcmljKCktPnQkaG9tZW1fOTBfOTQNCnQkaG9tZW1fOTBfOTRbaXMubmEodCRob21lbV85MF85NCldPC0wDQp0JGhvbWVtXzk1Xzk5ICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JGhvbWVtXzk1Xzk5DQp0JGhvbWVtXzk1Xzk5W2lzLm5hKHQkaG9tZW1fOTVfOTkpXTwtMA0KdCRtdWxoZXJfODVfODkgJT4lIHN0cl9yZXBsYWNlKCIsIiwiLiIpICU+JSBhcy5udW1lcmljKCktPnQkbXVsaGVyXzg1Xzg5DQp0JG11bGhlcl84NV84OVtpcy5uYSh0JG11bGhlcl84NV84OSldPC0wDQp0JG11bGhlcl85MF85NCAlPiUgc3RyX3JlcGxhY2UoIiwiLCIuIikgJT4lIGFzLm51bWVyaWMoKS0+dCRtdWxoZXJfOTBfOTQNCnQkbXVsaGVyXzkwXzk0W2lzLm5hKHQkbXVsaGVyXzkwXzk0KV08LTANCiAgICAgICAgIA0KdCRob21lbV8xMDAgJT4lIHN0cl9yZXBsYWNlKCIsIiwiLiIpICU+JSBhcy5udW1lcmljKCktPnQkaG9tZW1fMTAwDQp0JGhvbWVtXzEwMFtpcy5uYSh0JGhvbWVtXzEwMCldPC0wDQoNCnQkbXVsaGVyXzk1Xzk5ICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JG11bGhlcl85NV85OQ0KdCRtdWxoZXJfOTVfOTlbaXMubmEodCRtdWxoZXJfOTVfOTkpXTwtMA0KdCRtdWxoZXJfMTAwICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JG11bGhlcl8xMDANCnQkbXVsaGVyXzEwMFtpcy5uYSh0JG11bGhlcl8xMDApXTwtMA0KdCRwb3BfYW1hcmVsYSAlPiUgc3RyX3JlcGxhY2UoIiwiLCIuIikgJT4lIGFzLm51bWVyaWMoKS0+dCRwb3BfYW1hcmVsYQ0KdCRwb3BfYW1hcmVsYVtpcy5uYSh0JHBvcF9hbWFyZWxhKV08LTANCnQkcG9wX2luZGlnZW5hICU+JSBzdHJfcmVwbGFjZSgiLCIsIi4iKSAlPiUgYXMubnVtZXJpYygpLT50JHBvcF9pbmRpZ2VuYQ0KdCRwb3BfaW5kaWdlbmFbaXMubmEodCRwb3BfaW5kaWdlbmEpXTwtMA0KDQoNCg0KDQoNCg0KdCRDT0RJR09fRVNDT0xBDQpkYWRvc19saW1wb3MgPC0gdCAlPiUNCiAgbXV0YXRlKCkNCiAgbXV0YXRlKGFjcm9zcyh3aGVyZShpcy5jaGFyYWN0ZXIpLCBhcy5mYWN0b3IpKSAlPiUNCiAgbXV0YXRlKGFjcm9zcyh3aGVyZShpcy5mYWN0b3IpLCB+ZmN0X25hX3ZhbHVlX3RvX2xldmVsKC4pKSkgJT4lDQogIG11dGF0ZShhY3Jvc3Mod2hlcmUoaXMubnVtZXJpYyksIH5pZmVsc2UoaXMubmEoLiksIG1lYW4oLiwgbmEucm0gPSBUUlVFKSwgLikpKSAlPiUgDQogIHNlbGVjdCgtQ09ESUdPX0VTQ09MQSwtaWRfbXVuaWNpcGlvKQ0KDQpgYGANCiMgMikgUGFkcm9uaXphciBudW3DqXJpY29zIGNvbSBzY2FsZSAoZSBkdW1taWVzIHBhcmEgY2F0ZWdvcmlhcz8pDQoNCmR1bW15IG7Do28gZGV1IG11aXRvIGNlcnRvLCBwcmVjaXNvIG1lbGhvcmFyIGEgbGltcGV6YQ0KYGBge3J9DQpyZWNlaXRhIDwtIHJlY2lwZShOb3RhX29iamV0aXZhIH4gLiwgZGF0YSA9IGRhZG9zX2xpbXBvcykgJT4lDQogICN1cGRhdGVfcm9sZSguLi4xLCBDT0RJR09fRVNDT0xBLCBpZF9tdW5pY2lwaW8sIG5ld19yb2xlID0gIklEIikgJT4lDQogICNzdGVwX3JtKC4uLjEsIENPRElHT19FU0NPTEEsIGlkX211bmljaXBpbykgJT4lDQogIHN0ZXBfenYoYWxsX3ByZWRpY3RvcnMoKSkgJT4lICAgICAgIyByZW1vdmUgY29sdW5hcyBjb20gdmFyacOibmNpYSB6ZXJvDQogIHN0ZXBfaW1wdXRlX21lYW4oYWxsX251bWVyaWMoKSwgLWFsbF9vdXRjb21lcygpKSAlPiUNCiAgI3N0ZXBfZHVtbXkoYWxsX25vbWluYWwoKSwgb25lX2hvdCA9IFRSVUUpICU+JQ0KICBzdGVwX25vcm1hbGl6ZShhbGxfbnVtZXJpYygpLCAtYWxsX291dGNvbWVzKCkpDQpgYGANCg0KDQpgYGB7cn0NCiMgU2VwYXJhciB0cmVpbm8vdGVzdGUNCnNldC5zZWVkKDEyMykNCnNwbGl0IDwtIGluaXRpYWxfc3BsaXQoZGFkb3NfbGltcG9zLCBwcm9wID0gMC44LCBzdHJhdGEgPSBOb3RhX29iamV0aXZhKQ0KdHJhaW4gPC0gdHJhaW5pbmcoc3BsaXQpDQp0ZXN0IDwtIHRlc3Rpbmcoc3BsaXQpDQpgYGANCg0KDQoNCg0KDQojIDMpIEdyaWQgc2VhcmNoDQpgYGB7cn0NCm1vZGVsb19ubiA8LSBtbHAoaGlkZGVuX3VuaXRzID0gdHVuZSgpLCBwZW5hbHR5ID0gdHVuZSgpLCBlcG9jaHMgPSAxMDApICU+JQ0KICBzZXRfZW5naW5lKCJubmV0IikgJT4lDQogIHNldF9tb2RlKCJyZWdyZXNzaW9uIikNCg0Kd2YgPC0gd29ya2Zsb3coKSAlPiUNCiAgYWRkX3JlY2lwZShyZWNlaXRhKSAlPiUNCiAgYWRkX21vZGVsKG1vZGVsb19ubikNCg0KZ3JpZCA8LSBncmlkX3JlZ3VsYXIoDQogIGhpZGRlbl91bml0cyhyYW5nZSA9IGMoMSwgNSkpLA0KICBwZW5hbHR5KHJhbmdlID0gYygtNCwgLTEpKSwNCiAgbGV2ZWxzID0gMw0KKQ0KDQpjdl9mb2xkcyA8LSB2Zm9sZF9jdih0cmFpbiwgdiA9IDUpDQoNCmFqdXN0ZSA8LSB0dW5lX2dyaWQoDQogIHdmLA0KICByZXNhbXBsZXMgPSBjdl9mb2xkcywNCiAgZ3JpZCA9IGdyaWQsDQogIG1ldHJpY3MgPSBtZXRyaWNfc2V0KHJtc2UsIHJzcSkNCikNCmBgYA0KDQoNClNhbHZhciBSZXN1bHRhZG9zDQpgYGB7cn0NCndyaXRlX3JkcyhhanVzdGUsIm1vZGVsb3MgYWp1c3RhZG9zLnJkcyIpDQpgYGANCg0KDQoNCiNzZWxlw6fDo28gZGUgbW9kZWxvcw0KYGBge3J9DQptZWxob3IgPC0gc2VsZWN0X2Jlc3QoYWp1c3RlLCBtZXRyaWMgPSAicm1zZSIpDQoNCmBgYA0KYGBge3J9DQojIDQpIFJvZGFyIG8gbW9kZWxvIGZpbmFsDQptb2RlbG9fZmluYWwgPC0gZmluYWxpemVfd29ya2Zsb3cod2YsIG1lbGhvcikgJT4lDQogIGZpdChkYXRhID0gdHJhaW4pDQpgYGANCiMgNSkgUmFua2luZyBkYXMgdmFyacOhdmVpcw0KYGBge3J9DQoNCg0KI2xpYnJhcnkoREFMRVgpDQoNCiMgUHJlcGFyYXIgb3MgZGFkb3MgZGUgZW50cmFkYQ0KI2RhZG9zX2p1aWNlIDwtIGp1aWNlKHByZXAocmVjaXBlKE5vdGFfb2JqZXRpdmEgfiAuLCBkYXRhID0gZGFkb3NfbGltcG9zKSkpDQoNCiMgQ3JpYXIgbyBleHBsaWNhZG9yIHBhcmEgbyBtb2RlbG8NCiNleHBsYWluZXJfbm4gPC0gZXhwbGFpbigNCiMgIG1vZGVsID0gbW9kZWxvX2ZpbmFsJGZpdCRmaXQsICAgICAgICAgIyBvYmpldG8gZGUgY2xhc3NlIG5uZXQNCiMgIGRhdGEgPSBzZWxlY3QoZGFkb3NfanVpY2UsIC1Ob3RhX29iamV0aXZhKSwNCiMgIHkgPSBkYWRvc19qdWljZSROb3RhX29iamV0aXZhLA0KIyAgbGFiZWwgPSAiUmVkZSBOZXVyYWwgbm5ldCINCiMpDQojaW1wb3J0YW5jaWEgPC0gbW9kZWxfcGFydHMoZXhwbGFpbmVyX25uLCBsb3NzX2Z1bmN0aW9uID0gbG9zc19yb290X21lYW5fc3F1YXJlKQ0KDQojIFZpc3VhbGl6YXINCiNpbXBvcnRhbmNpYSAlPiUgZGF0YS5mcmFtZSAlPiUgYXJyYW5nZShkcm9wb3V0X2xvc3MpICU+JSBmaWx0ZXIoIXBlcm11dGF0aW9uPT0wKSAlPiUgIHBsb3QoKSANCnZpcChtb2RlbG9fZmluYWwkZml0JGZpdCwgbnVtX2ZlYXR1cmVzID0gNDApDQoNCg0KYGBgDQoNCg0KDQoNCiMgNikgQ3Jvc3MtdmFsaWRhw6fDo28gZmluYWwNCg0KDQpgYGB7cn0NCg0KY3ZfZmluYWwgPC0gZml0X3Jlc2FtcGxlcygNCiAgZmluYWxpemVfd29ya2Zsb3cod2YsIG1lbGhvciksDQogIHJlc2FtcGxlcyA9IGN2X2ZvbGRzLA0KICBtZXRyaWNzID0gbWV0cmljX3NldChybXNlLCByc3EpLA0KICBjb250cm9sID0gY29udHJvbF9yZXNhbXBsZXMoc2F2ZV9wcmVkID0gVFJVRSkNCikNCg0KY3ZfZmluYWwgJT4lDQogIGNvbGxlY3RfcHJlZGljdGlvbnMoKSAlPiUNCiAgZ2dwbG90KGFlcyh4ID0gLnByZWQsIHkgPSBOb3RhX29iamV0aXZhKSkgKw0KICBnZW9tX3BvaW50KGFscGhhID0gMC4zKSArDQogIGdlb21fYWJsaW5lKGNvbG9yID0gInJlZCIsIGxpbmV0eXBlID0gImRhc2hlZCIpICsNCiAgZmFjZXRfd3JhcCh+aWQpICsNCiAgbGFicyh0aXRsZSA9ICJQcmVkIHZzIFJlYWwgcG9yIEZvbGQiKQ0KY29sbGVjdF9tZXRyaWNzKGN2X2ZpbmFsKQ0KDQpgYGANCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==