# Load the class ("turma") export into `df`. The file name suggests an
# i-Educar dump taken on 2024-04-17 -- TODO confirm the source system.
df <- read_csv(file = "ieducar turmas _17042024 17-46.csv")
Rows: 16328 Columns: 6── Column specification ──────────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (2): nm_turma, sgl_turma
dbl (4): sigrh_lotacao, ano_turma, ref_cod_turno, cod_turma
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Number of distinct values in `sigrh_lotacao`.
length(unique(df$sigrh_lotacao))
[1] 619
# Number of distinct values in `ano_turma`.
length(unique(df$ano_turma))
[1] 1
# Number of distinct class names (`nm_turma`).
length(unique(df$nm_turma))
[1] 6764
# Number of distinct class codes (`cod_turma`).
length(unique(df$cod_turma))
[1] 16328
# Number of distinct class abbreviations (`sgl_turma`).
length(unique(df$sgl_turma))
[1] 374
# Split the class abbreviation `sgl_turma` on "_" into five positional
# pieces (id1 .. id5). `remove = FALSE` keeps the original column alongside
# the new ones. All other columns of `df` are carried through unchanged.
df1 <- separate(
  df,
  col = sgl_turma,
  into = paste0("id", 1:5),
  sep = "_",
  remove = FALSE
)
df1$id1 %>% unique # [1] "EI" "3C" "2C" "EM" "EMI"
df1$id2 %>% unique # [1] "1P" "2P" "B1" "B2" "1" "2" "3" "M1" "M2"
df1$id3 %>% unique # [1] "M" "V" "N" "I"
df1$id4 %>% unique # [1] "A" "B" "C" "D" "E" "F" "G" "H" "I" "J" "K" "L" "N" "M" "O" "P" "Q" "R" "S" "T" "U" "V" "W" "X"
df1$id5 %>% unique # [1] "1P" "2P" "B1" "B2" "1" "2" "3" "M1" "M2"
# Test whether each id extracted from `sgl_turma` appears in the class name
# (`nm_turma`), then plot how many classes reach each total score.
#
# Ids are matched as literal substrings (`fixed = TRUE`) rather than as
# regular expressions, so an id containing a regex metacharacter cannot
# produce a spurious match or an invalid-pattern error.
df1 %>%
  mutate(
    # Is the modality (id1) recorded in the name?
    score1 = as.integer(
      map2_lgl(id1, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the standardized grade/series (id2) recorded in the name?
    score2 = as.integer(
      map2_lgl(id2, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the shift (id3) recorded in the name?
    score3 = as.integer(
      map2_lgl(id3, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the alphabetic class letter (id4) recorded in the name?
    score4 = as.integer(
      map2_lgl(id4, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the standardized id5 piece recorded in the name?
    # (its meaning is left unspecified by the original author)
    score5 = as.integer(
      map2_lgl(id5, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Simple total: how many of the five ids were found in the name.
    scoresimples = score1 + score2 + score3 + score4 + score5,
    # Weighted total: id1 counts double and id4 counts triple.
    scoresponderado = score1 * 2L + score2 + score3 + score4 * 3L + score5
  ) %>%
  # count() returns an ungrouped table with the tally in `n`, so no
  # `.groups` message is emitted and no grouping leaks downstream.
  count(scoresponderado, scoresimples, name = "n") %>%
  # Long format: one row per (criterion, score) so each criterion gets
  # its own facet.
  pivot_longer(
    cols = c(scoresponderado, scoresimples),
    names_to = "criterio",
    values_to = "score"
  ) %>%
  ggplot(aes(score, n)) +
  geom_col(alpha = 0.5) +
  facet_wrap(~criterio, ncol = 1)
# Optional bar labels: append `+ geom_text(aes(label = n))` to the plot.
`summarise()` has grouped output by 'scoresponderado'. You can override using the `.groups` argument.

# Build `df2`: `df1` plus one 0/1 indicator per id piece telling whether
# that piece appears in the class name (`nm_turma`), and two total scores.
#
# Ids are matched as literal substrings (`fixed = TRUE`) rather than as
# regular expressions, so an id containing a regex metacharacter cannot
# produce a spurious match or an invalid-pattern error.
df2 <- df1 %>%
  mutate(
    # Is the modality (id1) recorded in the name?
    score1 = as.integer(
      map2_lgl(id1, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the standardized grade/series (id2) recorded in the name?
    score2 = as.integer(
      map2_lgl(id2, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the shift (id3) recorded in the name?
    score3 = as.integer(
      map2_lgl(id3, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the alphabetic class letter (id4) recorded in the name?
    score4 = as.integer(
      map2_lgl(id4, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Is the standardized id5 piece recorded in the name?
    # (its meaning is left unspecified by the original author)
    score5 = as.integer(
      map2_lgl(id5, nm_turma, ~ grepl(.x, .y, fixed = TRUE))
    ),
    # Simple total: how many of the five ids were found in the name.
    scoresimples = score1 + score2 + score3 + score4 + score5,
    # Weighted total: id1 counts double and id4 counts triple.
    scoresponderado = score1 * 2L + score2 + score3 + score4 * 3L + score5
  )
library(janitor)
# Frequency table of score1: is the modality recorded in the name?
tabyl(df2, score1)
score1 n percent
0 14335 0.8779397
1 1993 0.1220603
# Frequency table of score2: is the grade/series recorded in the name?
tabyl(df2, score2)
score2 n percent
0 13112 0.8030377
1 3216 0.1969623
# Frequency table of score3: is the shift recorded in the name?
tabyl(df2, score3)
score3 n percent
0 12626 0.7732729
1 3702 0.2267271
# Frequency table of score4: is the alphabetic class letter recorded in the name?
tabyl(df2, score4)
score4 n percent
0 11914 0.7296668
1 4414 0.2703332
# Frequency table of score5: is the id5 piece recorded in the name?
# (its meaning is left unspecified by the original author)
tabyl(df2, score5)
score5 n percent
0 3350 0.205169
1 12978 0.794831
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KYGBge3J9DQpkZiA8LSByZWFkX2NzdigiaWVkdWNhciB0dXJtYXMgXzE3MDQyMDI0IDE3LTQ2LmNzdiIpDQpvcHRpb25zKHNjaXBlbiA9IDk5OSkNCmBgYA0KYGBge3J9DQpkZiRzaWdyaF9sb3RhY2FvICU+JSB1bmlxdWUoKSAlPiUgbGVuZ3RoKCkNCm9wZQ0KYGBgDQpgYGB7cn0NCmRmJGFub190dXJtYSAlPiUgdW5pcXVlKCkgJT4lIGxlbmd0aCgpDQpgYGANCmBgYHtyfQ0KZGYkbm1fdHVybWEgJT4lIHVuaXF1ZSgpICU+JSBsZW5ndGgoKQ0KYGBgDQpgYGB7cn0NCmRmJGNvZF90dXJtYSAlPiUgdW5pcXVlKCkgJT4lIGxlbmd0aCgpDQpgYGANCmBgYHtyfQ0KZGYkc2dsX3R1cm1hICU+JSB1bmlxdWUoKSAlPiUgbGVuZ3RoKCkNCmBgYA0KYGBge3J9DQoNCg0KZGYxPC1kZiAlPiUgI3NlbGVjdCgtYW5vX3R1cm1hLC1zaWdyaF9sb3RhY2FvKSAlPiUgDQogIHNlcGFyYXRlKHNnbF90dXJtYSxpbnRvID0gYygiaWQxIiwiaWQyIiwiaWQzIiwiaWQ0IiwiaWQ1Iiksc2VwID0gIl8iLHJlbW92ZT1GQUxTRSkgDQpgYGANCmRmMSRpZDEgJT4lIHVuaXF1ZSNbMV0gIkVJIiAgIjNDIiAgIjJDIiAgIkVNIiAgIkVNSSINCmRmMSRpZDIgJT4lIHVuaXF1ZSNbMV0gIjFQIiAiMlAiICJCMSIgIkIyIiAiMSIgICIyIiAgIjMiICAiTTEiICJNMiINCmRmMSRpZDMgJT4lIHVuaXF1ZSNbMV0gIk0iICJWIiAiTiIgIkkiDQpkZjEkaWQ0ICU+JSB1bmlxdWUjIFsxXSAiQSIgIkIiICJDIiAiRCIgIkUiICJGIiAiRyIgIkgiICJJIiAiSiIgIksiICJMIiAiTiIgIk0iICJPIiAiUCIgIlEiICJSIiAiUyIgIlQiICJVIiAiViIgIlciICJYIg0KZGYxJGlkNSAlPiUgdW5pcXVlI1sxXSAiMVAiICIyUCIgIkIxIiAiQjIiICIxIiAgIjIiICAiMyIgICJNMSIgIk0yIg0KYGBge3J9DQpkZjEgJT4lICAgIyMjIyBUZXN0YXIgaWRzIG5vIG5vbWUNCiAgbXV0YXRlKHNjb3JlMT1tYXAyX2xnbChpZDEsIG5tX3R1cm1hLCB+IGdyZXBsKC54LCAueSkpKjEsI0jDoSByZWdpc3RybyBkZSBtb2RhbGlkYWRlPw0KICAgICAgICAgc2NvcmUyPW1hcDJfbGdsKGlkMiwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjIGjDoSByZWdpc3RybyBkZSBzZXJpYcOnw6NvIHBhZHJvbml6YWRvPw0KICAgICAgICAgc2NvcmUzPW1hcDJfbGdsKGlkMywgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjIGjDoSByZWdpc3RybyBkZSB0dXJubz8NCiAgICAgICAgIHNjb3JlND1tYXAyX2xnbChpZDQsIG5tX3R1cm1hLCB+IGdyZXBsKC54LCAueSkpKjEsIyBow6EgcmVnaXN0cm8gYWxmYWJldGljbyBkZSB0dXJtYQ0KICAgICAgICAgc2NvcmU1PW1hcDJfbGdsKGlkNSwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjIGjDoSByZWdpc3RybyBkZSAuLi4gcGFkcm9uaXphZG8/DQogICAgICAgICBzY29yZXNpbXBsZXM9c2NvcmUxK3Njb3JlMitzY29yZTMrc2NvcmU0K3Njb3JlNSwjIyMgcG9u
dHVhw6fDo28gdG90YWwgZGUgbWF0Y2hpbmcNCiAgICAgICAgIHNjb3Jlc3BvbmRlcmFkbz1zY29yZTEqMitzY29yZTIrc2NvcmUzK3Njb3JlNCozK3Njb3JlNSMjIyBwb250dWHDp8OjbyB0b3RhbCBkZSBtYXRjaGluZw0KICApICMlPiUgI2ZpbHRlcihzY29yZXNpbXBsZXM9PW1heChzY29yZXNpbXBsZXMpKSANCmBgYA0KYGBge3J9DQoNCmRmMSAlPiUgICAjIyMjIFRlc3RhciBpZHMgbm8gbm9tZQ0KICBtdXRhdGUoc2NvcmUxPW1hcDJfbGdsKGlkMSwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjSMOhIHJlZ2lzdHJvIGRlIG1vZGFsaWRhZGU/DQogICAgICAgICBzY29yZTI9bWFwMl9sZ2woaWQyLCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIHNlcmlhw6fDo28gcGFkcm9uaXphZG8/DQogICAgICAgICBzY29yZTM9bWFwMl9sZ2woaWQzLCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIHR1cm5vPw0KICAgICAgICAgc2NvcmU0PW1hcDJfbGdsKGlkNCwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjIGjDoSByZWdpc3RybyBhbGZhYmV0aWNvIGRlIHR1cm1hDQogICAgICAgICBzY29yZTU9bWFwMl9sZ2woaWQ1LCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIC4uLiBwYWRyb25pemFkbz8NCiAgICAgICAgIHNjb3Jlc2ltcGxlcz1zY29yZTErc2NvcmUyK3Njb3JlMytzY29yZTQrc2NvcmU1LCMjIyBwb250dWHDp8OjbyB0b3RhbCBkZSBtYXRjaGluZw0KICAgICAgICAgc2NvcmVzcG9uZGVyYWRvPXNjb3JlMSoyK3Njb3JlMitzY29yZTMrc2NvcmU0KjMrc2NvcmU1IyMjIHBvbnR1YcOnw6NvIHRvdGFsIGRlIG1hdGNoaW5nDQogICkgICU+JSBncm91cF9ieShzY29yZXNwb25kZXJhZG8sc2NvcmVzaW1wbGVzKSU+JXN1bW1hcmlzZShuPW4oKSklPiVnYXRoZXIoImNyaXRlcmlvIiwic2NvcmUiLC1uKSAgJT4lIA0KICBnZ3Bsb3QoYWVzKHNjb3JlLG4pKStnZW9tX2NvbChhbHBoYT0wLjUpK2ZhY2V0X3dyYXAofmNyaXRlcmlvLG5jb2w9MSkjK2dlb21fdGV4dChhZXMobGFiZWw9bikpDQpgYGAgICAgICANCg0KDQpgYGB7cn0NCmRmMSAlPiUgICAjIyMjIFRlc3RhciBpZHMgbm8gbm9tZQ0KICBtdXRhdGUoc2NvcmUxPW1hcDJfbGdsKGlkMSwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjSMOhIHJlZ2lzdHJvIGRlIG1vZGFsaWRhZGU/DQogICAgICAgICBzY29yZTI9bWFwMl9sZ2woaWQyLCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIHNlcmlhw6fDo28gcGFkcm9uaXphZG8/DQogICAgICAgICBzY29yZTM9bWFwMl9sZ2woaWQzLCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIHR1cm5vPw0KICAgICAgICAgc2NvcmU0PW1hcDJfbGdsKGlkNCwgbm1fdHVybWEsIH4gZ3JlcGwoLngsIC55KSkqMSwjIGjDoSByZWdpc3RybyBhbGZh
YmV0aWNvIGRlIHR1cm1hDQogICAgICAgICBzY29yZTU9bWFwMl9sZ2woaWQ1LCBubV90dXJtYSwgfiBncmVwbCgueCwgLnkpKSoxLCMgaMOhIHJlZ2lzdHJvIGRlIC4uLiBwYWRyb25pemFkbz8NCiAgICAgICAgIHNjb3Jlc2ltcGxlcz1zY29yZTErc2NvcmUyK3Njb3JlMytzY29yZTQrc2NvcmU1LCMjIyBwb250dWHDp8OjbyB0b3RhbCBkZSBtYXRjaGluZw0KICAgICAgICAgc2NvcmVzcG9uZGVyYWRvPXNjb3JlMSoyK3Njb3JlMitzY29yZTMrc2NvcmU0KjMrc2NvcmU1IyMjIHBvbnR1YcOnw6NvIHRvdGFsIGRlIG1hdGNoaW5nDQogICkgIC0+ZGYyDQpgYGANCg0KDQoNCmBgYHtyfQ0KbGlicmFyeShqYW5pdG9yKQ0KZGYyJT4lIHRhYnlsKHNjb3JlMSkgICNIw6EgcmVnaXN0cm8gZGUgbW9kYWxpZGFkZT8NCmBgYA0KDQoNCg0KYGBge3J9DQpkZjIlPiUgdGFieWwoc2NvcmUyKSAgI0jDoSByZWdpc3RybyBkZSBzZXJpYcOnw6NvPw0KYGBgDQoNCg0KDQpgYGB7cn0NCmRmMiU+JSB0YWJ5bChzY29yZTMpICAjSMOhIHJlZ2lzdHJvIGRlIHR1cm5vPw0KYGBgDQoNCg0KDQpgYGB7cn0NCmRmMiU+JSB0YWJ5bChzY29yZTQpICAjSMOhIHJlZ2lzdHJvIGRlIGFsZmFiZXRpY28/DQpgYGANCg0KDQoNCmBgYHtyfQ0KZGYyJT4lIHRhYnlsKHNjb3JlNSkgICNIw6EgcmVnaXN0cm8gZGUgLi4uPw0KDQpgYGANCg0KYGBge3J9DQp3cml0ZS5jc3YoZGYyLCJ0ZXN0ZV9ub21lX3R1cm1hLmNzdiIpDQpgYGANCiANCiA=