Este relatório apresenta uma análise das estatísticas de criminalidade produzidas pelo Instituto de Segurança Pública (ISP) do estado do Rio de Janeiro para o ano de 2020.
# Load the yearly statistics aggregated by municipality.
# `yearly_stats()` is defined outside this section.
df <- yearly_stats(by = "municipality")

# Keep only the 2020 rows, drop the columns that are not used below, and
# fill missing values with the column median. Imputation is restricted to
# numeric columns because a median is not meaningful for text columns.
df <- df %>%
  filter(ano == 2020) %>%
  select(
    -fase, -pessoas_desaparecidas, -encontro_cadaver,
    -pol_militares_mortos_serv, -pol_civis_mortos_serv,
    -registro_ocorrencias, -encontro_ossada, -regiao, -ano
  ) %>%
  mutate(
    across(
      where(is.numeric),
      ~ ifelse(is.na(.x), median(.x, na.rm = TRUE), .x)
    )
  )

# Print a per-column summary of the cleaned data.
df %>% skim()
# Modelling table: drop `fmun` and convert `fmun_cod` to a factor so it can
# be carried through the recipe as an identifier.
df1 <- df %>%
  select(!fmun) %>%
  mutate(fmun_cod = factor(fmun_cod))

# PCA specification. `fmun_cod` gets the "ID" role, so it is not matched by
# all_predictors(); the remaining columns are normalised and then reduced
# to two principal components. The step id "pca" is used by tidy() below.
pca_rec <- recipe(~ ., data = df1) %>%
  update_role(fmun_cod, new_role = "ID") %>%
  step_normalize(all_predictors()) %>%
  step_pca(all_predictors(), num_comp = 2, id = "pca")
pca_rec

# Estimate the recipe on df1 and extract the transformed training data
# (new_data = NULL returns the data the recipe was prepped on).
pca_estimates <- prep(pca_rec)
features_2d <- pca_estimates %>%
  bake(new_data = NULL)
features_2d %>%
  slice_head(n = 5)

# Variance rows whose term name contains "percent".
pca_estimates %>%
  tidy(id = "pca", type = "variance") %>%
  filter(str_detect(terms, "percent"))
# Default ggplot2 theme for the plots that follow.
theme_set(theme_light())

# Bar chart of the percentage of variance explained by each component.
# The values are already on a 0-100 scale, hence scale = 1 in the labels.
pca_estimates %>%
  tidy(id = "pca", type = "variance") %>%
  filter(terms == "percent variance") %>%
  ggplot(mapping = aes(x = component, y = value)) +
  geom_col(fill = "midnightblue", alpha = 0.7) +
  ylab("% of total variance") +
  xlab("Principal Component") +
  ggtitle("Variance Explained by Each Principal Component") +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )

# Scatter plot of the observations on the first two principal components.
features_2d %>%
  ggplot(mapping = aes(x = PC1, y = PC2)) +
  geom_point(size = 2, color = "midnightblue")
# Feature table for clustering: remove the `fmun_cod` column, normalise
# every remaining predictor, then estimate the recipe and return the
# transformed version of the data it was estimated on.
seeds_features <- recipe(~ ., data = df1) %>%
  step_rm(fmun_cod) %>%
  step_normalize(all_predictors()) %>%
  prep() %>%
  bake(new_data = NULL)

# Preview the first five rows.
slice_head(seeds_features, n = 5)
# Fix the RNG seed so the random k-means starts are reproducible.
set.seed(2056)

# Create 10 models with 1 to 10 clusters.
# Each row holds the fitted kmeans object (20 random starts each) together
# with its one-row glance() summary, unnested into regular columns.
kclusts <- tibble(k = 1:10) %>%
  mutate(
    model = map(k, ~ kmeans(x = seeds_features, centers = .x, nstart = 20)),
    glanced = map(model, glance)
  ) %>%
  unnest(cols = c(glanced))