Mostrar/ocultar código
# format:
# html:
# theme: cerulean
# execute:
# echo: false
# warning: false
# message: false
# editor: visual
03/02/2026
# format:
# html:
# theme: cerulean
# execute:
# echo: false
# warning: false
# message: false
# editor: visual

# Packages ----
library(tidyverse) # dplyr, ggplot2 and the %>% pipe used below
library(skimr)     # skim()
library(readr)     # read_csv()
library(gt)        # gt() tables

# Load the customer churn data (path is relative to the working directory).
df <- read_csv("churn_clean.csv")

# View() opens an interactive data viewer, so only call it in an interactive
# session; non-interactive runs (e.g. rendering the report) skip it.
if (interactive()) {
  View(df)
}

# Column names, types and first values.
glimpse(df)
Rows: 7,043
Columns: 21
$ customer_id <chr> "7590-VHVEG", "5575-GNVDE", "3668-QPYBK", "7795-CFOC…
$ gender <chr> "Female", "Male", "Male", "Male", "Female", "Female"…
$ senior_citizen <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No"…
$ partner <chr> "Yes", "No", "No", "No", "No", "No", "No", "No", "Ye…
$ dependents <chr> "No", "No", "No", "No", "No", "No", "Yes", "No", "No…
$ tenure <dbl> 1, 34, 2, 45, 2, 8, 22, 10, 28, 62, 13, 16, 58, 49, …
$ phone_service <chr> "No", "Yes", "Yes", "No", "Yes", "Yes", "Yes", "No",…
$ multiple_lines <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "No", "Y…
$ internet_service <chr> "DSL", "DSL", "DSL", "DSL", "Fiber optic", "Fiber op…
$ online_security <chr> "No", "Yes", "Yes", "Yes", "No", "No", "No", "Yes", …
$ online_backup <chr> "Yes", "No", "Yes", "No", "No", "No", "Yes", "No", "…
$ device_protection <chr> "No", "Yes", "No", "Yes", "No", "Yes", "No", "No", "…
$ tech_support <chr> "No", "No", "No", "Yes", "No", "No", "No", "No", "Ye…
$ streaming_tv <chr> "No", "No", "No", "No", "No", "Yes", "Yes", "No", "Y…
$ streaming_movies <chr> "No", "No", "No", "No", "No", "Yes", "No", "No", "Ye…
$ contract <chr> "Month-to-month", "One year", "Month-to-month", "One…
$ paperless_billing <chr> "Yes", "No", "Yes", "No", "Yes", "Yes", "Yes", "No",…
$ payment_method <chr> "Electronic check", "Mailed check", "Mailed check", …
$ monthly_charges <dbl> 29.85, 56.95, 53.85, 42.30, 70.70, 99.65, 89.10, 29.…
$ total_charges <dbl> 29.85, 1889.50, 108.15, 1840.75, 151.65, 820.50, 194…
$ churn <chr> "No", "No", "Yes", "No", "Yes", "Yes", "No", "No", "…
skim(df)
| Name | df |
| Number of rows | 7043 |
| Number of columns | 21 |
| _______________________ | |
| Column type frequency: | |
| character | 18 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| customer_id | 0 | 1 | 10 | 10 | 0 | 7043 | 0 |
| gender | 0 | 1 | 4 | 6 | 0 | 2 | 0 |
| senior_citizen | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| partner | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| dependents | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| phone_service | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| multiple_lines | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| internet_service | 0 | 1 | 2 | 11 | 0 | 3 | 0 |
| online_security | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| online_backup | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| device_protection | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| tech_support | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| streaming_tv | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| streaming_movies | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| contract | 0 | 1 | 8 | 14 | 0 | 3 | 0 |
| paperless_billing | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
| payment_method | 0 | 1 | 12 | 25 | 0 | 4 | 0 |
| churn | 0 | 1 | 2 | 3 | 0 | 2 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| tenure | 0 | 1 | 32.37 | 24.56 | 0.00 | 9.00 | 29.00 | 55.00 | 72.00 | ▇▃▃▃▆ |
| monthly_charges | 0 | 1 | 64.76 | 30.09 | 18.25 | 35.50 | 70.35 | 89.85 | 118.75 | ▇▅▆▇▅ |
| total_charges | 0 | 1 | 2281.92 | 2265.27 | 18.80 | 402.22 | 1397.47 | 3786.60 | 8684.80 | ▇▂▂▂▁ |
# Build df1: the same data with the categorical columns used in the analysis
# stored as factors. churn and senior_citizen get an explicit No/Yes level
# order; the remaining columns keep factor()'s default level ordering.
df1 <- df %>%
  mutate(
    across(
      c(churn, senior_citizen),
      function(x) factor(x, levels = c("No", "Yes"))
    ),
    across(
      c(partner, dependents, contract, payment_method, internet_service),
      factor
    )
  )

# Number of customers per churn class and each class's percentage share.
count(df1, churn) %>%
  mutate(prop = 100 * n / sum(n)) %>%
  gt()
| churn | n | prop |
|---|---|---|
| No | 5174 | 73.46301 |
| Yes | 1869 | 26.53699 |
# Mean and median tenure plus mean monthly and total charges (NAs ignored),
# shown as a gt table.
summarise(
  df1,
  tenure_media = mean(tenure, na.rm = TRUE),
  tenure_mediana = median(tenure, na.rm = TRUE),
  monthly_charges_media = mean(monthly_charges, na.rm = TRUE),
  total_charges_media = mean(total_charges, na.rm = TRUE)
) %>%
  gt()
| tenure_media | tenure_mediana | monthly_charges_media | total_charges_media |
|---|---|---|---|
| 32.37115 | 29 | 64.76169 | 2281.917 |
# Observed range of tenure. NAs are dropped, matching the other summary
# statistics computed on df1 (which all pass na.rm = TRUE).
df1 %>%
  summarise(
    min_tenure = min(tenure, na.rm = TRUE),
    max_tenure = max(tenure, na.rm = TRUE)
  )
# A tibble: 1 × 2
min_tenure max_tenure
<dbl> <dbl>
1 0 72
# Histogram of tenure using 30 bins.
df1 %>%
  ggplot(aes(x = tenure)) +
  geom_histogram(
    bins = 30,
    fill = "lightblue",
    color = "black",
    alpha = 0.75
  ) +
  labs(
    title = "Distribuição dos meses de permanência",
    x = "Meses de permanência",
    y = "Frequência"
  ) +
  theme_minimal()

# Customer counts by gender with each group's percentage share (2 decimals),
# shown as a gt table.
count(df1, gender) %>%
  mutate(prop = round(100 * n/sum(n), 2)) %>%
  gt()
| gender | n | prop |
|---|---|---|
| Female | 3488 | 49.52 |
| Male | 3555 | 50.48 |
# Percentage of customers by gender (2 decimals) plus a ready-made "NN.NN%"
# text label. Computed once and reused by every chart below instead of
# repeating the same count/mutate pipeline for each plot.
gender_share <- df1 %>%
  count(gender) %>%
  mutate(
    prop = round(100 * n / sum(n), 2),
    label = paste0(prop, "%")
  )

# Bar chart: percentage of customers by gender.
gender_share %>%
  ggplot(aes(x = gender, y = prop, fill = gender)) +
  geom_col() +
  theme_minimal() +
  labs(
    title = "Proporção de clientes por gênero",
    y = "% de clientes",
    x = "Gênero"
  )

# Same bar chart with the percentage printed above each bar and a titled
# legend.
gender_share %>%
  ggplot(aes(x = gender, y = prop, fill = gender)) +
  geom_col() +
  geom_text(
    aes(label = label),
    vjust = -0.3, # nudge the label just above the top of the bar
    size = 4
  ) +
  theme_minimal() +
  labs(
    title = "Proporção de clientes por gênero",
    y = "% de clientes",
    x = "Gênero",
    fill = "Gênero do cliente"
  )

# Labelled bar chart without a legend title. Labels are removed by setting
# them to NULL; element_blank() is a theme element, not a label value.
gender_share %>%
  ggplot(aes(x = gender, y = prop, fill = gender)) +
  geom_col() +
  geom_text(
    aes(label = label),
    vjust = -0.3,
    size = 4
  ) +
  theme_minimal() +
  labs(
    title = "Proporção de clientes por gênero",
    y = "% de clientes",
    x = "Gênero",
    fill = NULL
  )

# Pie chart: a single stacked column drawn in polar coordinates, with each
# label centred in its slice.
gender_share %>%
  ggplot(aes(x = "", y = prop, fill = gender)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  theme_void() +
  labs(
    title = "Proporção de clientes por gênero",
    fill = "Gênero do cliente"
  )

# Donut chart: same as the pie, but the column sits at x = 2 and the x axis
# is widened so that empty space is left in the centre.
gender_share %>%
  ggplot(aes(x = 2, y = prop, fill = gender)) +
  geom_col(width = 1, color = "white") +
  coord_polar(theta = "y") +
  xlim(0.5, 2.5) + # creates the donut's "hole"
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 4
  ) +
  theme_void() +
  labs(
    title = "Proporção de clientes por gênero",
    fill = "Gênero do cliente"
  )