Se analizará la base de datos del nuevo sistema de ingreso de tickets.
15 de julio , diccionario.
# summary(BD)
# names(BD)
Se reemplazan los strings “NULL” por NA y se gráfica para visualizar los campos con missing values de la data.
# BD <- read_csv("~/Ramos/2021-2/Instacrops/OTRS/otrs_state_july2021.csv",col_types = cols(tn = col_character()))
# BD = BD %>%
# mutate(create_time = as.character(create_time),
# change_time = as.character(change_time))
#
# for (i in 1:ncol(BD)) {
# for (j in 1:nrow(BD)) {
# BD[j,i] = ifelse(BD[j,i]=="NULL", NA ,BD[j,i])
# }
# }
# save(BD,file = "~/Ramos/2021-2/Instacrops/OTRS/BD.RData")
#
load("~/Ramos/2021-2/Instacrops/OTRS/BD.RData")
BD_clean=BD
vis_miss(BD)
Se revisará cada columna para explorar la información contenida y evaluar si es necesario cambiar el formato.
# Revisar si el id es único
sum(unique(BD$id)/unique(BD$id))
## [1] 154
nrow(BD)
## [1] 154
El id es único y completo.
# table(BD$tn)
BD %>%
select(tn) %>%
mutate(tn= as.numeric(tn)) %>%
filter(!is.na(tn)) %>%
ggplot(aes(tn)) + geom_bar(stat="bin", bins=60)
BD_clean = BD_clean %>%
mutate(tn= as.numeric(tn))
sum(unique(BD_clean$tn)/unique(BD_clean$tn))
## [1] 154
No hay NA’s, también es único.
table(BD$title)[1:8]
##
## 1014, falla de sensor atmosférico
## 1
## 1020, error en el registro de precipitaciones
## 1
## 1028, datos de sensor atmosférico con ruidos
## 1
## 1049-1051 - Falla eléctrica en activación de sectores
## 1
## 1052, intermitencia en la señal/caídas en las lecturas de humedad
## 1
## 1062/1063 - Cambio de altura sensores atmosféricos
## 1
## 1069, dudas en registro de precipitación
## 1
## 1069, sin registro de velocidad del viento
## 1
Se muestran algunos titulos.
table(BD$queue_id)
##
## 6 9 15 16 19 44 45 46
## 10 20 56 53 1 1 3 10
BD %>%
select(queue_id) %>%
count(queue_id, sort=TRUE) %>%
filter(!is.na(queue_id)) %>%
mutate(queue_id = as.factor(queue_id)) %>%
ggplot(aes(n,queue_id)) + geom_col()
BD %>%
filter(is.na(queue_id))
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
# BD_clean = BD_clean %>%
# filter(!is.na(queue_id))
No hay NA’s.
# table(BD$ticket_lock_id)
BD %>%
select(ticket_lock_id) %>%
count(ticket_lock_id, sort=TRUE) %>%
filter(!is.na(ticket_lock_id)) %>%
mutate(ticket_lock_id = as.factor(ticket_lock_id)) %>%
ggplot(aes(n,ticket_lock_id)) + geom_col(stat="identity")
## Warning: Ignoring unknown parameters: stat
BD %>%
filter(is.na(ticket_lock_id))
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
# BD_clean = BD_clean %>%
# filter(!is.na(ticket_lock_id))
No hay NA’s.
# table(BD$type_id)
BD %>%
select(type_id) %>%
count(type_id, sort=TRUE) %>%
filter(!is.na(type_id)) %>%
mutate(type_id = as.factor(type_id)) %>%
ggplot(aes(n,type_id)) + geom_col()
BD %>%
filter(is.na(type_id))
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
# BD_clean = BD_clean %>%
# filter(!is.na(type_id))
No hay NA’s.
# table(BD$service_id)
BD %>%
select(service_id) %>%
count(service_id, sort=TRUE) %>%
filter(!is.na(service_id)) %>%
mutate(service_id = as.factor(service_id)) %>%
ggplot(aes(n,service_id)) + geom_col(stat="identity")
## Warning: Ignoring unknown parameters: stat
BD %>%
filter(is.na(service_id))
## # A tibble: 28 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2435 20210~ 884, ~ 46 1 2 <NA> <NA> 7
## 2 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 3 2442 20210~ 143, ~ 9 2 2 <NA> <NA> 50
## 4 2444 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 5 2445 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 6 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 7 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 8 2454 20210~ 1098_~ 15 1 2 <NA> <NA> 42
## 9 2455 20210~ 1097_~ 15 1 2 <NA> <NA> 42
## 10 2456 20210~ IWP_F~ 45 1 8 <NA> <NA> 50
## # ... with 18 more rows, and 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
# BD_clean = BD_clean %>%
# filter(!is.na(service_id))
Hay 28 filas con este valor NA.
# table(BD$sla_id)
BD %>%
select(sla_id) %>%
count(sla_id, sort=TRUE) %>%
filter(!is.na(sla_id)) %>%
mutate(sla_id = as.factor(sla_id)) %>%
ggplot(aes(n,sla_id)) + geom_col()
BD %>%
filter(is.na(sla_id))
## # A tibble: 48 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2434 20210~ 366, ~ 46 1 2 44 <NA> 7
## 2 2435 20210~ 884, ~ 46 1 2 <NA> <NA> 7
## 3 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 4 2442 20210~ 143, ~ 9 2 2 <NA> <NA> 50
## 5 2443 20210~ 805, ~ 16 2 2 50 <NA> 46
## 6 2444 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 7 2445 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 8 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 9 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 10 2452 20210~ 704, ~ 46 1 2 44 <NA> 46
## # ... with 38 more rows, and 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
# BD_clean = BD_clean %>%
# filter(!is.na(sla_id))
Hay 48 filas con NA’s.
# table(BD$user_id)
BD %>%
select(user_id) %>%
count(user_id, sort=TRUE) %>%
#filter(n>20) %>%
mutate(user_id = as.factor(user_id)) %>%
ggplot(aes(n,user_id)) + geom_col()
BD %>%
filter(is.na(user_id))
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
# BD_clean = BD_clean %>%
# filter(!is.na(user_id))
No hay NA’s.
table(BD$responsible_user_id)
##
## 1
## 154
El valor es único.
table(BD$ticket_priority_id)
##
## 4
## 154
El valor es único.
table(BD$ticket_state_id)
##
## 2 4
## 23 131
BD %>%
select(ticket_state_id) %>%
count(ticket_state_id, sort=TRUE) %>%
#filter(n>20) %>%
mutate(ticket_state_id = as.factor(ticket_state_id)) %>%
ggplot(aes(n,ticket_state_id)) + geom_col(stat="identity")
## Warning: Ignoring unknown parameters: stat
table(BD$customer_id)
##
## 1 10 100 1005 1006 1008 1010 1012 102 103 104 108 120 121 129 130
## 1 2 1 1 4 1 2 2 1 1 1 2 1 1 4 1
## 131 132 134 145 146 148 149 15 154 155 16 17 171 172 175 176
## 8 5 1 2 1 1 3 1 2 1 2 2 2 2 1 1
## 179 186 190 192 193 198 201 202 212 215 226 23 230 232 243 245
## 1 5 2 1 1 2 2 1 1 1 1 2 9 2 1 1
## 247 248 25 251 256 257 258 26 262 268 269 271 272 33 35 39
## 3 1 1 1 1 1 2 4 1 2 1 1 2 1 1 3
## 45 46 47 49 51 59 63 64 89 999
## 1 1 1 1 2 3 1 3 1 5
BD %>%
select(customer_id) %>%
count(customer_id, sort=TRUE) %>%
filter(n>2) %>%
mutate(customer_id = as.factor(customer_id)) %>%
ggplot(aes(n,customer_id)) + geom_col()
BD %>%
filter(is.na(customer_id))
## # A tibble: 15 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2444 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 3 2445 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2454 20210~ 1098_~ 15 1 2 <NA> <NA> 42
## 7 2455 20210~ 1097_~ 15 1 2 <NA> <NA> 42
## 8 2456 20210~ IWP_F~ 45 1 8 <NA> <NA> 50
## 9 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 10 2468 20210~ id_11~ 44 1 2 <NA> <NA> 42
## 11 2478 20210~ ID: 1~ 15 1 8 <NA> <NA> 38
## 12 2573 20210~ id_46~ 15 1 2 <NA> <NA> 42
## 13 2574 20210~ ID_39~ 15 1 2 <NA> <NA> 42
## 14 2576 20210~ ID_47~ 15 1 2 <NA> <NA> 42
## 15 2577 20210~ ID_83~ 15 1 2 <NA> <NA> 42
## # ... with 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
Hay 15 filas con este valor vacio.
table(BD$customer_user_id)
##
## Agrícola Correa Cox
## 1
## Agricola Del Carmen
## 2
## Agricola del Solar
## 2
## Agrícola Ditzler ltda
## 4
## Agrícola Don Jochen
## 1
## Agrícola El Arrollo
## 1
## Agrícola El Huapi
## 2
## AGRICOLA KAIKEN LIMITADA
## 1
## Agricola Las Vertientes Ltda.
## 5
## AGRÍCOLA LOS ESPINOS
## 1
## Agrícola Los Molinos
## 1
## Agrícola Los Robles
## 1
## Agricola los Zorrillos
## 1
## Agricola Malaga
## 2
## Agricola Mercedes Briceño
## 3
## Agricola Ocoa Ltda
## 1
## Agrícola Panagro
## 2
## Agricola Pimpihue S.A
## 2
## Agricola Pinochet
## 1
## Agricola Ponderosa
## 1
## Agricola Portezuelo
## 1
## Agrícola Rafael Tomas Eduardo Undurraga EIRL
## 2
## Agrícola San Carlos Limitada
## 2
## Agrícola San Carlos Ltda
## 1
## AGRICOLA SAN RICARDO
## 1
## Agricola Santa Ana
## 1
## Agrícola Santa Graciela
## 1
## Agrícola Santa Marta de Liray
## 2
## Agricola Valvalle
## 2
## Agrícola Verdani Ltda
## 2
## Agrícola y Forestal El Durazno
## 1
## Agrofrutícola Mallarauco
## 1
## Agropecuaria Ferias Araucanía
## 1
## AGROREYES
## 3
## bnegroni
## 3
## CER - Felipe Labbé
## 2
## CHARLES WINDELSCHMIDT COLLAR
## 1
## Cliente no encontrado
## 5
## El Encanto
## 2
## elhuapi
## 1
## EXPORTADORA POMPEIA
## 2
## Frutasol
## 4
## Frutícola Denisse Salinas Martínez
## 1
## Fruticola el Aromo
## 1
## Fundo Catemu
## 1
## Jhonson Fruits
## 2
## Juan Henriquez Marich
## 1
## Las Cebra
## 3
## Llancay
## 1
## lmiranda
## 1
## Luis Correa Ponce
## 1
## Luis Miranda Oyanadel
## 1
## MARIA CRISTINA BUSTOS GALDAMES
## 1
## McLean
## 1
## Monfrut
## 2
## New Farm Forestry Spa
## 2
## Noble Fruit
## 1
## Pablo Basualto
## 1
## Pablo Massoud
## 1
## Paulina Gajardo Valenzuela
## 1
## Providencia
## 3
## SAMUEL BUDINICH
## 1
## Santa Francisca
## 1
## Santa Lucia
## 1
## Soc. de Inversiones y servicios pro Ingenieri
## 2
## Sociedad Agricola El Radal Ltda
## 1
## SOCIEDAD AGRICOLA FISTUR SPA
## 9
## Sociedad agrícola Jg Ltda
## 8
## Sociedad Agricola La Hornilla SPA
## 2
## Sociedad Agricola Mataquito
## 1
## Sociedad Agricola Munilque Limitada
## 3
## Sociedad Agricola Santa Laura SpA
## 1
## Tierra Buena
## 1
## Transportes La Martina LTDA
## 4
## UCM
## 1
## Viña Los Boldos Ltda
## 1
BD %>%
select(customer_user_id) %>%
count(customer_user_id, sort=TRUE) %>%
filter(n>20) %>%
mutate(customer_user_id = as.factor(customer_user_id)) %>%
ggplot(aes(n,customer_user_id)) + geom_col(stat="identity")
## Warning: Ignoring unknown parameters: stat
BD %>%
filter(is.na(customer_user_id))
## # A tibble: 15 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2444 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 3 2445 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2454 20210~ 1098_~ 15 1 2 <NA> <NA> 42
## 7 2455 20210~ 1097_~ 15 1 2 <NA> <NA> 42
## 8 2456 20210~ IWP_F~ 45 1 8 <NA> <NA> 50
## 9 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 10 2468 20210~ id_11~ 44 1 2 <NA> <NA> 42
## 11 2478 20210~ ID: 1~ 15 1 8 <NA> <NA> 38
## 12 2573 20210~ id_46~ 15 1 2 <NA> <NA> 42
## 13 2574 20210~ ID_39~ 15 1 2 <NA> <NA> 42
## 14 2576 20210~ ID_47~ 15 1 2 <NA> <NA> 42
## 15 2577 20210~ ID_83~ 15 1 2 <NA> <NA> 42
## # ... with 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
# BD_clean = BD_clean %>%
# filter(!is.na(customer_user_id))
Hay 913 filas con este valor vacio.
table(BD$customer_user_id)
##
## Agrícola Correa Cox
## 1
## Agricola Del Carmen
## 2
## Agricola del Solar
## 2
## Agrícola Ditzler ltda
## 4
## Agrícola Don Jochen
## 1
## Agrícola El Arrollo
## 1
## Agrícola El Huapi
## 2
## AGRICOLA KAIKEN LIMITADA
## 1
## Agricola Las Vertientes Ltda.
## 5
## AGRÍCOLA LOS ESPINOS
## 1
## Agrícola Los Molinos
## 1
## Agrícola Los Robles
## 1
## Agricola los Zorrillos
## 1
## Agricola Malaga
## 2
## Agricola Mercedes Briceño
## 3
## Agricola Ocoa Ltda
## 1
## Agrícola Panagro
## 2
## Agricola Pimpihue S.A
## 2
## Agricola Pinochet
## 1
## Agricola Ponderosa
## 1
## Agricola Portezuelo
## 1
## Agrícola Rafael Tomas Eduardo Undurraga EIRL
## 2
## Agrícola San Carlos Limitada
## 2
## Agrícola San Carlos Ltda
## 1
## AGRICOLA SAN RICARDO
## 1
## Agricola Santa Ana
## 1
## Agrícola Santa Graciela
## 1
## Agrícola Santa Marta de Liray
## 2
## Agricola Valvalle
## 2
## Agrícola Verdani Ltda
## 2
## Agrícola y Forestal El Durazno
## 1
## Agrofrutícola Mallarauco
## 1
## Agropecuaria Ferias Araucanía
## 1
## AGROREYES
## 3
## bnegroni
## 3
## CER - Felipe Labbé
## 2
## CHARLES WINDELSCHMIDT COLLAR
## 1
## Cliente no encontrado
## 5
## El Encanto
## 2
## elhuapi
## 1
## EXPORTADORA POMPEIA
## 2
## Frutasol
## 4
## Frutícola Denisse Salinas Martínez
## 1
## Fruticola el Aromo
## 1
## Fundo Catemu
## 1
## Jhonson Fruits
## 2
## Juan Henriquez Marich
## 1
## Las Cebra
## 3
## Llancay
## 1
## lmiranda
## 1
## Luis Correa Ponce
## 1
## Luis Miranda Oyanadel
## 1
## MARIA CRISTINA BUSTOS GALDAMES
## 1
## McLean
## 1
## Monfrut
## 2
## New Farm Forestry Spa
## 2
## Noble Fruit
## 1
## Pablo Basualto
## 1
## Pablo Massoud
## 1
## Paulina Gajardo Valenzuela
## 1
## Providencia
## 3
## SAMUEL BUDINICH
## 1
## Santa Francisca
## 1
## Santa Lucia
## 1
## Soc. de Inversiones y servicios pro Ingenieri
## 2
## Sociedad Agricola El Radal Ltda
## 1
## SOCIEDAD AGRICOLA FISTUR SPA
## 9
## Sociedad agrícola Jg Ltda
## 8
## Sociedad Agricola La Hornilla SPA
## 2
## Sociedad Agricola Mataquito
## 1
## Sociedad Agricola Munilque Limitada
## 3
## Sociedad Agricola Santa Laura SpA
## 1
## Tierra Buena
## 1
## Transportes La Martina LTDA
## 4
## UCM
## 1
## Viña Los Boldos Ltda
## 1
BD %>%
select(customer_user_id) %>%
count(customer_user_id, sort=TRUE) %>%
filter(n>2) %>%
mutate(customer_user_id = as.factor(customer_user_id)) %>%
ggplot(aes(n,customer_user_id)) + geom_col()
BD %>%
filter(is.na(customer_user_id))
## # A tibble: 15 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2444 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 3 2445 20210~ ID: 1~ 15 1 8 <NA> <NA> 48
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2454 20210~ 1098_~ 15 1 2 <NA> <NA> 42
## 7 2455 20210~ 1097_~ 15 1 2 <NA> <NA> 42
## 8 2456 20210~ IWP_F~ 45 1 8 <NA> <NA> 50
## 9 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 10 2468 20210~ id_11~ 44 1 2 <NA> <NA> 42
## 11 2478 20210~ ID: 1~ 15 1 8 <NA> <NA> 38
## 12 2573 20210~ id_46~ 15 1 2 <NA> <NA> 42
## 13 2574 20210~ ID_39~ 15 1 2 <NA> <NA> 42
## 14 2576 20210~ ID_47~ 15 1 2 <NA> <NA> 42
## 15 2577 20210~ ID_83~ 15 1 2 <NA> <NA> 42
## # ... with 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
# BD_clean = BD_clean %>%
# filter(!is.na(customer_user_id))
Hay 15 filas con NA.
#table(BD$timeout)
BD %>%
select(timeout) %>%
mutate(timeout= as.numeric(timeout)) %>%
filter(!is.na(timeout)) %>%
ggplot(aes(timeout)) + geom_bar(stat="bin", bins=60)
Hay 40 filas con valor igual a 0. ¿En qué formato está?
table(BD$until_time)
##
## 0
## 154
BD %>%
filter(until_time!=0)
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
Hay solo 5 filas con valores distintos de 0.
#table(BD$escalation_time)
BD %>%
filter(escalation_time!=0)
## # A tibble: 35 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2442 20210~ 143, ~ 9 2 2 <NA> <NA> 50
## 3 2443 20210~ 805, ~ 16 2 2 50 <NA> 46
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2453 20210~ 914, ~ 16 2 2 50 <NA> 46
## 7 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 8 2466 20210~ 254, ~ 15 2 8 50 <NA> 50
## 9 2467 20210~ 857, ~ 9 2 2 50 <NA> 50
## 10 2470 20210~ 1052,~ 16 2 2 50 <NA> 46
## # ... with 25 more rows, and 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
Hay 487 filas con valores distintos de 0.
#table(BD$escalation_update_time)
BD %>%
filter(escalation_update_time!=0)
## # A tibble: 35 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2442 20210~ 143, ~ 9 2 2 <NA> <NA> 50
## 3 2443 20210~ 805, ~ 16 2 2 50 <NA> 46
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2453 20210~ 914, ~ 16 2 2 50 <NA> 46
## 7 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 8 2466 20210~ 254, ~ 15 2 8 50 <NA> 50
## 9 2467 20210~ 857, ~ 9 2 2 50 <NA> 50
## 10 2470 20210~ 1052,~ 16 2 2 50 <NA> 46
## # ... with 25 more rows, and 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
Hay 486 variables con valores distintos a 0.
#table(BD$escalation_response_time)
BD %>%
filter(escalation_response_time!=0)
## # A tibble: 1 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2453 202107~ 914, ~ 16 2 2 50 <NA> 46
## # ... with 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
Hay 66 filas con valores distintos de 0.
#table(BD$escalation_solution_time)
BD %>%
filter(escalation_solution_time!=0)
## # A tibble: 35 x 25
## id tn title queue_id ticket_lock_id type_id service_id sla_id user_id
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <chr> <chr> <dbl>
## 1 2441 20210~ ID: 1~ 15 1 8 <NA> <NA> 46
## 2 2442 20210~ 143, ~ 9 2 2 <NA> <NA> 50
## 3 2443 20210~ 805, ~ 16 2 2 50 <NA> 46
## 4 2446 20210~ id_62~ 16 2 2 <NA> <NA> 46
## 5 2447 20210~ ID_50~ 19 2 2 <NA> <NA> 42
## 6 2453 20210~ 914, ~ 16 2 2 50 <NA> 46
## 7 2457 20210~ id_10~ 15 1 8 <NA> <NA> 42
## 8 2466 20210~ 254, ~ 15 2 8 50 <NA> 50
## 9 2467 20210~ 857, ~ 9 2 2 50 <NA> 50
## 10 2470 20210~ 1052,~ 16 2 2 50 <NA> 46
## # ... with 25 more rows, and 16 more variables: responsible_user_id <dbl>,
## # ticket_priority_id <dbl>, ticket_state_id <dbl>, customer_id <chr>,
## # customer_user_id <chr>, timeout <dbl>, until_time <dbl>,
## # escalation_time <dbl>, escalation_update_time <dbl>,
## # escalation_response_time <dbl>, escalation_solution_time <dbl>,
## # archive_flag <dbl>, create_time <chr>, create_by <dbl>, change_time <chr>,
## # change_by <dbl>
Hay que descubrir en que unidad esta el tiempo en esta columna.
table(BD$archive_flag)
##
## 0
## 154
BD %>%
filter(archive_flag!=0)
## # A tibble: 0 x 25
## # ... with 25 variables: id <dbl>, tn <chr>, title <chr>, queue_id <dbl>,
## # ticket_lock_id <dbl>, type_id <dbl>, service_id <chr>, sla_id <chr>,
## # user_id <dbl>, responsible_user_id <dbl>, ticket_priority_id <dbl>,
## # ticket_state_id <dbl>, customer_id <chr>, customer_user_id <chr>,
## # timeout <dbl>, until_time <dbl>, escalation_time <dbl>,
## # escalation_update_time <dbl>, escalation_response_time <dbl>,
## # escalation_solution_time <dbl>, archive_flag <dbl>, create_time <chr>, ...
Existe solo un valor distinto de 0 que tiene id 2281.
#table(BD$create_time)
BD %>%
select(create_time) %>%
mutate(create_time = as.Date(create_time, format="%d-%m-%Y %H:%M")) %>%
#mutate(create_time = round_date(create_time,unit="week")) %>%
ggplot(aes(x=create_time)) + geom_bar(stat="bin")+scale_x_date(date_labels = "%m-%Y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 154 rows containing non-finite values (stat_bin).
BD_clean = BD_clean %>%
mutate(create_time = as.Date(create_time, format="%d-%m-%Y %H:%M"))
table(BD$create_by)
##
## 5 7 8 42 45 46 48
## 8 65 48 11 17 4 1
BD %>%
select(create_by) %>%
mutate(create_by = as.factor(create_by)) %>%
ggplot(aes(x=create_by)) + geom_bar(stat="count")
BD_clean = BD_clean %>%
filter(!is.na(create_by))
#table(BD$change_time)
BD %>%
select(change_time) %>%
mutate(change_time = as.Date(change_time, format="%d-%m-%Y %H:%M")) %>%
#mutate(change_time = round_date(change_time,unit="week")) %>%
ggplot(aes(x=change_time)) + geom_bar(stat="bin")+scale_x_date(date_labels = "%m-%Y")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 154 rows containing non-finite values (stat_bin).
BD_clean = BD_clean %>%
mutate(change_time = as.Date(change_time, format="%d-%m-%Y %H:%M"))
table(BD$change_by)
##
## 5 7 8 38 42 45 46 48 50
## 5 23 28 5 7 9 10 65 2
BD %>%
select(change_by) %>%
mutate(change_by = as.factor(change_by)) %>%
ggplot(aes(x=change_by)) + geom_bar(stat="count")
BD_clean = BD_clean %>%
filter(!is.na(change_by))
Se ve que la columna toma valores del 1 al 50, existiendo algunos más frecuentes.
BD_clean$X26 = NULL