# ==================================================================================
# REPORTE FINAL R PUBS: PROPERTY DAMAGE COSTS (USD)
# ==================================================================================
# 1. LIBRERÍAS
library(ggplot2)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(scales)
# 2. DATA
# Load the workbook and extract the cost column as a clean numeric vector.
database <- read_excel("database.xlsx")
## Warning: Expecting numeric in C2189 / R2189C3: got 'Accident Year'
## Warning: Expecting numeric in C2215 / R2215C3: got 'Accident Year'
nombre_var <- "Property Damage Costs"
# Fail early with a clear message if the expected column is absent; otherwise
# `[[` yields NULL and the script only breaks later when building the classes.
if (!nombre_var %in% names(database)) {
  stop(
    "Column '", nombre_var, "' was not found in database.xlsx",
    call. = FALSE
  )
}
# Coerce to numeric and discard every entry that is missing or not a number.
datos_poblacion <- as.numeric(database[[nombre_var]])
datos_poblacion <- datos_poblacion[!is.na(datos_poblacion)]
if (length(datos_poblacion) == 0) {
  stop(
    "Column '", nombre_var, "' contains no numeric values",
    call. = FALSE
  )
}
# VISUAL ZOOM
# Keep only the values up to ten times the median so that the charts are not
# dominated by the extreme upper tail.
limite_visual <- median(datos_poblacion) * 10
# The class limits are later built with seq(0, limite_visual, ...), which needs
# a finite, strictly positive upper bound to produce distinct breaks.
if (!is.finite(limite_visual) || limite_visual <= 0) {
  stop(
    "The visual limit (10 x median) must be a positive finite number; got ",
    limite_visual,
    call. = FALSE
  )
}
datos_v <- datos_poblacion[datos_poblacion <= limite_visual]
n_zoom <- length(datos_v)
# 3. FREQUENCY TABLE
# Ten equal-width classes spanning [0, limite_visual]. Every class is closed on
# the left and open on the right, except the last one, which also contains its
# upper limit (right = FALSE combined with include.lowest = TRUE).
cortes <- seq(0, limite_visual, length.out = 11)
# Human-readable labels of the form "lower - upper" for each class.
etiquetas <- paste0(
  comma(cortes[-length(cortes)]),
  " - ",
  comma(cortes[-1])
)
tabla_frecuencia <- data.frame(
  Intervalo = cut(
    datos_v,
    breaks = cortes,
    include.lowest = TRUE,
    right = FALSE,
    labels = etiquetas
  )
) %>%
  # .drop = FALSE keeps classes with zero observations, so the table always has
  # exactly one row per class and stays aligned with the class limits in
  # `cortes` (count() silently drops empty factor levels by default).
  count(Intervalo, name = "ni", .drop = FALSE) %>%
  mutate(
    # Relative frequency with respect to the zoomed sample.
    hi = round(ni / n_zoom, 3),
    # Ascending cumulative frequency: observations up to and including a class.
    Ni_asc = cumsum(ni),
    # Descending cumulative frequency: observations in this class or above.
    Ni_desc = n_zoom - Ni_asc + ni
  )
print(tabla_frecuencia)
## Intervalo ni hi Ni_asc Ni_desc
## 1 0 - 3,000 1367 0.633 1367 2160
## 2 3,000 - 6,000 247 0.114 1614 793
## 3 6,000 - 9,000 116 0.054 1730 546
## 4 9,000 - 12,000 118 0.055 1848 430
## 5 12,000 - 15,000 56 0.026 1904 312
## 6 15,000 - 18,000 70 0.032 1974 256
## 7 18,000 - 21,000 74 0.034 2048 186
## 8 21,000 - 24,000 16 0.007 2064 112
## 9 24,000 - 27,000 51 0.024 2115 96
## 10 27,000 - 30,000 45 0.021 2160 45
# 4. HISTOGRAM
# One bar per class of the frequency table; bar height is the absolute
# frequency (ni). Class labels are tilted so that they do not overlap.
p1 <- tabla_frecuencia %>%
  ggplot(mapping = aes(x = Intervalo, y = ni)) +
  geom_col(colour = "white", fill = "#546E7A") +
  labs(
    y = "Frecuencia Absoluta",
    x = "Rango de Costo (USD)",
    title = "Histograma de Costos por Daños a la Propiedad"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(hjust = 1, angle = 40)
  )
print(p1)

# 5. OGIVES ("LESS THAN" AND "MORE THAN")
# Ni_asc[i] counts the observations that fall below the upper limit of class i,
# so the "Menor que" curve is drawn at the upper class limits.
# Ni_desc[i] counts the observations at or above the lower limit of class i,
# so the "Mayor que" curve has to be drawn at the lower class limits; plotting
# it at the upper limits would shift the whole curve one class to the right.
puntos_x <- cortes[-1]
puntos_x_inf <- cortes[-length(cortes)]
ojiva <- rbind(
  data.frame(
    Limite = puntos_x,
    Frecuencia = tabla_frecuencia$Ni_asc,
    Tipo = "Menor que"
  ),
  data.frame(
    Limite = puntos_x_inf,
    Frecuencia = tabla_frecuencia$Ni_desc,
    Tipo = "Mayor que"
  )
)
p2 <- ggplot(ojiva, aes(x = Limite, y = Frecuencia, color = Tipo)) +
  # `linewidth` replaces the `size` argument for lines, which is deprecated
  # since ggplot2 3.4.0.
  geom_line(linewidth = 1.2) +
  geom_point(size = 2) +
  scale_x_continuous(labels = label_dollar()) +
  theme_minimal() +
  labs(
    title = "Ojivas de Frecuencia Acumulada",
    x = "Costo (USD)",
    y = "Frecuencia Acumulada"
  )
print(p2)

# 6. BOXPLOT
# Drawn on the zoomed sample (datos_v), so values above limite_visual are not
# plotted. The box is laid out horizontally via coord_flip(), which is why the
# cost scale is declared on y but ends up on the horizontal axis.
p3 <- data.frame(costo = datos_v) %>%
  ggplot(mapping = aes(x = "", y = costo)) +
  geom_boxplot(
    colour = "black",
    fill = "#4E342E",
    outlier.alpha = 0.7,
    outlier.colour = "#D32F2F"
  ) +
  coord_flip() +
  scale_y_continuous(labels = label_dollar()) +
  labs(
    x = "",
    y = "Dólares (USD)",
    title = "Boxplot de Costos por Daños a la Propiedad"
  ) +
  theme_minimal()
print(p3)

# ==================================================================================
# FIN
# ==================================================================================