# Global knitr chunk options: echo the code of every chunk and suppress
# warnings and messages in the rendered document.
knitr::opts_chunk$set(
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
# Penalise scientific notation so large amounts print in fixed notation.
# (This call was duplicated; a single call is sufficient.)
options(scipen = 999)
# 1. Load the data
# NOTE(review): the working directory is a hard-coded, machine-specific
# path, so the script only runs where this folder exists.
setwd("C:/Users/ronal/OneDrive/Desktop")
# Comma-separated file with a header row and "." as the decimal mark.
datos <- read.csv(
  file = "database (1).csv",
  sep = ",",
  dec = ".",
  header = TRUE
)
#2. Cargar librerías
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# 3. Extract and clean the Other.Costs variable
# Keep only the non-missing values that are strictly greater than zero.
Other.Costs <- datos$Other.Costs
Other.Costs <- Other.Costs[!is.na(Other.Costs) & Other.Costs > 0]
# 4. Range and number of classes (Sturges' rule)
xmin <- min(Other.Costs)
xmax <- max(Other.Costs)
R <- xmax - xmin                                   # range of the data
K <- floor(3.3 * log10(length(Other.Costs)) + 1)   # number of classes
A <- R / K                                         # class width

# 5. Class limits and class marks
# Lower limits start at the minimum, upper limits one width later; both
# are rounded to two decimals. Class marks are the midpoints, rounded to
# whole units.
Li <- round(seq(xmin, xmax - A, by = A), digits = 2)
Ls <- round(seq(xmin + A, xmax, by = A), digits = 2)
MC <- round((Ls + Li) / 2)
# Check that exactly K classes were produced.
K == length(MC)
## [1] TRUE
# 6. Frequency distribution table
# 6.1 Absolute frequency
# Classes 1..K-1 are half-open, [Li, Ls); the last class is closed on the
# right, [Li[K], xmax], so that the maximum value is counted.
ni <- numeric(K)
# Fix: the loop previously ran over `1` only, so classes 2..K-1 were never
# counted and stayed at zero (the knitted run showed sum(ni) = 307 against
# 314 observations). seq_len() also copes with K == 1 (empty loop).
for (i in seq_len(K - 1)) {
  ni[i] <- sum(Other.Costs >= Li[i] & Other.Costs < Ls[i])
}
ni[K] <- sum(Other.Costs >= Li[K] & Other.Costs <= xmax)
# Sanity check: the class counts should add up to the number of
# observations.
sum(ni)
length(Other.Costs)
## [1] 314
sum(ni) == length(Other.Costs)
# NOTE(review): outputs knitted before this fix (sum(ni) = 307, the FALSE
# result of the check above, and any frequency table printed further down
# in this document) are stale; re-knit to regenerate them.
# 6.2 Relative and cumulative frequencies
# hi is the relative frequency in percent; the *_asc series accumulate from
# the first class upwards, the *_desc series from the last class downwards.
hi <- 100 * (ni / sum(ni))
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# 6.3 Final frequency table (percentages rounded to two decimals)
TDF <- data.frame(
  Li,
  Ls,
  MC,
  ni,
  hi_porc = round(hi, digits = 2),
  Ni_asc,
  Ni_desc,
  Hi_asc_porc = round(Hi_asc, digits = 2),
  Hi_desc_porc = round(Hi_desc, digits = 2)
)
TDF
## Li Ls MC ni hi_porc Ni_asc Ni_desc Hi_asc_porc
## 1 50 2483378 1241714 306 99.67 306 307 99.67
## 2 2483378 4966706 3725042 0 0.00 306 1 99.67
## 3 4966706 7450033 6208369 0 0.00 306 1 99.67
## 4 7450033 9933361 8691697 0 0.00 306 1 99.67
## 5 9933361 12416689 11175025 0 0.00 306 1 99.67
## 6 12416689 14900017 13658353 0 0.00 306 1 99.67
## 7 14900017 17383344 16141681 0 0.00 306 1 99.67
## 8 17383344 19866672 18625008 0 0.00 306 1 99.67
## 9 19866672 22350000 21108336 1 0.33 307 1 100.00
## Hi_desc_porc
## 1 100.00
## 2 0.33
## 3 0.33
## 4 0.33
## 5 0.33
## 6 0.33
## 7 0.33
## 8 0.33
## 9 0.33
# 7. Histogram
# Draw exactly the K classes of the frequency table: K + 1 equally spaced
# breaks from the minimum to the maximum. The breaks previously ran up to
# max + A, which added an extra, empty bin beyond the maximum.
# right = FALSE with include.lowest = TRUE gives bins [a, b) with the last
# one closed, the same convention used when counting ni.
h <- hist(
  Other.Costs,
  main = "Gráfica No.1: Distribución de Otros Costos",
  breaks = seq(min(Other.Costs), max(Other.Costs), length.out = K + 1),
  right = FALSE,
  include.lowest = TRUE,
  xlab = "Otros Costos",
  ylab = "Frecuencia",
  col = "pink2",
  xaxt = "n"  # the x axis is drawn manually below
)
# Custom x axis so the tick labels are never shown in scientific notation.
axis(1,
     at = pretty(h$breaks),
     labels = format(pretty(h$breaks), scientific = FALSE))

# 8. Ascending and descending ogives
# Ascending ogive: cumulative count reached at each upper limit, anchored
# at 0 on the lower limit of the first class.
# Descending ogive: count remaining from each lower limit, anchored at 0 on
# the upper limit of the last class.
# Fix: the anchors were previously min(Ls) and max(Li), which duplicated an
# existing x value and drew a vertical segment instead of covering the
# first / last class.
x_asc <- c(min(Li), Ls)
y_asc <- c(0, Ni_asc)
x_desc <- c(Li, max(Ls))
y_desc <- c(Ni_desc, 0)
# Common axis ranges so both curves fit in the same plot.
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))
plot(x_asc, y_asc, type = "o", col = "skyblue",
     main = "Gráfica No.2: Ojivas Ascendente y Descendente de Otros Costos",
     xlab = "Otros Costos",
     ylab = "Frecuencia acumulada",
     xlim = x_range, ylim = y_range,
     xaxt = "n", yaxt = "n")  # both axes are drawn manually below
# x axis labels without scientific notation.
axis(1,
     at = pretty(x_range),
     labels = format(pretty(x_range), scientific = FALSE))
axis(2, at = pretty(y_range))
lines(x_desc, y_desc, type = "o", col = "pink4")
legend("topright",
       legend = c("Ascendente", "Descendente"),
       col = c("skyblue", "pink4"),
       lty = 1, pch = 1, cex = 0.8)

# 9. Box plots
# Horizontal box plot on the original scale; the default x axis is
# suppressed and redrawn so labels are not in scientific notation.
boxplot(
  Other.Costs,
  col = "blue",
  horizontal = TRUE,
  xaxt = "n",
  xlab = "Costos",
  main = "Gráfica No.3: Otros Costos"
)
axis(
  side = 1,
  at = pretty(Other.Costs),
  labels = format(pretty(Other.Costs), scientific = FALSE)
)

# 9.1 Box plot on a log10 scale
boxplot(
  log10(Other.Costs),
  col = "skyblue",
  horizontal = TRUE,
  xlab = "log10(Otros Costos)",
  main = "Gráfica No.4 (log10): Otros Costos"
)

# 10. Statistical indicators
library(moments)  # skewness() and kurtosis() are used below
# Central tendency
media <- mean(Other.Costs)
mediana <- median(Other.Costs)
# Dispersion; the coefficient of variation is expressed in percent
varianza <- var(Other.Costs)
desv <- sd(Other.Costs)
cv <- 100 * (desv / media)
# Shape
asim <- skewness(Other.Costs)
curt <- kurtosis(Other.Costs)

# Mode as an interval (modal class): the first bin with the highest count.
# NOTE(review): the bins here come from hist()'s default breaks, not from
# the Sturges classes (Li/Ls) used in the frequency table - confirm this is
# intended.
hist_data <- hist(Other.Costs, plot = FALSE)
indice_modal <- which.max(hist_data$counts)
moda_intervalo <- paste0(
  "[",
  paste(round(hist_data$breaks[indice_modal + 0:1], 2), collapse = " - "),
  "]"
)
# 11. Final table of indicators
# The modal interval is text, so the whole Valor column is character; the
# numeric indicators are rounded to two decimals before being combined.
indicadores_OtrosCostos <- data.frame(
  Indicador = c(
    "Moda (intervalo)", "Mediana", "Media",
    "Desviación Estándar", "Varianza",
    "Coef. de Variación (%)", "Asimetría", "Curtosis"
  ),
  Valor = c(
    moda_intervalo,
    round(c(mediana, media, desv, varianza, cv, asim, curt), 2)
  ),
  stringsAsFactors = FALSE
)
# Pad the values to a common width for printing.
indicadores_OtrosCostos[["Valor"]] <-
  format(indicadores_OtrosCostos[["Valor"]], scientific = FALSE)
print(indicadores_OtrosCostos, row.names = FALSE)
## Indicador Valor
## Moda (intervalo) [0 - 2000000]
## Mediana 9914
## Media 304061.89
## Desviación Estándar 1822030.32
## Varianza 3319794475085.87
## Coef. de Variación (%) 599.23
## Asimetría 9.96
## Curtosis 111.76