# Global knitr chunk options: echo the code, hide warnings and messages.
knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

# Strongly prefer fixed over scientific notation when printing numbers.
options(scipen = 999)
# 1. Load the data
# NOTE(review): the working directory is hard-coded to a path on one
# machine, so the script is not portable -- consider a project-relative
# path instead of setwd().
setwd("C:/Users/ronal/OneDrive/Desktop")

# read.csv() already defaults to header = TRUE, sep = "," and dec = ".",
# so those arguments do not need to be spelled out.
datos <- read.csv("database (1).csv")
#2. Cargar librerías
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(moments)
# 3. Extract and clean the All.Costs variable
# Keep only the non-missing, strictly positive costs.
All.Costs <- datos$All.Costs
All.Costs <- All.Costs[!is.na(All.Costs) & All.Costs > 0]
# 4. Range and number of classes (Sturges' rule)
xmax <- max(All.Costs)
xmin <- min(All.Costs)
R <- xmax - xmin                                 # range of the data
K <- floor(3.3 * log10(length(All.Costs)) + 1)   # number of classes
A <- R / K                                       # class width

# 5. Class limits and class marks
# Lower limits run from xmin in steps of A; upper limits are shifted one
# class width to the right and end at xmax.
Li <- round(seq(xmin, xmax - A, by = A), digits = 2)
Ls <- round(seq(xmin + A, xmax, by = A), digits = 2)
# Class mark = midpoint of each class, rounded to a whole number.
MC <- round(0.5 * (Li + Ls))

# Sanity check: there must be exactly one class mark per class.
K == length(MC)
## [1] TRUE
# 6. Frequency distribution table
# 6.1 Absolute frequency
# Count the observations that fall in each class. Classes 1..(K - 1) are
# closed on the left and open on the right, [Li, Ls). The loop must visit
# every one of them, not just the first.
ni <- numeric(K)
for (i in seq_len(K - 1)) {
  ni[i] <- sum(All.Costs >= Li[i] & All.Costs < Ls[i])
}
# The last class is closed on both sides so the maximum value is counted.
ni[K] <- sum(All.Costs >= Li[K] & All.Costs <= xmax)
# 6.2 Relative and cumulative frequencies
# Relative frequency of each class, as a percentage.
hi <- prop.table(ni) * 100

# Ascending ("less than") cumulative frequencies.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)

# Descending ("or more") cumulative frequencies: accumulate from the last
# class backwards, then restore the original class order.
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
# 6.3 Final frequency table
# One row per class; percentage columns are rounded to two decimals.
TDF <- data.frame(
  Li = Li,
  Ls = Ls,
  MC = MC,
  ni = ni,
  hi_porc = round(hi, digits = 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc_porc = round(Hi_asc, digits = 2),
  Hi_desc_porc = round(Hi_desc, digits = 2)
)
TDF
## Li Ls MC ni hi_porc Ni_asc Ni_desc Hi_asc_porc
## 1 1 70043844 35021923 2757 99.96 2757 2758 99.96
## 2 70043844 140087687 105065766 0 0.00 2757 1 99.96
## 3 140087687 210131530 175109609 0 0.00 2757 1 99.96
## 4 210131530 280175373 245153452 0 0.00 2757 1 99.96
## 5 280175373 350219216 315197295 0 0.00 2757 1 99.96
## 6 350219216 420263060 385241138 0 0.00 2757 1 99.96
## 7 420263060 490306903 455284981 0 0.00 2757 1 99.96
## 8 490306903 560350746 525328824 0 0.00 2757 1 99.96
## 9 560350746 630394589 595372667 0 0.00 2757 1 99.96
## 10 630394589 700438432 665416510 0 0.00 2757 1 99.96
## 11 700438432 770482275 735460353 0 0.00 2757 1 99.96
## 12 770482275 840526118 805504196 1 0.04 2758 1 100.00
## Hi_desc_porc
## 1 100.00
## 2 0.04
## 3 0.04
## 4 0.04
## 5 0.04
## 6 0.04
## 7 0.04
## 8 0.04
## 9 0.04
## 10 0.04
## 11 0.04
## 12 0.04
# 7. Histogram
# Use exactly K + 1 equally spaced break points from the minimum to the
# maximum so the histogram has the same K classes as the frequency table
# (extending the breaks past the maximum would add an extra, empty class).
# right = FALSE makes the bins [a, b), and hist()'s default
# include.lowest = TRUE closes the last bin so the maximum is counted --
# the same convention used to compute ni.
h <- hist(
  All.Costs,
  main = "Gráfica No.1: Distribución de Todos los Costos",
  breaks = seq(min(All.Costs), max(All.Costs), length.out = K + 1),
  right = FALSE,
  xlab = "Costos",
  ylab = "Frecuencia",
  col = "pink2",
  xaxt = "n"   # the x axis is drawn manually below
)

# Custom x axis with labels in fixed (non-scientific) notation.
marcas_hist <- pretty(h$breaks)
axis(1,
     at = marcas_hist,
     labels = format(marcas_hist, scientific = FALSE))

# 8. Ascending and descending ogives
# Ascending ("less than") ogive: the cumulative count is 0 at the lower
# limit of the first class and reaches Ni_asc[i] at the upper limit of
# class i.
x_asc <- c(min(Li), Ls)
y_asc <- c(0, Ni_asc)

# Descending ("or more") ogive: the cumulative count is Ni_desc[i] at the
# lower limit of class i and drops to 0 at the upper limit of the last
# class.
x_desc <- c(Li, max(Ls))
y_desc <- c(Ni_desc, 0)

# Common plotting window for both curves.
x_range <- range(c(x_asc, x_desc))
y_range <- c(0, max(c(y_asc, y_desc)))

plot(x_asc, y_asc, type = "o", col = "skyblue",
     main = "Gráfica No.2: Ojivas Ascendente y Descendente de Costos",
     xlab = "Costos",
     ylab = "Frecuencia acumulada",
     xlim = x_range, ylim = y_range,
     xaxt = "n", yaxt = "n")   # both axes are drawn manually below

# Custom axes; x labels are shown in fixed (non-scientific) notation.
axis(1,
     at = pretty(x_range),
     labels = format(pretty(x_range), scientific = FALSE))
axis(2, at = pretty(y_range))

lines(x_desc, y_desc, type = "o", col = "pink4")
legend("topright",
       legend = c("Ascendente", "Descendente"),
       col = c("skyblue", "pink4"),
       lty = 1, pch = 1, cex = 0.8)

# 9. Summary statistics
# Central tendency.
media <- mean(All.Costs)
mediana <- median(All.Costs)

# Dispersion: the standard deviation is the square root of the sample
# variance, and the coefficient of variation is expressed as a percentage.
varianza <- var(All.Costs)
desv <- sqrt(varianza)
cv <- 100 * (desv / media)

# Shape (skewness and kurtosis from the 'moments' package).
asim <- moments::skewness(All.Costs)
curt <- moments::kurtosis(All.Costs)
# Modal interval
# NOTE(review): this uses hist()'s default breaks, not the K classes of the
# frequency table, so the interval reported here need not match a row of
# TDF -- confirm that this is intended.
hist_data <- hist(All.Costs, plot = FALSE)

# Index of the bin with the highest count (the first one on ties).
i_modal <- which.max(hist_data$counts)

# Format the two limits of that bin as "[lower - upper]".
moda_intervalo <- paste0(
  "[",
  paste(round(hist_data$breaks[c(i_modal, i_modal + 1)], 2),
        collapse = " - "),
  "]"
)
# Horizontal box plot of the costs on the original scale. The x axis is
# suppressed here and drawn manually so that the tick labels appear in
# fixed (non-scientific) notation.
boxplot(
  All.Costs,
  main = "Gráfica No.3: Todos los Costos",
  xlab = "Costos",
  col = "blue",
  horizontal = TRUE,
  xaxt = "n"
)
marcas_caja <- pretty(All.Costs)
axis(1, at = marcas_caja, labels = format(marcas_caja, scientific = FALSE))

# Logarithmic box plot: the same data on a log10 scale.
boxplot(
  log10(All.Costs),
  main = "Gráfica No.4 (log10): Todos los Costos",
  xlab = "log10(Costos)",
  col = "skyblue",
  horizontal = TRUE
)

# Final table of indicators
# The numeric indicators are rounded to two decimals. Combining them with
# the modal-interval text makes the whole Valor column character.
nombres_indicadores <- c(
  "Moda (intervalo)", "Mediana", "Media",
  "Desviación Estándar", "Varianza",
  "Coef. de Variación (%)", "Asimetría", "Curtosis"
)
valores_redondeados <- round(
  c(mediana, media, desv, varianza, cv, asim, curt),
  2
)
indicadores_TodoslosCostos <- data.frame(
  Indicador = nombres_indicadores,
  Valor = c(moda_intervalo, valores_redondeados)
)
print(indicadores_TodoslosCostos, row.names = FALSE)
## Indicador Valor
## Moda (intervalo) [0 - 50000000]
## Mediana 24300
## Media 844303.85
## Desviación Estándar 16679838.6
## Varianza 278217015685101
## Coef. de Variación (%) 1975.57
## Asimetría 46.75
## Curtosis 2331.64