Cargar el conjunto de datos
# Read the semicolon-separated telephony CSV straight from GitHub. Character
# columns become factors and strings are marked as latin1.
telco <- read.csv(
  file = "https://raw.githubusercontent.com/VictorGuevaraP/Estadistica-R/master/Caso_telefon%C3%ADa.csv",
  sep = ";",
  encoding = "latin1",
  stringsAsFactors = TRUE
)
# Print the first rows as a quick check of the import.
head(telco)
# Histograms of the three untransformed columns; 12 is passed as the `breaks`
# argument of hist() in each call.
hist(x = telco$Monto, breaks = 12)
hist(x = telco$Minutos, breaks = 12)
hist(x = telco$Reclamos, breaks = 12)
Para sacar la raíz cuadrada de Monto simplemente se puede utilizar la función sqrt()
# Square root of every Monto value; the resulting vector is auto-printed.
sqrt(telco$Monto)
## [1] 9.523655 9.782638 10.700467 10.295630 9.949874 9.497368 10.124228
## [8] 9.602083 10.492855 9.823441 9.396808 9.252027 9.471008 9.423375
## [15] 9.165151 9.176056 9.576012 8.608136 9.423375 9.570789 8.944272
## [22] 9.402127 8.860023 9.396808 9.252027 9.412757 8.876936 9.033272
## [29] 9.289779 9.154234 9.523655 9.782638 9.964939 9.126883 9.864076
## [36] 9.099451 9.710819 9.823441 8.876936 10.009995 9.082951 9.088454
## [43] 9.721111 9.289779 9.396808 9.148770 9.154234 9.170605 9.121403
## [50] 9.208692 9.944848 10.913295 10.168579 10.469002 10.295630 10.029955
## [57] 10.148892 10.511898 10.601887 9.523655 10.606602 10.406729 10.295630
## [64] 9.949874 9.497368 10.124228 9.602083 10.492855 9.823441 9.591663
## [71] 9.176056 9.402127 8.860023 9.396808 9.252027 9.412757 8.876936
## [78] 9.033272 9.289779 9.154234
Para sacar la raíz cuadrada de Minutos simplemente se puede utilizar la función sqrt()
# Square root of every Minutos value; the resulting vector is auto-printed.
sqrt(telco$Minutos)
## [1] 5.744563 5.630275 6.316645 6.268971 6.148170 6.610598 6.789698 6.131884
## [9] 6.480741 6.557439 6.693280 6.131884 5.196152 5.486347 5.147815 5.319774
## [17] 5.449771 5.700877 4.969909 4.979960 5.674504 5.167204 5.603570 5.576737
## [25] 5.167204 5.839521 5.215362 5.329165 7.042727 5.347897 6.107373 5.822371
## [33] 6.164414 6.457554 6.188699 6.971370 5.761944 6.403124 6.457554 5.882176
## [41] 6.892024 5.916080 6.066300 6.603030 5.932959 6.148170 6.140033 6.442049
## [49] 7.092249 5.796551 7.211103 7.099296 6.016644 6.308724 7.886698 6.752777
## [57] 6.971370 6.300794 5.069517 6.356099 6.804410 6.603030 7.429670 6.252999
## [65] 7.183314 6.640783 6.244998 7.224957 7.321202 6.024948 4.929503 5.639149
## [73] 5.744563 5.630275 5.603570 5.186521 4.183300 5.468089 6.024948 5.639149
Para sacar la raíz cuadrada de Reclamos simplemente se puede utilizar la función sqrt()
# Square root of every Reclamos value; the resulting vector is auto-printed.
sqrt(telco$Reclamos)
## [1] 2.236068 1.414214 2.236068 1.732051 1.414214 1.732051 2.449490 1.414214
## [9] 1.732051 1.000000 1.732051 1.732051 1.414214 1.000000 2.000000 1.732051
## [17] 1.732051 2.000000 2.000000 1.000000 1.732051 1.000000 2.645751 1.732051
## [25] 1.732051 1.414214 2.000000 2.236068 1.000000 2.236068 1.414214 1.732051
## [33] 1.732051 2.000000 1.000000 1.414214 1.414214 1.000000 2.000000 2.000000
## [41] 1.732051 1.000000 1.732051 1.000000 1.414214 2.645751 1.732051 2.236068
## [49] 1.732051 2.645751 2.828427 2.236068 2.828427 2.000000 2.000000 1.732051
## [57] 2.645751 2.236068 1.414214 1.732051 2.828427 2.000000 2.828427 3.316625
## [65] 2.236068 2.000000 3.000000 2.828427 1.732051 1.414214 1.000000 1.000000
## [73] 1.732051 2.000000 1.414214 1.000000 1.414214 1.732051 1.414214 1.414214
Gráficamente, para Monto:
# Histogram of the square-root-transformed Monto values, with default binning.
hist(sqrt(telco$Monto))
Gráficamente, para Minutos:
# Histogram of the square-root-transformed Minutos values, with default binning.
hist(sqrt(telco$Minutos))
Gráficamente, para Reclamos:
# Histogram of the square-root-transformed Reclamos values, with default binning.
hist(sqrt(telco$Reclamos))
En R, para obtener la transformación exponencial de Monto se debe utilizar la función exp()
# e raised to every Monto value; the resulting vector is auto-printed.
exp(telco$Monto)
## [1] 2.457590e+39 3.647388e+41 5.329889e+49 1.084464e+46 9.889030e+42
## [6] 1.490604e+39 3.274797e+44 1.101416e+40 6.543686e+47 8.117410e+41
## [11] 2.229476e+38 1.498331e+37 9.040970e+38 3.675784e+38 3.025077e+36
## [16] 3.694838e+36 6.680423e+39 1.517823e+32 3.675784e+38 6.044697e+39
## [21] 5.540622e+34 2.463952e+38 1.236280e+34 2.229476e+38 1.498331e+37
## [26] 3.009477e+38 1.668803e+34 2.744288e+35 3.017267e+37 2.476724e+36
## [31] 2.457590e+39 3.647388e+41 1.334879e+43 1.502209e+36 1.806563e+42
## [36] 9.111358e+35 8.994347e+40 8.117410e+41 1.668803e+34 3.283274e+43
## [41] 6.749860e+35 7.459749e+35 1.098572e+41 3.017267e+37 2.229476e+38
## [46] 2.241032e+36 2.476724e+36 3.343227e+36 1.359255e+36 6.732433e+36
## [51] 8.947965e+42 5.302404e+51 8.054701e+44 3.968946e+47 1.084464e+46
## [56] 4.898069e+43 5.399228e+44 9.762033e+47 6.526792e+48 2.457590e+39
## [61] 7.213221e+48 1.081664e+47 1.084464e+46 9.889030e+42 1.490604e+39
## [66] 3.274797e+44 1.101416e+40 6.543686e+47 8.117410e+41 9.017628e+39
## [71] 3.694838e+36 2.463952e+38 1.236280e+34 2.229476e+38 1.498331e+37
## [76] 3.009477e+38 1.668803e+34 2.744288e+35 3.017267e+37 2.476724e+36
En R, para obtener la transformación exponencial de Minutos se debe utilizar la función exp()
# e raised to every Minutos value; the resulting vector is auto-printed.
exp(telco$Minutos)
## [1] 2.146436e+14 5.849720e+13 2.129854e+17 1.168889e+17 2.608143e+16
## [6] 9.520700e+18 1.049484e+20 2.135367e+16 1.739275e+18 4.727839e+18
## [11] 2.860176e+19 2.135367e+16 5.320482e+11 1.181038e+13 3.227036e+11
## [16] 1.952243e+12 7.916735e+12 1.301879e+14 5.334254e+10 5.895263e+10
## [21] 9.644558e+13 3.941510e+11 4.333579e+13 3.210394e+13 3.941510e+11
## [26] 6.448249e+14 6.498452e+11 2.157562e+12 3.475412e+21 2.635252e+12
## [31] 1.581919e+16 5.279380e+14 3.185593e+16 1.288487e+18 4.300101e+16
## [36] 1.278533e+21 2.621663e+14 6.398435e+17 1.288487e+18 1.063137e+15
## [41] 4.255865e+20 1.586013e+15 9.594822e+15 8.614685e+18 1.937161e+15
## [46] 2.608143e+16 2.359945e+16 1.054924e+18 6.998620e+21 3.911061e+14
## [51] 3.831008e+22 7.734672e+21 5.265750e+15 1.927172e+17 1.030663e+27
## [56] 6.365439e+19 1.278533e+21 1.743777e+17 1.450001e+11 3.511536e+17
## [61] 1.281842e+20 8.614685e+18 9.398432e+23 9.570051e+16 2.568001e+22
## [66] 1.420321e+19 8.659340e+16 4.679204e+22 1.897511e+23 5.819554e+15
## [71] 3.575657e+10 6.464940e+13 2.146436e+14 5.849720e+13 4.333579e+13
## [76] 4.814172e+11 3.982478e+07 9.669522e+12 5.819554e+15 6.464940e+13
En R, para obtener la transformación exponencial de Reclamos se debe utilizar la función exp()
# e raised to every Reclamos value; the resulting vector is auto-printed.
exp(telco$Reclamos)
## [1] 148.413159 7.389056 148.413159 20.085537 7.389056
## [6] 20.085537 403.428793 7.389056 20.085537 2.718282
## [11] 20.085537 20.085537 7.389056 2.718282 54.598150
## [16] 20.085537 20.085537 54.598150 54.598150 2.718282
## [21] 20.085537 2.718282 1096.633158 20.085537 20.085537
## [26] 7.389056 54.598150 148.413159 2.718282 148.413159
## [31] 7.389056 20.085537 20.085537 54.598150 2.718282
## [36] 7.389056 7.389056 2.718282 54.598150 54.598150
## [41] 20.085537 2.718282 20.085537 2.718282 7.389056
## [46] 1096.633158 20.085537 148.413159 20.085537 1096.633158
## [51] 2980.957987 148.413159 2980.957987 54.598150 54.598150
## [56] 20.085537 1096.633158 148.413159 7.389056 20.085537
## [61] 2980.957987 54.598150 2980.957987 59874.141715 148.413159
## [66] 54.598150 8103.083928 2980.957987 20.085537 7.389056
## [71] 2.718282 2.718282 20.085537 54.598150 7.389056
## [76] 2.718282 7.389056 20.085537 7.389056 7.389056
Graficamente para Monto
# Histogram of the exponentiated Monto values, with default binning.
hist(exp(telco$Monto))
Graficamente para Minutos
# Histogram of the exponentiated Minutos values, with default binning.
hist(exp(telco$Minutos))
Graficamente para Reclamos
# Histogram of the exponentiated Reclamos values, with default binning.
hist(exp(telco$Reclamos))
Forma 2 para Monto:
# Second approach: store the exponentiated Monto values in a variable first,
# then draw the histogram from that variable.
Monto_exp <- exp(x = telco$Monto)
hist(x = Monto_exp)
Forma 2 para Minutos:
# Second approach: store the exponentiated Minutos values in a variable first,
# then draw the histogram from that variable.
Minutos_exp <- exp(x = telco$Minutos)
hist(x = Minutos_exp)
Forma 2 para Reclamos:
# Second approach: store the exponentiated Reclamos values in a variable first,
# then draw the histogram from that variable.
Reclamos_exp <- exp(x = telco$Reclamos)
hist(x = Reclamos_exp)
Para la transformación logarítmica de Monto se utiliza log() (dentro de los argumentos se puede cambiar la base)
# Natural logarithm (no `base` given) of every Monto value; auto-printed.
log(telco$Monto)
## [1] 4.507557 4.561218 4.740575 4.663439 4.595120 4.502029 4.629863 4.523960
## [9] 4.701389 4.569543 4.480740 4.449685 4.496471 4.486387 4.430817 4.433195
## [17] 4.518522 4.305416 4.486387 4.517431 4.382027 4.481872 4.363099 4.480740
## [25] 4.449685 4.484132 4.366913 4.401829 4.457830 4.428433 4.507557 4.561218
## [33] 4.598146 4.422449 4.577799 4.416428 4.546481 4.569543 4.366913 4.607168
## [41] 4.412798 4.414010 4.548600 4.457830 4.480740 4.427239 4.428433 4.432007
## [49] 4.421247 4.440296 4.594109 4.779963 4.638605 4.696837 4.663439 4.611152
## [57] 4.634729 4.705016 4.722064 4.507557 4.722953 4.684905 4.663439 4.595120
## [65] 4.502029 4.629863 4.523960 4.701389 4.569543 4.521789 4.433195 4.481872
## [73] 4.363099 4.480740 4.449685 4.484132 4.366913 4.401829 4.457830 4.428433
Graficamente para Monto
# Histogram of the natural-log-transformed Monto values, with default binning.
hist(log(telco$Monto))
Cambiar la base para Monto
# Same transformation with the logarithm base switched to 2 via `base`.
log(telco$Monto, base = 2)
## [1] 6.503031 6.580447 6.839204 6.727920 6.629357 6.495056 6.679480 6.526695
## [9] 6.782671 6.592457 6.464342 6.419539 6.487036 6.472488 6.392317 6.395748
## [17] 6.518850 6.211402 6.472488 6.517276 6.321928 6.465974 6.294621 6.464342
## [25] 6.419539 6.469235 6.300124 6.350497 6.431289 6.388878 6.503031 6.580447
## [33] 6.633722 6.380245 6.604368 6.371559 6.559186 6.592457 6.300124 6.646739
## [41] 6.366322 6.368070 6.562242 6.431289 6.464342 6.387156 6.388878 6.394034
## [49] 6.378512 6.405992 6.627899 6.896030 6.692092 6.776104 6.727920 6.652486
## [57] 6.686501 6.787903 6.812498 6.503031 6.813781 6.758889 6.727920 6.629357
## [65] 6.495056 6.679480 6.526695 6.782671 6.592457 6.523562 6.395748 6.465974
## [73] 6.294621 6.464342 6.419539 6.469235 6.300124 6.350497 6.431289 6.388878
Graficamente para Monto
# Histogram of the base-2 logarithm of Monto, with default binning.
hist(log(telco$Monto, base = 2))
Para la transformación logarítmica de Minutos se utiliza log() (dentro de los argumentos se puede cambiar la base)
# Natural logarithm (no `base` given) of every Minutos value; auto-printed.
log(telco$Minutos)
## [1] 3.496508 3.456317 3.686376 3.671225 3.632309 3.777348 3.830813 3.627004
## [9] 3.737670 3.761200 3.802208 3.627004 3.295837 3.404525 3.277145 3.342862
## [17] 3.391147 3.481240 3.206803 3.210844 3.471966 3.284664 3.446808 3.437208
## [25] 3.284664 3.529297 3.303217 3.346389 3.903991 3.353407 3.618993 3.523415
## [33] 3.637586 3.730501 3.645450 3.883624 3.502550 3.713572 3.730501 3.543854
## [41] 3.860730 3.555348 3.605498 3.775057 3.561046 3.632309 3.629660 3.725693
## [49] 3.918005 3.514526 3.951244 3.919991 3.589059 3.683867 4.130355 3.819908
## [57] 3.883624 3.681351 3.246491 3.698830 3.835142 3.775057 4.010963 3.666122
## [65] 3.943522 3.786460 3.663562 3.955082 3.981549 3.591818 3.190476 3.459466
## [73] 3.496508 3.456317 3.446808 3.292126 2.862201 3.397858 3.591818 3.459466
Graficamente para Minutos
# Histogram of the natural-log-transformed Minutos values, with default binning.
hist(log(telco$Minutos))
Cambiar la base para Minutos
# Same transformation with the logarithm base switched to 2 via `base`.
log(telco$Minutos, base = 2)
## [1] 5.044394 4.986411 5.318317 5.296457 5.240314 5.449561 5.526695 5.232661
## [9] 5.392317 5.426265 5.485427 5.232661 4.754888 4.911692 4.727920 4.822730
## [17] 4.892391 5.022368 4.626439 4.632268 5.008989 4.738768 4.972693 4.958843
## [25] 4.738768 5.091700 4.765535 4.827819 5.632268 4.837943 5.221104 5.083213
## [33] 5.247928 5.381975 5.259272 5.602884 5.053111 5.357552 5.381975 5.112700
## [41] 5.569856 5.129283 5.201634 5.446256 5.137504 5.240314 5.236493 5.375039
## [49] 5.652486 5.070389 5.700440 5.655352 5.177918 5.314697 5.958843 5.510962
## [57] 5.602884 5.311067 4.683696 5.336283 5.532940 5.446256 5.786596 5.289097
## [65] 5.689299 5.462707 5.285402 5.705978 5.744161 5.181898 4.602884 4.990955
## [73] 5.044394 4.986411 4.972693 4.749534 4.129283 4.902074 5.181898 4.990955
Graficamente para Minutos
# Histogram of the base-2 logarithm of Minutos, with default binning.
hist(log(telco$Minutos, base = 2))
Para la transformación logarítmica de Reclamos se utiliza log() (dentro de los argumentos se puede cambiar la base)
# Natural logarithm (no `base` given) of every Reclamos value; auto-printed.
log(telco$Reclamos)
## [1] 1.6094379 0.6931472 1.6094379 1.0986123 0.6931472 1.0986123 1.7917595
## [8] 0.6931472 1.0986123 0.0000000 1.0986123 1.0986123 0.6931472 0.0000000
## [15] 1.3862944 1.0986123 1.0986123 1.3862944 1.3862944 0.0000000 1.0986123
## [22] 0.0000000 1.9459101 1.0986123 1.0986123 0.6931472 1.3862944 1.6094379
## [29] 0.0000000 1.6094379 0.6931472 1.0986123 1.0986123 1.3862944 0.0000000
## [36] 0.6931472 0.6931472 0.0000000 1.3862944 1.3862944 1.0986123 0.0000000
## [43] 1.0986123 0.0000000 0.6931472 1.9459101 1.0986123 1.6094379 1.0986123
## [50] 1.9459101 2.0794415 1.6094379 2.0794415 1.3862944 1.3862944 1.0986123
## [57] 1.9459101 1.6094379 0.6931472 1.0986123 2.0794415 1.3862944 2.0794415
## [64] 2.3978953 1.6094379 1.3862944 2.1972246 2.0794415 1.0986123 0.6931472
## [71] 0.0000000 0.0000000 1.0986123 1.3862944 0.6931472 0.0000000 0.6931472
## [78] 1.0986123 0.6931472 0.6931472
Graficamente para Reclamos
# Histogram of the natural-log-transformed Reclamos values, with default binning.
hist(log(telco$Reclamos))
Cambiar la base para Reclamos
# Same transformation with the logarithm base switched to 2 via `base`.
log(telco$Reclamos, base = 2)
## [1] 2.321928 1.000000 2.321928 1.584963 1.000000 1.584963 2.584963 1.000000
## [9] 1.584963 0.000000 1.584963 1.584963 1.000000 0.000000 2.000000 1.584963
## [17] 1.584963 2.000000 2.000000 0.000000 1.584963 0.000000 2.807355 1.584963
## [25] 1.584963 1.000000 2.000000 2.321928 0.000000 2.321928 1.000000 1.584963
## [33] 1.584963 2.000000 0.000000 1.000000 1.000000 0.000000 2.000000 2.000000
## [41] 1.584963 0.000000 1.584963 0.000000 1.000000 2.807355 1.584963 2.321928
## [49] 1.584963 2.807355 3.000000 2.321928 3.000000 2.000000 2.000000 1.584963
## [57] 2.807355 2.321928 1.000000 1.584963 3.000000 2.000000 3.000000 3.459432
## [65] 2.321928 2.000000 3.169925 3.000000 1.584963 1.000000 0.000000 0.000000
## [73] 1.584963 2.000000 1.000000 0.000000 1.000000 1.584963 1.000000 1.000000
Graficamente para Reclamos
# Histogram of the base-2 logarithm of Reclamos, with default binning.
hist(log(telco$Reclamos, base = 2))
# Keep each transformation of Monto in its own vector for later plotting.
# Logarithms: natural, base 2 and base 5.
Monto_ln <- log(x = telco$Monto)
Monto_log2 <- log(x = telco$Monto, base = 2)
Monto_log5 <- log(x = telco$Monto, base = 5)
# Square root and exponential.
Monto_sqtr <- sqrt(x = telco$Monto)
Monto_exp <- exp(x = telco$Monto)
# Keep each transformation of Minutos in its own vector for later plotting.
# Logarithms: natural, base 2 and base 5.
Minutos_ln <- log(x = telco$Minutos)
Minutos_log2 <- log(x = telco$Minutos, base = 2)
Minutos_log5 <- log(x = telco$Minutos, base = 5)
# Square root and exponential.
Minutos_sqtr <- sqrt(x = telco$Minutos)
Minutos_exp <- exp(x = telco$Minutos)
# Keep each transformation of Reclamos in its own vector for later plotting.
# Logarithms: natural, base 2 and base 5.
Reclamos_ln <- log(x = telco$Reclamos)
Reclamos_log2 <- log(x = telco$Reclamos, base = 2)
Reclamos_log5 <- log(x = telco$Reclamos, base = 5)
# Square root and exponential.
Reclamos_sqtr <- sqrt(x = telco$Reclamos)
Reclamos_exp <- exp(x = telco$Reclamos)
Ver graficamente cada una Monto
# Put the next six plots on a 3 x 2 grid: the raw Monto histogram followed by
# one histogram per stored transformation.
par(mfrow = c(3, 2))
hist(x = telco$Monto)
hist(x = Monto_sqtr)
hist(x = Monto_exp)
hist(x = Monto_ln)
hist(x = Monto_log2)
hist(x = Monto_log5)
Ver graficamente cada una Minutos
# Put the next six plots on a 3 x 2 grid: the raw Minutos histogram followed by
# one histogram per stored transformation.
par(mfrow = c(3, 2))
hist(x = telco$Minutos)
hist(x = Minutos_sqtr)
hist(x = Minutos_exp)
hist(x = Minutos_ln)
hist(x = Minutos_log2)
hist(x = Minutos_log5)
Ver graficamente cada una Reclamos
# Put the next six plots on a 3 x 2 grid: the raw Reclamos histogram followed
# by one histogram per stored transformation.
par(mfrow = c(3, 2))
hist(x = telco$Reclamos)
hist(x = Reclamos_sqtr)
hist(x = Reclamos_exp)
hist(x = Reclamos_ln)
hist(x = Reclamos_log2)
hist(x = Reclamos_log5)
Este tipo de transformaciones busca que los datos sean simétricos (distribución en forma de campana) o que se cumpla el supuesto estadístico
La visualización de la distribución puede mejorarse con la gráfica de densidad
# Monto: kernel density curves on a 3 x 2 grid, raw values first and then each
# stored transformation.
par(mfrow = c(3, 2))
plot(
  x = density(x = telco$Monto),
  main = "Distribución de montos originales"
)
plot(
  x = density(x = Monto_sqtr),
  main = "Distribución de montos transformadas - sqrt"
)
plot(
  x = density(x = Monto_exp),
  main = "Distribución de montos transformadas - exp"
)
plot(
  x = density(x = Monto_ln),
  main = "Distribución de montos transformadas - ln"
)
plot(
  x = density(x = Monto_log2),
  main = "Distribución de montos transformadas - log2"
)
plot(
  x = density(x = Monto_log5),
  main = "Distribución de montos transformadas - log5"
)
# Minutos: kernel density curves on a 3 x 2 grid, raw values first and then
# each stored transformation.
par(mfrow = c(3, 2))
plot(
  x = density(x = telco$Minutos),
  main = "Distribución de minutos originales"
)
plot(
  x = density(x = Minutos_sqtr),
  main = "Distribución de minutos transformadas - sqrt"
)
plot(
  x = density(x = Minutos_exp),
  main = "Distribución de minutos transformadas - exp"
)
plot(
  x = density(x = Minutos_ln),
  main = "Distribución de minutos transformadas - ln"
)
plot(
  x = density(x = Minutos_log2),
  main = "Distribución de minutos transformadas - log2"
)
plot(
  x = density(x = Minutos_log5),
  main = "Distribución de minutos transformadas - log5"
)
# Reclamos: kernel density curves on a 3 x 2 grid, raw values first and then
# each stored transformation.
par(mfrow = c(3, 2))
plot(
  x = density(x = telco$Reclamos),
  main = "Distribución de reclamos originales"
)
plot(
  x = density(x = Reclamos_sqtr),
  main = "Distribución de reclamos transformadas - sqrt"
)
plot(
  x = density(x = Reclamos_exp),
  main = "Distribución de reclamos transformadas - exp"
)
plot(
  x = density(x = Reclamos_ln),
  main = "Distribución de reclamos transformadas - ln"
)
plot(
  x = density(x = Reclamos_log2),
  main = "Distribución de reclamos transformadas - log2"
)
plot(
  x = density(x = Reclamos_log5),
  main = "Distribución de reclamos transformadas - log5"
)
Se puede realizar un análisis general de las variables originales y verificar su comportamiento; a partir de allí se puede aplicar la transformación más adecuada según su objetivo.
Grafica general
# Overview of columns 4 to 8 of telco: pairwise scatterplots, correlation
# coefficients, and a histogram of each variable on the diagonal.
library(PerformanceAnalytics)
# chart.Correlation() computes the correlations itself, so it must receive the
# raw observations. Passing cor(...) would chart the correlations of the
# correlation coefficients instead of the original variables.
# NOTE(review): assumes columns 4:8 are all numeric, as the previous cor()
# call already required - confirm against the dataset.
chart.Correlation(telco[, 4:8], histogram = TRUE)