library(MVN)
## Warning: package 'MVN' was built under R version 4.3.3
# Read the rates table from the working directory and preview its first rows
# (first column is the country name, the remaining columns are numeric rates).
data <- utils::read.csv(file = "Tasas.csv")
utils::head(data)
## Paises TN TM EV EVM EVH
## 1 Afganistan 35.84 7.34 61.98 65.28 58.92
## 2 Albania 8.90 8.60 75.50 77.70 73.60
## 3 Alemania 8.80 12.70 80.70 83.30 78.40
## 4 Andorra 6.20 4.60 83.70 86.00 81.30
## 5 Angola 38.81 8.01 61.64 64.31 59.03
## 6 Antigua y Barbuda 12.12 6.37 78.50 80.94 75.78
# Mardia's multivariate skewness/kurtosis test on every column except the
# first (country name); only the multivariate result table is displayed.
Mardia <- mvn(data[, -1], mvnTest = "mardia")
Mardia[["multivariateNormality"]]
## Test Statistic p value Result
## 1 Mardia Skewness 201.323801536089 2.98182323473629e-25 NO
## 2 Mardia Kurtosis 4.20204709407368 2.64512086503021e-05 NO
## 3 MVN <NA> <NA> NO
# Henze-Zirkler multivariate normality test on the numeric columns
# (country-name column dropped); show the multivariate result table.
HZ <- mvn(data[, -1], mvnTest = "hz")
HZ[["multivariateNormality"]]
## Test HZ p value MVN
## 1 Henze-Zirkler 2.294973 0 NO
# Royston multivariate normality test on the numeric columns
# (country-name column dropped); show the multivariate result table.
Royston <- mvn(data[, -1], mvnTest = "royston")
Royston[["multivariateNormality"]]
## Test H p value MVN
## 1 Royston 25.55369 1.527186e-06 NO
# Doornik-Hansen multivariate normality test on the numeric columns
# (country-name column dropped); show the multivariate result table.
DH <- mvn(data[, -1], mvnTest = "dh")
DH[["multivariateNormality"]]
## Test E df p value MVN
## 1 Doornik-Hansen 239.7374 10 7.77862e-46 NO
# Después de aplicar varias pruebas multivariadas (Mardia, Royston, Henze-Zirkler y Doornik-Hansen), todas rechazan la normalidad multivariada; por lo tanto, concluimos que los datos NO son normales.
# Shapiro-Wilk univariate normality test for each numeric variable (the
# country-name column is dropped). `desc = TRUE` also requests the descriptive
# statistics table. The reserved word TRUE is used instead of `T`, which is an
# ordinary variable that can be reassigned.
# The whole result is displayed: multivariate test, univariate tests and
# descriptives.
SW <- mvn(data[, -1], univariateTest = "SW", desc = TRUE)
SW
## $multivariateNormality
## Test HZ p value MVN
## 1 Henze-Zirkler 2.294973 0 NO
##
## $univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Wilk TN 0.9233 <0.001 NO
## 2 Shapiro-Wilk TM 0.9589 <0.001 NO
## 3 Shapiro-Wilk EV 0.9776 0.0034 NO
## 4 Shapiro-Wilk EVM 0.9707 4e-04 NO
## 5 Shapiro-Wilk EVH 0.9804 0.008 NO
##
## $Descriptives
## n Mean Std.Dev Median Min Max 25th 75th Skew
## TN 194 19.06469 9.872522 16.885 5.00 45.29 10.4250 27.2400 0.65598906
## TM 194 8.57701 3.074472 8.090 1.31 18.40 6.6150 9.9975 0.72290852
## EV 194 71.31052 7.764204 71.860 52.53 85.60 65.6700 76.8050 -0.23215863
## EVM 194 74.01974 7.876189 75.165 53.07 87.90 68.4375 79.5975 -0.39876259
## EVH 194 68.73345 7.767232 68.700 50.37 84.10 63.0850 73.6750 -0.07285581
## Kurtosis
## TN -0.6175703
## TM 0.6721286
## EV -0.6532162
## EVM -0.5952345
## EVH -0.6863484
# Shapiro-Francia univariate normality test for each numeric variable (the
# country-name column is dropped); only the univariate table is displayed.
# `desc = TRUE` is spelled with the reserved word because `T` is a regular,
# reassignable variable.
SF <- mvn(data[, -1], univariateTest = "SF", desc = TRUE)
SF$univariateNormality
## Test Variable Statistic p value Normality
## 1 Shapiro-Francia TN 0.9267 <0.001 NO
## 2 Shapiro-Francia TM 0.9588 1e-04 NO
## 3 Shapiro-Francia EV 0.9809 0.0112 NO
## 4 Shapiro-Francia EVM 0.9736 0.0015 NO
## 5 Shapiro-Francia EVH 0.9835 0.0237 NO
# Anderson-Darling univariate normality test for each numeric variable (the
# country-name column is dropped); only the univariate table is displayed.
# `desc = TRUE` is spelled with the reserved word because `T` is a regular,
# reassignable variable.
AD <- mvn(data[, -1], univariateTest = "AD", desc = TRUE)
AD$univariateNormality
## Test Variable Statistic p value Normality
## 1 Anderson-Darling TN 4.9804 <0.001 NO
## 2 Anderson-Darling TM 2.6962 <0.001 NO
## 3 Anderson-Darling EV 0.9374 0.0172 NO
## 4 Anderson-Darling EVM 1.5290 6e-04 NO
## 5 Anderson-Darling EVH 0.8141 0.0347 NO
# EJERCICIO 2: para este ejercicio utilizaré el año 2007
# Exercise 2 setup: attach devtools and the gapminder data package.
library(devtools)
## Warning: package 'devtools' was built under R version 4.3.3
## Loading required package: usethis
## Warning: package 'usethis' was built under R version 4.3.3
# Install gapminder from GitHub only when it is missing, and do it before the
# package is attached: an unconditional install needs network access on every
# run, and R refuses to replace a package that is already in use.
if (!requireNamespace("gapminder", quietly = TRUE)) {
  devtools::install_github("jennybc/gapminder")
}
library(gapminder)
## Warning: package 'gapminder' was built under R version 4.3.3
# Keep a local copy of the gapminder table, then build the analysis frame from
# its 2007 rows: population (pob), life expectancy (lf) and GDP per capita
# (gdp). The year filter is evaluated once and the columns are picked from the
# filtered rows.
gapminder <- gapminder::gapminder
Data <- with(
  gapminder[gapminder$year == 2007, ],
  data.frame(pob = pop, lf = lifeExp, gdp = gdpPercap)
)
library(bestNormalize)
## Warning: package 'bestNormalize' was built under R version 4.3.3
# Shapiro-Wilk test on the raw 2007 population values. The wrapping parentheses
# store the result in SW1 and display it in one statement.
(SW1 <- shapiro.test(Data$pob))
##
## Shapiro-Wilk normality test
##
## data: Data$pob
## W = 0.25267, p-value < 2.2e-16
# Shapiro-Wilk test on the raw 2007 life-expectancy values. The wrapping
# parentheses store the result in SW2 and display it in one statement.
(SW2 <- shapiro.test(Data$lf))
##
## Shapiro-Wilk normality test
##
## data: Data$lf
## W = 0.89467, p-value = 1.357e-08
# Shapiro-Wilk test on the raw 2007 GDP-per-capita values. The wrapping
# parentheses store the result in SW3 and display it in one statement.
(SW3 <- shapiro.test(Data$gdp))
##
## Shapiro-Wilk normality test
##
## data: Data$gdp
## W = 0.80644, p-value = 2.039e-12
# Transformaciones
library(bestNormalize)
# Choose the best normalizing transformation for population. bestNormalize()
# ranks the candidates with repeated cross-validation on random folds, so the
# RNG is seeded first; without it the reported statistics, and possibly the
# selected transformation, change from run to run.
set.seed(123)
best_trans1 <- bestNormalize(Data$pob)
best_trans1
## Best Normalizing transformation with 142 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 0.9109
## - Box-Cox: 0.9493
## - Center+scale: 7.2895
## - Double Reversed Log_b(x+a): 6.6665
## - Log_b(x+a): 0.9109
## - orderNorm (ORQ): 1.1204
## - sqrt(x + a): 2.8248
## - Yeo-Johnson: 0.9493
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## Standardized asinh(x) Transformation with 142 nonmissing obs.:
## Relevant statistics:
## - mean (before standardization) = 16.96914
## - sd (before standardization) = 1.525595
# Choose the best normalizing transformation for life expectancy. The RNG is
# seeded because bestNormalize() compares candidates through repeated
# cross-validation on random folds; unseeded runs are not reproducible.
set.seed(123)
best_trans2 <- bestNormalize(Data$lf)
best_trans2
## Best Normalizing transformation with 142 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 3.4491
## - Box-Cox: 1.9288
## - Center+scale: 2.6217
## - Double Reversed Log_b(x+a): 1.5337
## - Exp(x): 14.2968
## - Log_b(x+a): 3.4491
## - orderNorm (ORQ): 1.3996
## - sqrt(x + a): 3.0476
## - Yeo-Johnson: 1.717
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 142 nonmissing obs and no ties
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 39.613 57.160 71.935 76.413 82.603
# Choose the best normalizing transformation for GDP per capita. The RNG is
# seeded because bestNormalize() compares candidates through repeated
# cross-validation on random folds; unseeded runs are not reproducible.
set.seed(123)
best_trans3 <- bestNormalize(Data$gdp)
best_trans3
## Best Normalizing transformation with 142 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 1.3642
## - Box-Cox: 1.3611
## - Center+scale: 4.1695
## - Double Reversed Log_b(x+a): 5.1676
## - Log_b(x+a): 1.3642
## - orderNorm (ORQ): 1.2587
## - sqrt(x + a): 1.971
## - Yeo-Johnson: 1.3611
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 142 nonmissing obs and no ties
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 277.552 1624.842 6124.371 18008.836 49357.190
library(MVN)
# Shapiro-Wilk test on the transformed population values stored in
# best_trans1$x.t; the result is kept in SW1_trans and displayed.
(SW1_trans <- shapiro.test(best_trans1$x.t))
##
## Shapiro-Wilk normality test
##
## data: best_trans1$x.t
## W = 0.99249, p-value = 0.6593
# Shapiro-Wilk test on the transformed life-expectancy values stored in
# best_trans2$x.t; the result is kept in SW2_trans and displayed.
# NOTE(review): the selected transformation was orderNorm, which presumably
# maps the data onto normal quantiles, so a p-value near 1 may be expected by
# construction -- confirm whether this test is informative.
(SW2_trans <- shapiro.test(best_trans2$x.t))
##
## Shapiro-Wilk normality test
##
## data: best_trans2$x.t
## W = 0.99968, p-value = 1
# Shapiro-Wilk test on the transformed GDP-per-capita values stored in
# best_trans3$x.t; the result is kept in SW3_trans and displayed.
# NOTE(review): the selected transformation was orderNorm, which presumably
# maps the data onto normal quantiles, so a p-value near 1 may be expected by
# construction -- confirm whether this test is informative.
(SW3_trans <- shapiro.test(best_trans3$x.t))
##
## Shapiro-Wilk normality test
##
## data: best_trans3$x.t
## W = 0.99968, p-value = 1
library(bestNormalize)
# Load the autotrader example data set and list its column names. The loader is
# called as utils::data() so the call cannot be confused with the `data` data
# frame created earlier in this script.
utils::data(list = "autotrader")
colnames(autotrader)
## [1] "Car_Info" "Link" "Make" "Year" "Location" "Radius"
## [7] "price" "mileage" "status" "model"
# Analysis frame for exercise 3, taken from autotrader: mileage (kilom),
# model year (ant) and price (precio).
Data3 <- with(
  autotrader,
  data.frame(kilom = mileage, ant = Year, precio = price)
)
library(nortest)
# Anderson-Darling normality tests on the raw autotrader variables. Each result
# is displayed right after it is stored, like every other test in this script,
# so the statement about normality that follows is backed by visible output.
AD_kilom <- ad.test(Data3$kilom)
AD_kilom
AD_ant <- ad.test(Data3$ant)
AD_ant
AD_precio <- ad.test(Data3$precio)
AD_precio
# Los datos no son normales.
library(bestNormalize)
# Choose the best normalizing transformation for mileage. The RNG is seeded
# because bestNormalize() compares candidates through repeated cross-validation
# on random folds; unseeded runs are not reproducible.
set.seed(123)
best_transk <- bestNormalize(Data3$kilom)
## Warning: `progress_estimated()` was deprecated in dplyr 1.0.0.
## ℹ The deprecated feature was likely used in the bestNormalize package.
## Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
best_transk
## Best Normalizing transformation with 6283 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 3.388
## - Box-Cox: 3.0709
## - Center+scale: 14.8621
## - Double Reversed Log_b(x+a): 23.1172
## - Log_b(x+a): 3.3852
## - orderNorm (ORQ): 1.1028
## - sqrt(x + a): 5.0991
## - Yeo-Johnson: 3.0674
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6283 nonmissing obs and ties
## - 6077 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2 29099 44800 88950 325556
# Choose the best normalizing transformation for model year. The RNG is seeded
# because bestNormalize() compares candidates through repeated cross-validation
# on random folds; unseeded runs are not reproducible.
set.seed(123)
best_transa <- bestNormalize(Data3$ant)
best_transa
## Best Normalizing transformation with 6283 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 83.5506
## - Box-Cox: 83.5506
## - Center+scale: 83.5506
## - Double Reversed Log_b(x+a): 83.3981
## - Log_b(x+a): 83.5506
## - orderNorm (ORQ): 81.6037
## - sqrt(x + a): 83.5506
## - Yeo-Johnson: 83.5506
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6283 nonmissing obs and ties
## - 17 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 2000 2010 2013 2014 2016
# Choose the best normalizing transformation for price. The RNG is seeded
# because bestNormalize() compares candidates through repeated cross-validation
# on random folds; unseeded runs are not reproducible.
set.seed(123)
best_transp <- bestNormalize(Data3$precio)
best_transp
## Best Normalizing transformation with 6283 Observations
## Estimated Normality Statistics (Pearson P / df, lower => more normal):
## - arcsinh(x): 4.1032
## - Box-Cox: 2.2162
## - Center+scale: 3.4596
## - Double Reversed Log_b(x+a): 6.3819
## - Log_b(x+a): 4.1032
## - orderNorm (ORQ): 1.0972
## - sqrt(x + a): 2.2029
## - Yeo-Johnson: 2.2165
## Estimation method: Out-of-sample via CV with 10 folds and 5 repeats
##
## Based off these, bestNormalize chose:
## orderNorm Transformation with 6283 nonmissing obs and ties
## - 2465 unique values
## - Original quantiles:
## 0% 25% 50% 75% 100%
## 722 11499 15998 21497 64998
# Anderson-Darling test on the transformed mileage values stored in
# best_transk$x.t; the result is kept in trans_kilom and displayed.
# NOTE(review): the selected transformation was orderNorm, which presumably
# maps the data onto normal quantiles, so a p-value near 1 may be expected by
# construction -- confirm whether this test is informative.
(trans_kilom <- ad.test(best_transk$x.t))
##
## Anderson-Darling normality test
##
## data: best_transk$x.t
## A = 0.00032915, p-value = 1
# Anderson-Darling test on the transformed model-year values stored in
# best_transa$x.t; the result is kept in trans_ant and displayed.
(trans_ant <- ad.test(best_transa$x.t))
##
## Anderson-Darling normality test
##
## data: best_transa$x.t
## A = 98.365, p-value < 2.2e-16
# Anderson-Darling test on the transformed price values stored in
# best_transp$x.t; the result is kept in trans_precio and displayed.
# NOTE(review): the selected transformation was orderNorm, which presumably
# maps the data onto normal quantiles, so a p-value near 1 may be expected by
# construction -- confirm whether this test is informative.
(trans_precio <- ad.test(best_transp$x.t))
##
## Anderson-Darling normality test
##
## data: best_transp$x.t
## A = 0.01806, p-value = 1
# Concluimos que, después de la transformación, dos de las tres variables (kilometraje y precio) son normales; la tercera (año) no se pudo normalizar, ya que solo tiene 17 valores distintos.