# setwd("C:/CURSO REG JMG") # Set the working directory (left disabled)
data("airquality") # Load the built-in airquality data set
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% The data #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Work on a copy named Datos so the rest of the script never touches the
# original airquality object.
Datos <- airquality
head(Datos) # First rows
## Ozone Solar.R Wind Temp Month Day
## 1 41 190 7.4 67 5 1
## 2 36 118 8.0 72 5 2
## 3 12 149 12.6 74 5 3
## 4 18 313 11.5 62 5 4
## 5 NA NA 14.3 56 5 5
## 6 28 NA 14.9 66 5 6
# View() opens a spreadsheet-style viewer; it is only called in an interactive
# session so that batch runs (Rscript, knitting) do not fail or block on it.
if (interactive()) {
  View(Datos) # Show the data as a table
}
str(Datos) # Show the structure of the data
## 'data.frame': 153 obs. of 6 variables:
## $ Ozone : int 41 36 12 18 NA 28 23 19 8 NA ...
## $ Solar.R: int 190 118 149 313 NA NA 299 99 19 194 ...
## $ Wind : num 7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
## $ Temp : int 67 72 74 62 56 66 65 59 61 69 ...
## $ Month : int 5 5 5 5 5 5 5 5 5 5 ...
## $ Day : int 1 2 3 4 5 6 7 8 9 10 ...
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Visualising the data #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Missing-value map of the data before any row is dropped.
# Functions are namespace-qualified so this section does not depend on
# DescTools / tidyr having been attached earlier.
DescTools::PlotMiss(Datos, main="Mi Primera figura", clust = FALSE)
Datos <- tidyr::drop_na(Datos) # Drop every row that contains an NA
# Same map after the NA rows are gone.
DescTools::PlotMiss(Datos, main="Datos raw", clust = FALSE)
# Attach once, and only after cleaning, so that bare column names (e.g. Temp)
# resolve to the NA-free data. An earlier attach() of the uncleaned frame was
# dropped: it left a stale copy on the search path and caused the
# "objects are masked" message. Prefer Datos$<column> in new code.
attach(Datos)
# Scatter-plot matrix of the first four columns (Ozone, Solar.R, Wind, Temp).
plot(Datos[, 1:4], col = "blue", cex = 1.5, pch = 16)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Numerical analysis of the data #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Descriptive statistics for the six columns, rounded to 5 decimals.
# stat.desc is namespace-qualified so the call works even when pastecs has not
# been attached with library().
round(pastecs::stat.desc(Datos[1:6]), digits = 5)
## Ozone Solar.R Wind Temp Month Day
## nbr.val 111.00000 111.00000 111.00000 111.00000 111.00000 111.00000
## nbr.null 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
## nbr.na 0.00000 0.00000 0.00000 0.00000 0.00000 0.00000
## min 1.00000 7.00000 2.30000 57.00000 5.00000 1.00000
## max 168.00000 334.00000 20.70000 97.00000 9.00000 31.00000
## range 167.00000 327.00000 18.40000 40.00000 4.00000 30.00000
## sum 4673.00000 20513.00000 1103.30000 8635.00000 801.00000 1770.00000
## median 31.00000 207.00000 9.70000 79.00000 7.00000 16.00000
## mean 42.09910 184.80180 9.93964 77.79279 7.21622 15.94595
## SE.mean 3.15842 8.65179 0.33768 0.90454 0.13985 0.82645
## CI.mean.0.95 6.25924 17.14582 0.66921 1.79260 0.27715 1.63783
## var 1107.29009 8308.74218 12.65732 90.82031 2.17101 75.81523
## std.dev 33.27597 91.15230 3.55771 9.52997 1.47343 8.70719
## coef.var 0.79042 0.49324 0.35793 0.12250 0.20418 0.54604
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Outliers? #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Variable of interest: change the column here to inspect a different one.
# The column is read explicitly from Datos instead of relying on attach(), so
# it always reflects the current contents of the data frame.
Variable <- Datos$Temp
# Vertical box plot of the selected variable
boxplot(Variable, col = "white")
# Overlay the individual observations on the box plot
stripchart(
  Variable,          # Data
  method = "jitter", # Random noise so overlapping points are visible
  pch = 19,          # Plotting symbol
  col = 4,           # Symbol colour
  vertical = TRUE,   # Vertical layout, matching the box plot
  add = TRUE         # Draw on top of the existing plot
)
# Box plots of every column of the data frame in a single figure
boxplot(Datos, col = "pink")
# Numerical summary plus descriptive plot of the selected variable
DescTools::Desc(Variable, plotit = TRUE)
## ------------------------------------------------------------------------------
## Variable (integer)
##
## length n NAs unique 0s mean meanCI'
## 111 111 0 39 0 77.79 76.00
## 100.0% 0.0% 0.0% 79.59
##
## .05 .10 .25 median .75 .90 .95
## 61.00 64.00 71.00 79.00 84.50 90.00 92.50
##
## range sd vcoef mad IQR skew kurt
## 40.00 9.53 0.12 10.38 13.50 -0.22 -0.71
##
## lowest : 57, 58, 59 (2), 61 (3), 62 (2)
## highest: 92 (3), 93 (2), 94 (2), 96, 97
##
## ' 95%-CI (classic)
# Hipótesis:
#   H0: La muestra proviene de una distribución normal.
#   H1: La muestra no proviene de una distribución normal.
# Nivel de significancia: se trabajará con alfa = 0.05.
# Criterio de decisión:
#   Si p < alfa, se rechaza H0.
#   Si p >= alfa, no se rechaza H0.
# Histogram of the selected variable as a first visual check of its shape
hist(Variable)
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Normality tests #%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# All test functions are namespace-qualified so this section does not depend on
# nortest / normtest having been attached with library().
nortest::ad.test(Variable) # Anderson-Darling test
##
## Anderson-Darling normality test
##
## data: Variable
## A = 0.59884, p-value = 0.1174
nortest::cvm.test(Variable) # Cramer-von Mises test (author's note: small samples)
##
## Cramer-von Mises normality test
##
## data: Variable
## W = 0.10117, p-value = 0.1069
nortest::lillie.test(Variable) # Lilliefors (Kolmogorov-Smirnov) test (author's note: large samples)
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: Variable
## D = 0.091227, p-value = 0.02385
nortest::sf.test(Variable) # Shapiro-Francia test (author's note: small samples)
##
## Shapiro-Francia normality test
##
## data: Variable
## W = 0.98457, p-value = 0.1986
# The normtest functions take an nrepl argument, i.e. their p-values are
# simulated; fix the seed so repeated runs give the same result.
# NOTE(review): the p-values pasted below presumably came from an unseeded
# run, so a seeded run may differ slightly from them.
set.seed(123)
normtest::frosini.norm.test(Variable) # Frosini test
##
## Frosini test for normality
##
## data: Variable
## B = 0.2471, p-value = 0.142
normtest::hegazy1.norm.test(Variable, nrepl = 2000) # Hegazy-Green test, Monte Carlo simulation
##
## Hegazy-Green test for normality
##
## data: Variable
## T = 0.094333, p-value = 0.158
shapiro.test(Variable) # Shapiro-Wilk test (author's note: small samples)
##
## Shapiro-Wilk normality test
##
## data: Variable
## W = 0.98007, p-value = 0.09569