En la actualidad, encontramos varias empresas de celulares, como Apple y Samsung, entre otras, que tienen a la venta diferentes gamas de celulares: baja, media, alta y muy alta. Estas gamas se clasifican según la velocidad del microprocesador, los megapíxeles de las cámaras frontal y trasera, el número de núcleos del procesador, la RAM, el tiempo de carga y demás variables. Por ello, en el presente análisis buscamos predecir el rango de precios, según sus categorías, a partir de una base de datos que recopiló datos de ventas de teléfonos móviles de varias empresas.
Descripción de Variables
battery_power: Energía total que una batería puede almacenar en un período de tiempo, medida en mAh. bluetooth: Tiene bluetooth o no. clock_speed: Velocidad a la que el microprocesador ejecuta las instrucciones. int_memory: Memoria interna en gigabytes. n_cores: Número de núcleos del procesador. ram: Memoria de acceso aleatorio en megabytes. dual_sim: Tiene soporte dual sim o no. three_g: Tiene 3G o no. four_g: Tiene 4G o no. fc: Megapíxeles de la cámara frontal. pc: Megapíxeles de la cámara principal. px_height: Altura de la resolución en píxeles. px_width: Ancho de la resolución en píxeles. m_dep: Profundidad del móvil en cm. mobile_wt: Peso del teléfono móvil. sc_h: Altura de la pantalla del móvil en cm. sc_w: Ancho de la pantalla del móvil en cm. talk_time: El tiempo más largo que durará una sola carga de batería. touch_screen: Tiene pantalla táctil o no. wifi: Tiene wifi o no. price_range: Esta es la variable objetivo, con valor 0 (costo bajo), 1 (costo medio), 2 (costo alto) y 3 (costo muy alto).
Importamos la base de datos y la renombramos para trabajar en un análisis descriptivo
# Read the semicolon-delimited phone data set; column-type messages are
# silenced and surrounding whitespace in fields is trimmed.
celulares <- read_delim(
  "celulares.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE,
  show_col_types = FALSE
)

# `data` is the name used by the rest of the descriptive analysis.
data <- celulares

# First rows, as a quick look at the imported table.
head(data)
## # A tibble: 6 × 21
## battery_power bluetooth clock_speed int_memory n_cores ram dual_sim three_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 842 0 2.2 7 2 2549 0 0
## 2 1021 1 0.5 53 3 2631 1 1
## 3 563 1 0.5 41 5 2603 1 1
## 4 615 1 2.5 10 6 2769 0 1
## 5 1821 1 1.2 44 2 1411 0 1
## 6 1859 0 0.5 22 1 1067 1 1
## # ℹ 13 more variables: four_g <dbl>, fc <dbl>, pc <dbl>, px_height <dbl>,
## # px_width <dbl>, m_dep <dbl>, mobile_wt <dbl>, sc_h <dbl>, sc_w <dbl>,
## # talk_time <dbl>, touch_screen <dbl>, wifi <dbl>, price_range <dbl>
tail(data)
## # A tibble: 6 × 21
## battery_power bluetooth clock_speed int_memory n_cores ram dual_sim three_g
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 858 0 2.2 50 1 3978 0 1
## 2 794 1 0.5 2 6 668 1 1
## 3 1965 1 2.6 39 4 2032 1 1
## 4 1911 0 0.9 36 8 3057 1 1
## 5 1512 0 0.9 46 5 869 0 1
## 6 510 1 2 45 6 3919 1 1
## # ℹ 13 more variables: four_g <dbl>, fc <dbl>, pc <dbl>, px_height <dbl>,
## # px_width <dbl>, m_dep <dbl>, mobile_wt <dbl>, sc_h <dbl>, sc_w <dbl>,
## # talk_time <dbl>, touch_screen <dbl>, wifi <dbl>, price_range <dbl>
dim(data) # Dimensions of the data: 2000 observations and 21 variables
## [1] 2000 21
str(data) # Inspect the structure of the data: every column is listed with its type and first values
## spc_tbl_ [2,000 × 21] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ battery_power: num [1:2000] 842 1021 563 615 1821 ...
## $ bluetooth : num [1:2000] 0 1 1 1 1 0 0 0 1 1 ...
## $ clock_speed : num [1:2000] 2.2 0.5 0.5 2.5 1.2 0.5 1.7 0.5 0.5 0.6 ...
## $ int_memory : num [1:2000] 7 53 41 10 44 22 10 24 53 9 ...
## $ n_cores : num [1:2000] 2 3 5 6 2 1 8 4 7 5 ...
## $ ram : num [1:2000] 2549 2631 2603 2769 1411 ...
## $ dual_sim : num [1:2000] 0 1 1 0 0 1 0 1 0 1 ...
## $ three_g : num [1:2000] 0 1 1 1 1 1 1 1 1 1 ...
## $ four_g : num [1:2000] 0 1 1 0 1 0 1 0 0 1 ...
## $ fc : num [1:2000] 1 0 2 0 13 3 4 0 0 2 ...
## $ pc : num [1:2000] 2 6 6 9 14 7 10 0 14 15 ...
## $ px_height : num [1:2000] 20 905 1263 1216 1208 ...
## $ px_width : num [1:2000] 756 1988 1716 1786 1212 ...
## $ m_dep : num [1:2000] 0.6 0.7 0.9 0.8 0.6 0.7 0.8 0.8 0.7 0.1 ...
## $ mobile_wt : num [1:2000] 188 136 145 131 141 164 139 187 174 93 ...
## $ sc_h : num [1:2000] 9 17 11 16 8 17 13 16 17 19 ...
## $ sc_w : num [1:2000] 7 3 2 8 2 1 8 3 1 10 ...
## $ talk_time : num [1:2000] 19 7 9 11 15 10 18 5 20 12 ...
## $ touch_screen : num [1:2000] 0 1 1 0 1 0 0 1 0 0 ...
## $ wifi : num [1:2000] 1 0 0 0 0 0 1 1 0 0 ...
## $ price_range : num [1:2000] 1 2 2 2 1 1 3 0 0 0 ...
## - attr(*, "spec")=
## .. cols(
## .. battery_power = col_double(),
## .. bluetooth = col_double(),
## .. clock_speed = col_double(),
## .. int_memory = col_double(),
## .. n_cores = col_double(),
## .. ram = col_double(),
## .. dual_sim = col_double(),
## .. three_g = col_double(),
## .. four_g = col_double(),
## .. fc = col_double(),
## .. pc = col_double(),
## .. px_height = col_double(),
## .. px_width = col_double(),
## .. m_dep = col_double(),
## .. mobile_wt = col_double(),
## .. sc_h = col_double(),
## .. sc_w = col_double(),
## .. talk_time = col_double(),
## .. touch_screen = col_double(),
## .. wifi = col_double(),
## .. price_range = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
colSums(is.na(data)) # Count the NA values in each column of the data frame.
## battery_power bluetooth clock_speed int_memory n_cores
## 0 0 0 0 0
## ram dual_sim three_g four_g fc
## 0 0 0 0 0
## pc px_height px_width m_dep mobile_wt
## 0 0 0 0 0
## sc_h sc_w talk_time touch_screen wifi
## 0 0 0 0 0
## price_range
## 0
attach(data) # Attach `data` so its columns can be referenced by bare name (e.g. ram, dual_sim)
# NOTE(review): this runs before the as.factor() conversions applied to `data`
# further down. The later bare-name call freq(dual_sim) still reports
# "Type: Numeric", so the attached columns do not follow those changes;
# prefer data$<column> in new code.
summary(data) # Per-column summary statistics (min, quartiles, mean, max); used here as a further check for missing values
## battery_power bluetooth clock_speed int_memory
## Min. : 501.0 Min. :0.000 Min. :0.500 Min. : 2.00
## 1st Qu.: 851.8 1st Qu.:0.000 1st Qu.:0.700 1st Qu.:16.00
## Median :1226.0 Median :0.000 Median :1.500 Median :32.00
## Mean :1238.5 Mean :0.495 Mean :1.522 Mean :32.05
## 3rd Qu.:1615.2 3rd Qu.:1.000 3rd Qu.:2.200 3rd Qu.:48.00
## Max. :1998.0 Max. :1.000 Max. :3.000 Max. :64.00
## n_cores ram dual_sim three_g
## Min. :1.000 Min. : 256 Min. :0.0000 Min. :0.0000
## 1st Qu.:3.000 1st Qu.:1208 1st Qu.:0.0000 1st Qu.:1.0000
## Median :4.000 Median :2146 Median :1.0000 Median :1.0000
## Mean :4.521 Mean :2124 Mean :0.5095 Mean :0.7615
## 3rd Qu.:7.000 3rd Qu.:3064 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :8.000 Max. :3998 Max. :1.0000 Max. :1.0000
## four_g fc pc px_height
## Min. :0.0000 Min. : 0.000 Min. : 0.000 Min. : 0.0
## 1st Qu.:0.0000 1st Qu.: 1.000 1st Qu.: 5.000 1st Qu.: 282.8
## Median :1.0000 Median : 3.000 Median :10.000 Median : 564.0
## Mean :0.5215 Mean : 4.309 Mean : 9.916 Mean : 645.1
## 3rd Qu.:1.0000 3rd Qu.: 7.000 3rd Qu.:15.000 3rd Qu.: 947.2
## Max. :1.0000 Max. :19.000 Max. :20.000 Max. :1960.0
## px_width m_dep mobile_wt sc_h
## Min. : 500.0 Min. :0.1000 Min. : 80.0 Min. : 5.00
## 1st Qu.: 874.8 1st Qu.:0.2000 1st Qu.:109.0 1st Qu.: 9.00
## Median :1247.0 Median :0.5000 Median :141.0 Median :12.00
## Mean :1251.5 Mean :0.5018 Mean :140.2 Mean :12.31
## 3rd Qu.:1633.0 3rd Qu.:0.8000 3rd Qu.:170.0 3rd Qu.:16.00
## Max. :1998.0 Max. :1.0000 Max. :200.0 Max. :19.00
## sc_w talk_time touch_screen wifi
## Min. : 0.000 Min. : 2.00 Min. :0.000 Min. :0.000
## 1st Qu.: 2.000 1st Qu.: 6.00 1st Qu.:0.000 1st Qu.:0.000
## Median : 5.000 Median :11.00 Median :1.000 Median :1.000
## Mean : 5.767 Mean :11.01 Mean :0.503 Mean :0.507
## 3rd Qu.: 9.000 3rd Qu.:16.00 3rd Qu.:1.000 3rd Qu.:1.000
## Max. :18.000 Max. :20.00 Max. :1.000 Max. :1.000
## price_range
## Min. :0.00
## 1st Qu.:0.75
## Median :1.50
## Mean :1.50
## 3rd Qu.:2.25
## Max. :3.00
De acuerdo con el análisis descriptivo, encontramos que 6 variables son cualitativas nominales, 2 son cuantitativas continuas y las 13 restantes son cuantitativas discretas.
Variables cualitativas: Nominal - Ordinal
Inicialmente vamos a convertir las variables numéricas a factores
# Recode the six 0/1 indicator columns of `data` as factors so they are
# treated as categories by the plots and tables below.
data$bluetooth <- as.factor(data$bluetooth)
data$dual_sim <- as.factor(data$dual_sim)
data$three_g <- as.factor(data$three_g)
data$four_g <- as.factor(data$four_g)
data$touch_screen <- as.factor(data$touch_screen)
data$wifi <- as.factor(data$wifi)

# One frequency bar chart per indicator, all on the same 0-1200 y scale.
Barra1 <- ggplot(data, aes(x = bluetooth, fill = bluetooth)) +
  geom_bar(width = 0.5, colour = "black") +
  labs(x = "bluetooth", y = "Frecuencia") +
  ylim(0, 1200)

Barra2 <- ggplot(data, aes(x = dual_sim, fill = dual_sim)) +
  geom_bar(width = 0.5, colour = "black") +
  labs(x = "dual_sim", y = "Frecuencia") +
  ylim(0, 1200)

Barra5 <- ggplot(data, aes(x = wifi, fill = wifi)) +
  geom_bar(width = 0.5, colour = "black") +
  labs(x = "wifi", y = "Frecuencia") +
  ylim(0, 1200)

Barra6 <- ggplot(data, aes(x = touch_screen, fill = touch_screen)) +
  geom_bar(width = 0.5, colour = "black") +
  labs(x = "touch_screen", y = "Frecuencia") +
  ylim(0, 1200)

# Top row: bluetooth | dual_sim. Bottom row: wifi | touch_screen.
g1 <- plot_grid(Barra1, Barra2, nrow = 1, align = "h")
g2 <- plot_grid(Barra5, Barra6, nrow = 1, align = "h")

# Stack the two rows into a single 2 x 1 figure.
plot_grid(g1, g2, nrow = 2, ncol = 1)
cat(
"[1] \"Variable: Doble sim\"\n"
)
## [1] "Variable: Doble sim"
# Frequency table for dual_sim, read from `data` (where it is a factor)
# rather than from the attach()ed copy, which is still numeric. This matches
# the freq(data$...) calls used for touch_screen, wifi, three_g and four_g.
freq(data$dual_sim)
## Frequencies
## data$dual_sim
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 981 49.05 49.05 49.05 49.05
## 1 1019 50.95 100.00 50.95 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
cat(
"[2] \"Test de normalidad de Shapiro-Wilk: Doble sim\"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: Doble sim"
shapiro.test(dual_sim)
##
## Shapiro-Wilk normality test
##
## data: dual_sim
## W = 0.63648, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : Doble sim\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : Doble sim"
tabla_dft= fdt_cat(data$dual_sim)
tabla_dft
## Category f rf rf(%) cf cf(%)
## 1 1019 0.51 50.95 1019 50.95
## 0 981 0.49 49.05 2000 100.00
# Count of phones per dual_sim level and its share of all rows (1 decimal).
Tabla_dual_sim <- data %>%
  group_by(dual_sim) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The constant facet formula is only used to draw a title strip.
ggplot(Tabla_dual_sim, aes(x = dual_sim, y = Total, fill = dual_sim)) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1200) +
  labs(x = "Doble sim", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene Doble Sim",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable Doble Sim")
Al realizar el test de normalidad de Shapiro-Wilk obtenemos que el valor p es muy pequeño, lo que quiere decir que los datos no siguen una distribución normal.
cat(
"[1] \"Variable: bluetooth\"\n"
)
## [1] "Variable: bluetooth"
# Frequency table for bluetooth, read from `data` (where it is a factor)
# rather than from the attach()ed copy, which is still numeric. This matches
# the freq(data$...) calls used for touch_screen, wifi, three_g and four_g.
freq(data$bluetooth)
## Frequencies
## data$bluetooth
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 1010 50.50 50.50 50.50 50.50
## 1 990 49.50 100.00 49.50 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
cat(
"[2] \"Test de normalidad de Shapiro-Wilk: bluetooth\"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: bluetooth"
shapiro.test(bluetooth)
##
## Shapiro-Wilk normality test
##
## data: bluetooth
## W = 0.63658, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : bluetooth\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : bluetooth"
tabla_bluetooth= fdt_cat(data$bluetooth)
tabla_bluetooth
## Category f rf rf(%) cf cf(%)
## 0 1010 0.5 50.5 1010 50.5
## 1 990 0.5 49.5 2000 100.0
# Count of phones per bluetooth level and its share of all rows (1 decimal).
Tabla_bluetooth <- data %>%
  group_by(bluetooth) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The constant facet formula is only used to draw a title strip.
ggplot(Tabla_bluetooth, aes(x = bluetooth, y = Total, fill = bluetooth)) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1200) +
  labs(x = "bluetooth", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene bluetooth",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable bluetooth")
cat(
"[1] \"Variable: touch_screen \"\n"
)
## [1] "Variable: touch_screen "
freq(data$touch_screen )
## Frequencies
## data$touch_screen
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 994 49.70 49.70 49.70 49.70
## 1 1006 50.30 100.00 50.30 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
cat(
"[2] \"Test de normalidad de Shapiro-Wilk: touch_screen \"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: touch_screen "
shapiro.test(touch_screen )
##
## Shapiro-Wilk normality test
##
## data: touch_screen
## W = 0.6366, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : touch_screen\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : touch_screen"
tabla_touch_screen= fdt_cat(data$touch_screen)
tabla_touch_screen
## Category f rf rf(%) cf cf(%)
## 1 1006 0.5 50.3 1006 50.3
## 0 994 0.5 49.7 2000 100.0
# Count of phones per touch_screen level and its share of all rows
# (1 decimal).
Tabla_touch_screen <- data %>%
  group_by(touch_screen) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The legend title and the facet strip previously contained mis-encoded
# text; they now read "táctil". The constant facet formula is only used to
# draw a title strip.
ggplot(
  Tabla_touch_screen,
  aes(x = touch_screen, y = Total, fill = touch_screen)
) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1100) +
  labs(x = "touch_screen", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene pantalla táctil",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable pantalla táctil")
cat(
"[1] \"Variable: wifi \"\n"
)
## [1] "Variable: wifi "
freq(data$wifi )
## Frequencies
## data$wifi
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 986 49.30 49.30 49.30 49.30
## 1 1014 50.70 100.00 50.70 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
cat(
"[2] \"Test de normalidad de Shapiro-Wilk: wifi \"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: wifi "
shapiro.test(wifi )
##
## Shapiro-Wilk normality test
##
## data: wifi
## W = 0.63654, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : wifi\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : wifi"
tabla_wifi = fdt_cat(data$wifi)
tabla_wifi
## Category f rf rf(%) cf cf(%)
## 1 1014 0.51 50.7 1014 50.7
## 0 986 0.49 49.3 2000 100.0
# Count of phones per wifi level and its share of all rows (1 decimal).
Tabla_wifi <- data %>%
  group_by(wifi) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The constant facet formula is only used to draw a title strip.
ggplot(Tabla_wifi, aes(x = wifi, y = Total, fill = wifi)) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1100) +
  labs(x = "wifi", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene wifi",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable wifi")
# Section banner for the three_g variable. The label previously read
# "$three_g" (a stray "$"), unlike the banners of the other variables.
cat(
  "[1] \"Variable: three_g \"\n"
)
## [1] "Variable: three_g "
freq(data$three_g )
## Frequencies
## data$three_g
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 477 23.85 23.85 23.85 23.85
## 1 1523 76.15 100.00 76.15 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
# Banner for the Shapiro-Wilk output of three_g. The label previously read
# "$three_g" (a stray "$"), unlike the banners of the other variables.
cat(
  "[2] \"Test de normalidad de Shapiro-Wilk: three_g \"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: three_g "
shapiro.test(three_g )
##
## Shapiro-Wilk normality test
##
## data: three_g
## W = 0.52845, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : three_g\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : three_g"
tabla_three_g = fdt_cat(data$three_g)
tabla_three_g
## Category f rf rf(%) cf cf(%)
## 1 1523 0.76 76.15 1523 76.15
## 0 477 0.24 23.85 2000 100.00
# Count of phones per three_g level and its share of all rows (1 decimal).
Tabla_three_g <- data %>%
  group_by(three_g) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The constant facet formula is only used to draw a title strip.
ggplot(Tabla_three_g, aes(x = three_g, y = Total, fill = three_g)) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1600) +
  labs(x = "three_g", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene 3G",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable 3G")
cat(
"[1] \"Variable: four_g \"\n"
)
## [1] "Variable: four_g "
freq(data$four_g )
## Frequencies
## data$four_g
## Type: Factor
##
## Freq % Valid % Valid Cum. % Total % Total Cum.
## ----------- ------ --------- -------------- --------- --------------
## 0 957 47.85 47.85 47.85 47.85
## 1 1043 52.15 100.00 52.15 100.00
## <NA> 0 0.00 100.00
## Total 2000 100.00 100.00 100.00 100.00
cat(
"[2] \"Test de normalidad de Shapiro-Wilk: four_g \"\n"
)
## [2] "Test de normalidad de Shapiro-Wilk: four_g "
shapiro.test(four_g )
##
## Shapiro-Wilk normality test
##
## data: four_g
## W = 0.63594, p-value < 2.2e-16
cat(
"[3] \"Tabla de distribuciĂłn de frecuencias : four_g\"\n"
)
## [3] "Tabla de distribuciĂłn de frecuencias : four_g"
tabla_four_g = fdt_cat(data$four_g)
tabla_four_g
## Category f rf rf(%) cf cf(%)
## 1 1043 0.52 52.15 1043 52.15
## 0 957 0.48 47.85 2000 100.00
# Count of phones per four_g level and its share of all rows (1 decimal).
Tabla_four_g <- data %>%
  group_by(four_g) %>%
  summarise(Total = n()) %>%
  dplyr::mutate(Porcentaje = round(Total / sum(Total) * 100, 1))

# Bar chart of those counts; each bar is labelled "count (percent%)".
# The constant facet formula is only used to draw a title strip.
ggplot(Tabla_four_g, aes(x = four_g, y = Total, fill = four_g)) +
  geom_col(width = 0.9, position = position_dodge()) +
  geom_text(
    aes(label = paste0(Total, " (", Porcentaje, "%)")),
    position = position_dodge(0.9),
    vjust = -0.9,
    hjust = 0.5,
    angle = 0,
    size = 4.0,
    color = "black"
  ) +
  ylim(0, 1100) +
  labs(x = "four_g", y = "Frecuencia \n (Porcentajes)", fill = "") +
  scale_fill_discrete(
    name = "El dispositivo tiene 4G",
    labels = c("No", "Si")
  ) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)) +
  facet_wrap(~"Variable 4G")
cat(
"[1] \"Medidas descriptivas: ram \"\n"
)
## [1] "Medidas descriptivas: ram "
descr(data$ram)
## Descriptive Statistics
## data$ram
## N: 2000
##
## ram
## ----------------- ---------
## Mean 2124.21
## Std.Dev 1084.73
## Min 256.00
## Q1 1207.00
## Median 2146.50
## Q3 3065.00
## Max 3998.00
## MAD 1382.52
## IQR 1857.00
## CV 0.51
## Skewness 0.01
## SE.Skewness 0.05
## Kurtosis -1.19
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: ram \"\n"
)
## [2] "Tabla de frecuencia: ram "
tablas_freq_ram= fdt(data$ram)
tablas_freq_ram
## Class limits f rf rf(%) cf cf(%)
## [253.44,568.8183) 168 0.08 8.40 168 8.40
## [568.8183,884.1967) 176 0.09 8.80 344 17.20
## [884.1967,1199.575) 149 0.07 7.45 493 24.65
## [1199.575,1514.953) 195 0.10 9.75 688 34.40
## [1514.953,1830.332) 150 0.07 7.50 838 41.90
## [1830.332,2145.71) 161 0.08 8.05 999 49.95
## [2145.71,2461.088) 188 0.09 9.40 1187 59.35
## [2461.088,2776.467) 179 0.09 8.95 1366 68.30
## [2776.467,3091.845) 145 0.07 7.25 1511 75.55
## [3091.845,3407.223) 164 0.08 8.20 1675 83.75
## [3407.223,3722.602) 176 0.09 8.80 1851 92.55
## [3722.602,4037.98) 149 0.07 7.45 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: ram \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: ram "
shapiro.test(ram)
##
## Shapiro-Wilk normality test
##
## data: ram
## W = 0.95462, p-value < 2.2e-16
Res_crimen = EDA(data$ram)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median
## 2000.000 0.000 256.000 1207.500 2124.213 2146.500
## TrMean 3rd Qu Max Stdev Var SE Mean
## 2123.541 3064.500 3998.000 1084.732 1176643.606 24.255
## I.Q.R. Range Kurtosis Skewness SW p-val
## 1857.000 3742.000 -1.194 0.007 0.000
cat(
"[1] \"Medidas descriptivas: battery_power \"\n"
)
## [1] "Medidas descriptivas: battery_power "
descr(data$battery_power)
## Descriptive Statistics
## data$battery_power
## N: 2000
##
## battery_power
## ----------------- ---------------
## Mean 1238.52
## Std.Dev 439.42
## Min 501.00
## Q1 851.50
## Median 1226.00
## Q3 1615.50
## Max 1998.00
## MAD 566.35
## IQR 763.50
## CV 0.35
## Skewness 0.03
## SE.Skewness 0.05
## Kurtosis -1.23
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: battery_power \"\n"
)
## [2] "Tabla de frecuencia: battery_power "
tablas_freq_battery_power= fdt(data$battery_power)
tablas_freq_battery_power
## Class limits f rf rf(%) cf cf(%)
## [495.99,622.8225) 184 0.09 9.20 184 9.20
## [622.8225,749.655) 175 0.09 8.75 359 17.95
## [749.655,876.4875) 178 0.09 8.90 537 26.85
## [876.4875,1003.32) 166 0.08 8.30 703 35.15
## [1003.32,1130.153) 165 0.08 8.25 868 43.40
## [1130.153,1256.985) 162 0.08 8.10 1030 51.50
## [1256.985,1383.818) 163 0.08 8.15 1193 59.65
## [1383.818,1510.65) 156 0.08 7.80 1349 67.45
## [1510.65,1637.483) 174 0.09 8.70 1523 76.15
## [1637.483,1764.315) 166 0.08 8.30 1689 84.45
## [1764.315,1891.148) 167 0.08 8.35 1856 92.80
## [1891.148,2017.98) 144 0.07 7.20 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: battery_power \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: battery_power "
shapiro.test(battery_power)
##
## Shapiro-Wilk normality test
##
## data: battery_power
## W = 0.95188, p-value < 2.2e-16
Res_crimen = EDA(data$battery_power)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean
## 2000.000 0.000 501.000 851.750 1238.518 1226.000 1237.268
## 3rd Qu Max Stdev Var SE Mean I.Q.R. Range
## 1615.250 1998.000 439.418 193088.360 9.826 763.500 1497.000
## Kurtosis Skewness SW p-val
## -1.226 0.032 0.000
cat(
"[1] \"Medidas descriptivas: clock_speed \"\n"
)
## [1] "Medidas descriptivas: clock_speed "
descr(data$clock_speed)
## Descriptive Statistics
## data$clock_speed
## N: 2000
##
## clock_speed
## ----------------- -------------
## Mean 1.52
## Std.Dev 0.82
## Min 0.50
## Q1 0.70
## Median 1.50
## Q3 2.20
## Max 3.00
## MAD 1.19
## IQR 1.50
## CV 0.54
## Skewness 0.18
## SE.Skewness 0.05
## Kurtosis -1.32
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: clock_speed \"\n"
)
## [2] "Tabla de frecuencia: clock_speed "
tablas_freq_clock_speed= fdt(data$clock_speed)
tablas_freq_clock_speed
## Class limits f rf rf(%) cf cf(%)
## [0.495,0.7062) 551 0.28 27.55 551 27.55
## [0.7062,0.9175) 116 0.06 5.80 667 33.35
## [0.9175,1.129) 112 0.06 5.60 779 38.95
## [1.129,1.34) 124 0.06 6.20 903 45.15
## [1.34,1.551) 137 0.07 6.85 1040 52.00
## [1.551,1.762) 136 0.07 6.80 1176 58.80
## [1.762,1.974) 127 0.06 6.35 1303 65.15
## [1.974,2.185) 143 0.07 7.15 1446 72.30
## [2.185,2.396) 137 0.07 6.85 1583 79.15
## [2.396,2.607) 187 0.09 9.35 1770 88.50
## [2.607,2.819) 140 0.07 7.00 1910 95.50
## [2.819,3.03) 90 0.04 4.50 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: clock_speed \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: clock_speed "
shapiro.test(clock_speed)
##
## Shapiro-Wilk normality test
##
## data: clock_speed
## W = 0.91123, p-value < 2.2e-16
Res_crimen = EDA(data$clock_speed)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 0.500 0.700 1.522 1.500 1.502 2.200
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 3.000 0.816 0.666 0.018 1.500 2.500 -1.325 0.178
## SW p-val
## 0.000
cat(
"[1] \"Medidas descriptivas: int_memory \"\n"
)
## [1] "Medidas descriptivas: int_memory "
descr(data$int_memory)
## Descriptive Statistics
## data$int_memory
## N: 2000
##
## int_memory
## ----------------- ------------
## Mean 32.05
## Std.Dev 18.15
## Min 2.00
## Q1 16.00
## Median 32.00
## Q3 48.00
## Max 64.00
## MAD 23.72
## IQR 32.00
## CV 0.57
## Skewness 0.06
## SE.Skewness 0.05
## Kurtosis -1.22
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: int_memory \"\n"
)
## [2] "Tabla de frecuencia: int_memory "
tablas_freq_int_memory= fdt(data$int_memory)
tablas_freq_int_memory
## Class limits f rf rf(%) cf cf(%)
## [1.98,7.2017) 200 0.10 10.00 200 10.00
## [7.2017,12.423) 178 0.09 8.90 378 18.90
## [12.423,17.645) 177 0.09 8.85 555 27.75
## [17.645,22.867) 156 0.08 7.80 711 35.55
## [22.867,28.088) 192 0.10 9.60 903 45.15
## [28.088,33.31) 159 0.08 7.95 1062 53.10
## [33.31,38.532) 139 0.07 6.95 1201 60.05
## [38.532,43.753) 161 0.08 8.05 1362 68.10
## [43.753,48.975) 157 0.08 7.85 1519 75.95
## [48.975,54.197) 193 0.10 9.65 1712 85.60
## [54.197,59.418) 152 0.08 7.60 1864 93.20
## [59.418,64.64) 136 0.07 6.80 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: int_memory \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: int_memory "
shapiro.test(int_memory)
##
## Shapiro-Wilk normality test
##
## data: int_memory
## W = 0.95274, p-value < 2.2e-16
Res_crimen = EDA(data$int_memory)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 2.000 16.000 32.047 32.000 31.953 48.000
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 64.000 18.146 329.267 0.406 32.000 62.000 -1.218 0.058
## SW p-val
## 0.000
cat(
"[1] \"Medidas descriptivas: n_cores \"\n"
)
## [1] "Medidas descriptivas: n_cores "
descr(data$n_cores)
## Descriptive Statistics
## data$n_cores
## N: 2000
##
## n_cores
## ----------------- ---------
## Mean 4.52
## Std.Dev 2.29
## Min 1.00
## Q1 3.00
## Median 4.00
## Q3 7.00
## Max 8.00
## MAD 2.97
## IQR 4.00
## CV 0.51
## Skewness 0.00
## SE.Skewness 0.05
## Kurtosis -1.23
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: n_cores \"\n"
)
## [2] "Tabla de frecuencia: n_cores "
tablas_freq_n_cores= fdt(data$n_cores)
tablas_freq_n_cores
## Class limits f rf rf(%) cf cf(%)
## [0.99,1.581) 242 0.12 12.10 242 12.10
## [1.581,2.172) 247 0.12 12.35 489 24.45
## [2.172,2.763) 0 0.00 0.00 489 24.45
## [2.763,3.353) 246 0.12 12.30 735 36.75
## [3.353,3.944) 0 0.00 0.00 735 36.75
## [3.944,4.535) 274 0.14 13.70 1009 50.45
## [4.535,5.126) 246 0.12 12.30 1255 62.75
## [5.126,5.717) 0 0.00 0.00 1255 62.75
## [5.717,6.308) 230 0.12 11.50 1485 74.25
## [6.308,6.898) 0 0.00 0.00 1485 74.25
## [6.898,7.489) 259 0.13 12.95 1744 87.20
## [7.489,8.08) 256 0.13 12.80 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: n_cores \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: n_cores "
shapiro.test(n_cores)
##
## Shapiro-Wilk normality test
##
## data: n_cores
## W = 0.92619, p-value < 2.2e-16
Res_crimen = EDA(data$n_cores)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 1.000 3.000 4.521 4.000 4.523 7.000
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 8.000 2.288 5.234 0.051 4.000 7.000 -1.231 0.004
## SW p-val
## 0.000
cat(
"[1] \"Medidas descriptivas: MegapĂxeles de la cĂĄmara principal. \"\n"
)
## [1] "Medidas descriptivas: MegapĂxeles de la cĂĄmara principal. "
descr(data$pc)
## Descriptive Statistics
## data$pc
## N: 2000
##
## pc
## ----------------- ---------
## Mean 9.92
## Std.Dev 6.06
## Min 0.00
## Q1 5.00
## Median 10.00
## Q3 15.00
## Max 20.00
## MAD 7.41
## IQR 10.00
## CV 0.61
## Skewness 0.02
## SE.Skewness 0.05
## Kurtosis -1.17
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: pc \"\n"
)
## [2] "Tabla de frecuencia: pc "
tablas_freq_pc= fdt(data$pc)
tablas_freq_pc
## Class limits f rf rf(%) cf cf(%)
## [0,1.683) 205 0.10 10.25 205 10.25
## [1.683,3.367) 192 0.10 9.60 397 19.85
## [3.367,5.05) 154 0.08 7.70 551 27.55
## [5.05,6.733) 95 0.05 4.75 646 32.30
## [6.733,8.417) 208 0.10 10.40 854 42.70
## [8.417,10.1) 234 0.12 11.70 1088 54.40
## [10.1,11.78) 79 0.04 3.95 1167 58.35
## [11.78,13.47) 175 0.09 8.75 1342 67.10
## [13.47,15.15) 196 0.10 9.80 1538 76.90
## [15.15,16.83) 88 0.04 4.40 1626 81.30
## [16.83,18.52) 181 0.09 9.05 1807 90.35
## [18.52,20.2) 193 0.10 9.65 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: pc \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: pc "
shapiro.test(pc)
##
## Shapiro-Wilk normality test
##
## data: pc
## W = 0.95126, p-value < 2.2e-16
Res_crimen = EDA(data$pc)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 0.000 5.000 9.916 10.000 9.907 15.000
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 20.000 6.064 36.776 0.136 10.000 20.000 -1.173 0.017
## SW p-val
## 0.000
cat(
"[1] \"Medidas descriptivas: MegapĂxeles de la cĂĄmara frontal \"\n"
)
## [1] "Medidas descriptivas: MegapĂxeles de la cĂĄmara frontal "
descr(data$fc)
## Descriptive Statistics
## data$fc
## N: 2000
##
## fc
## ----------------- ---------
## Mean 4.31
## Std.Dev 4.34
## Min 0.00
## Q1 1.00
## Median 3.00
## Q3 7.00
## Max 19.00
## MAD 4.45
## IQR 6.00
## CV 1.01
## Skewness 1.02
## SE.Skewness 0.05
## Kurtosis 0.27
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: fc \"\n"
)
## [2] "Tabla de frecuencia: fc "
tablas_freq_fc= fdt(data$fc)
tablas_freq_fc
## Class limits f rf rf(%) cf cf(%)
## [0,1.5992) 719 0.36 35.95 719 35.95
## [1.5992,3.1983) 359 0.18 17.95 1078 53.90
## [3.1983,4.7975) 133 0.07 6.65 1211 60.55
## [4.7975,6.3967) 251 0.13 12.55 1462 73.10
## [6.3967,7.9958) 100 0.05 5.00 1562 78.10
## [7.9958,9.595) 155 0.08 7.75 1717 85.85
## [9.595,11.194) 113 0.06 5.65 1830 91.50
## [11.194,12.793) 45 0.02 2.25 1875 93.75
## [12.793,14.393) 60 0.03 3.00 1935 96.75
## [14.393,15.992) 23 0.01 1.15 1958 97.90
## [15.992,17.591) 30 0.01 1.50 1988 99.40
## [17.591,19.19) 12 0.01 0.60 2000 100.00
cat(
"[3] \"Test de normalidad de Shapiro-Wilk: fc \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: fc "
shapiro.test(fc)
##
## Shapiro-Wilk normality test
##
## data: fc
## W = 0.87374, p-value < 2.2e-16
Res_crimen = EDA(data$fc)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 0.000 1.000 4.310 3.000 3.942 7.000
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 19.000 4.341 18.848 0.097 6.000 19.000 0.270 1.018
## SW p-val
## 0.000
cat(
"[1] \"Medidas descriptivas: ResoluciĂłn de pĂxeles Altura \"\n"
)
## [1] "Medidas descriptivas: ResoluciĂłn de pĂxeles Altura "
descr(data$px_height)
## Descriptive Statistics
## data$px_height
## N: 2000
##
## px_height
## ----------------- -----------
## Mean 645.11
## Std.Dev 443.78
## Min 0.00
## Q1 282.50
## Median 564.00
## Q3 947.50
## Max 1960.00
## MAD 471.47
## IQR 664.50
## CV 0.69
## Skewness 0.67
## SE.Skewness 0.05
## Kurtosis -0.32
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
cat(
"[2] \"Tabla de frecuencia: ResoluciĂłn de pĂxeles Altura \"\n"
)
## [2] "Tabla de frecuencia: ResoluciĂłn de pĂxeles Altura "
tablas_freq_px_height= fdt(data$px_height)
tablas_freq_px_height
## Class limits f rf rf(%) cf cf(%)
## [0,164.967) 269 0.13 13.45 269 13.45
## [164.967,329.933) 320 0.16 16.00 589 29.45
## [329.933,494.9) 300 0.15 15.00 889 44.45
## [494.9,659.867) 262 0.13 13.10 1151 57.55
## [659.867,824.833) 217 0.11 10.85 1368 68.40
## [824.833,989.8) 183 0.09 9.15 1551 77.55
## [989.8,1154.77) 143 0.07 7.15 1694 84.70
## [1154.77,1319.73) 128 0.06 6.40 1822 91.10
## [1319.73,1484.7) 77 0.04 3.85 1899 94.95
## [1484.7,1649.67) 50 0.03 2.50 1949 97.45
## [1649.67,1814.63) 35 0.02 1.75 1984 99.20
## [1814.63,1979.6) 16 0.01 0.80 2000 100.00
# Step 3 for px_height: Shapiro-Wilk normality test, then the EDA() summary.
# The accented characters of the label were mis-encoded; they are restored so
# the printed heading reads correctly.
cat(
  "[3] \"Test de normalidad de Shapiro-Wilk: Resolución de píxeles Altura \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: Resolución de píxeles Altura "
# The column is read from `data` explicitly, like every other call in this
# section, so the test does not depend on a bare `px_height` being reachable
# through the search path.
shapiro.test(data$px_height)
##
## Shapiro-Wilk normality test
##
## data: data$px_height
## W = 0.9469, p-value < 2.2e-16
# NOTE(review): the name `Res_crimen` looks carried over from another
# analysis; it is kept as-is because code outside this section may read it.
Res_crimen <- EDA(data$px_height)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean
## 2000.000 0.000 0.000 282.750 645.108 564.000 621.618
## 3rd Qu Max Stdev Var SE Mean I.Q.R. Range
## 947.250 1960.000 443.781 196941.408 9.923 664.500 1960.000
## Kurtosis Skewness SW p-val
## -0.321 0.665 0.000
# Step 1 for px_width: print the section label, then the descriptive
# statistics produced by descr().
# The accented characters of the label were mis-encoded; they are restored so
# the printed heading reads correctly.
cat("[1] \"Medidas descriptivas: Ancho de resolución de píxeles \"\n")
## [1] "Medidas descriptivas: Ancho de resolución de píxeles "
descr(data$px_width)
## Descriptive Statistics
## data$px_width
## N: 2000
##
## px_width
## ----------------- ----------
## Mean 1251.52
## Std.Dev 432.20
## Min 500.00
## Q1 874.50
## Median 1247.00
## Q3 1633.00
## Max 1998.00
## MAD 557.46
## IQR 758.25
## CV 0.35
## Skewness 0.01
## SE.Skewness 0.05
## Kurtosis -1.19
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
# Step 2 for px_width: print the section label, then build and show the
# frequency distribution table.
# The label previously announced the pixel-height variable (copied from the
# px_height section); it now names the variable that is actually tabulated.
cat("[2] \"Tabla de frecuencia: Ancho de resolución de píxeles \"\n")
## [2] "Tabla de frecuencia: Ancho de resolución de píxeles "
tablas_freq_px_width <- fdt(data$px_width)
tablas_freq_px_width
## Class limits f rf rf(%) cf cf(%)
## [495,621.915) 147 0.07 7.35 147 7.35
## [621.915,748.83) 178 0.09 8.90 325 16.25
## [748.83,875.745) 176 0.09 8.80 501 25.05
## [875.745,1002.66) 158 0.08 7.90 659 32.95
## [1002.66,1129.575) 170 0.09 8.50 829 41.45
## [1129.575,1256.49) 186 0.09 9.30 1015 50.75
## [1256.49,1383.405) 171 0.09 8.55 1186 59.30
## [1383.405,1510.32) 175 0.09 8.75 1361 68.05
## [1510.32,1637.235) 146 0.07 7.30 1507 75.35
## [1637.235,1764.15) 177 0.09 8.85 1684 84.20
## [1764.15,1891.065) 156 0.08 7.80 1840 92.00
## [1891.065,2017.98) 160 0.08 8.00 2000 100.00
# Step 3 for px_width: Shapiro-Wilk normality test, then the EDA() summary.
# The label previously announced the pixel-height variable (copied from the
# px_height section); it now names the variable that is actually tested.
cat(
  "[3] \"Test de normalidad de Shapiro-Wilk: Ancho de resolución de píxeles \"\n"
)
## [3] "Test de normalidad de Shapiro-Wilk: Ancho de resolución de píxeles "
# The column is read from `data` explicitly, like every other call in this
# section, so the test does not depend on a bare `px_width` being reachable
# through the search path.
shapiro.test(data$px_width)
##
## Shapiro-Wilk normality test
##
## data: data$px_width
## W = 0.95604, p-value < 2.2e-16
# NOTE(review): the name `Res_crimen` looks carried over from another
# analysis; it is kept as-is because code outside this section may read it.
Res_crimen <- EDA(data$px_width)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean
## 2000.000 0.000 500.000 874.750 1251.515 1247.000 1251.566
## 3rd Qu Max Stdev Var SE Mean I.Q.R. Range
## 1633.000 1998.000 432.199 186796.362 9.664 758.250 1498.000
## Kurtosis Skewness SW p-val
## -1.188 0.015 0.000
# Step 1 for talk_time: print the section label, then the descriptive
# statistics produced by descr().
# The accented character of the label was mis-encoded; it is restored so the
# printed heading reads correctly.
cat("[1] \"Medidas descriptivas: Tiempo de la batería \"\n")
## [1] "Medidas descriptivas: Tiempo de la batería "
descr(data$talk_time)
## Descriptive Statistics
## data$talk_time
## N: 2000
##
## talk_time
## ----------------- -----------
## Mean 11.01
## Std.Dev 5.46
## Min 2.00
## Q1 6.00
## Median 11.00
## Q3 16.00
## Max 20.00
## MAD 7.41
## IQR 10.00
## CV 0.50
## Skewness 0.01
## SE.Skewness 0.05
## Kurtosis -1.22
## N.Valid 2000.00
## N 2000.00
## Pct.Valid 100.00
# Step 2 for talk_time: print the section label, then build and show the
# frequency distribution table.
# The label previously announced the pixel-height variable (copied from the
# px_height section); it now names the variable that is actually tabulated.
cat("[2] \"Tabla de frecuencia: Tiempo de la batería \"\n")
## [2] "Tabla de frecuencia: Tiempo de la batería "
tablas_freq_talk_time <- fdt(data$talk_time)
tablas_freq_talk_time
## Class limits f rf rf(%) cf cf(%)
## [1.98,3.498) 193 0.10 9.65 193 9.65
## [3.498,5.017) 216 0.11 10.80 409 20.45
## [5.017,6.535) 111 0.06 5.55 520 26.00
## [6.535,8.053) 228 0.11 11.40 748 37.40
## [8.053,9.572) 100 0.05 5.00 848 42.40
## [9.572,11.09) 208 0.10 10.40 1056 52.80
## [11.09,12.61) 99 0.05 4.95 1155 57.75
## [12.61,14.13) 201 0.10 10.05 1356 67.80
## [14.13,15.64) 115 0.06 5.75 1471 73.55
## [15.64,17.16) 214 0.11 10.70 1685 84.25
## [17.16,18.68) 100 0.05 5.00 1785 89.25
## [18.68,20.2) 215 0.11 10.75 2000 100.00
# Step 3 for talk_time: Shapiro-Wilk normality test, then the EDA() summary.
# The label previously announced the pixel-height variable (copied from the
# px_height section); it now names the variable that is actually tested.
cat("[3] \"Test de normalidad de Shapiro-Wilk: Tiempo de la batería \"\n")
## [3] "Test de normalidad de Shapiro-Wilk: Tiempo de la batería "
# The column is read from `data` explicitly, like every other call in this
# section, so the test does not depend on a bare `talk_time` being reachable
# through the search path.
shapiro.test(data$talk_time)
##
## Shapiro-Wilk normality test
##
## data: data$talk_time
## W = 0.94821, p-value < 2.2e-16
# NOTE(review): the name `Res_crimen` looks carried over from another
# analysis; it is kept as-is because code outside this section may read it.
Res_crimen <- EDA(data$talk_time)
Res_crimen
## Size (n) Missing Minimum 1st Qu Mean Median TrMean 3rd Qu
## 2000.000 0.000 2.000 6.000 11.011 11.000 11.012 16.000
## Max Stdev Var SE Mean I.Q.R. Range Kurtosis Skewness
## 20.000 5.464 29.855 0.122 10.000 18.000 -1.220 0.009
## SW p-val
## 0.000
Análisis Bivariado
# Cross-tabulation of dual_sim (rows) against wifi (columns): print the
# section label, store the contingency table, then show it.
cat("[1] \"Tabla cruzada entre dual_sim y wifi \"\n")
## [1] "Tabla cruzada entre dual_sim y wifi "
tabla_dual_wifi <- table(data[["dual_sim"]], data[["wifi"]])
tabla_dual_wifi
##
## 0 1
## 0 495 486
## 1 491 528
# The same dual_sim x wifi table expressed as proportions.
cat("[1] \"Tabla cruzada entre dual_sim y wifi en proporciones \"\n")
## [1] "Tabla cruzada entre dual_sim y wifi en proporciones "
# margin = 1 normalises within each row, so every row adds up to 1.
print(prop.table(tabla_dual_wifi, margin = 1))
##
## 0 1
## 0 0.5045872 0.4954128
## 1 0.4818449 0.5181551
# Grouped bar chart: number of phones per dual_sim value, split by wifi.
# Both indicators are wrapped in factor() so ggplot2 treats them as discrete.
# A 0/1 column mapped to `fill` is otherwise handled as continuous, defines no
# groups, and the bars are neither split nor dodged by wifi. The wrapping is
# harmless if the columns have already been converted to factors.
ggplot(data, aes(x = factor(dual_sim), fill = factor(wifi))) +
  geom_bar(position = "dodge") +
  theme_minimal() +
  labs(title = "Dual SIM vs WiFi", x = "Dual SIM", fill = "WiFi")
# Box plots of battery_power for phones without and with 3G.
# three_g is wrapped in factor() so that one box is drawn per category; with
# a continuous x, geom_boxplot() has no grouping and collapses everything into
# a single box. The wrapping is harmless if the column is already a factor.
ggplot(
  data,
  aes(x = factor(three_g), y = battery_power, fill = factor(three_g))
) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "Battery Power vs 3G",
    x = "3G Disponible",
    y = "Battery Power (mAh)",
    # Explicit legend title; otherwise it would read "factor(three_g)".
    fill = "3G Disponible"
  )
ram según four_g (Celulares con y sin 4G)
# Box plots of ram for phones without and with 4G.
# four_g is wrapped in factor() so that one box is drawn per category; with a
# continuous x, geom_boxplot() has no grouping and collapses everything into a
# single box. The wrapping is harmless if the column is already a factor.
ggplot(data, aes(x = factor(four_g), y = ram, fill = factor(four_g))) +
  geom_boxplot() +
  theme_minimal() +
  labs(
    title = "RAM vs 4G",
    x = "4G Disponible",
    y = "RAM (MB)",
    # Explicit legend title; otherwise it would read "factor(four_g)".
    fill = "4G Disponible"
  )