library(readr)
library(estadistica)
library(ggplot2)
# Read the sleep health and lifestyle dataset from the working directory.
Sleep <- read_csv(file = "Sleep_health_and_lifestyle_dataset.csv")
## Rows: 374 Columns: 13
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Gender, Occupation, BMI Category, Blood Pressure, Sleep Disorder
## dbl (8): Person ID, Age, Sleep Duration, Quality of Sleep, Physical Activity...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Convert the tibble into a plain data.frame, turning every character column
# into a factor. As the recorded str() output shows, column names containing
# spaces end up with dots instead (e.g. "BMI Category" -> "BMI.Category").
Sleep <- as.data.frame(
  unclass(Sleep),
  stringsAsFactors = TRUE
)

# Check the resulting column types.
str(Sleep)
## 'data.frame': 374 obs. of 13 variables:
## $ Person.ID : num 1 2 3 4 5 6 7 8 9 10 ...
## $ Gender : Factor w/ 2 levels "Female","Male": 2 2 2 2 2 2 2 2 2 2 ...
## $ Age : num 27 28 28 28 28 28 29 29 29 29 ...
## $ Occupation : Factor w/ 11 levels "Accountant","Doctor",..: 10 2 2 7 7 10 11 2 2 2 ...
## $ Sleep.Duration : num 6.1 6.2 6.2 5.9 5.9 5.9 6.3 7.8 7.8 7.8 ...
## $ Quality.of.Sleep : num 6 6 6 4 4 4 6 7 7 7 ...
## $ Physical.Activity.Level: num 42 60 60 30 30 30 40 75 75 75 ...
## $ Stress.Level : num 6 8 8 8 8 8 7 6 6 6 ...
## $ BMI.Category : Factor w/ 4 levels "Normal","Normal Weight",..: 4 1 1 3 3 3 3 1 1 1 ...
## $ Blood.Pressure : Factor w/ 25 levels "115/75","115/78",..: 12 10 10 23 23 23 23 7 7 7 ...
## $ Heart.Rate : num 77 75 75 85 85 85 82 70 70 70 ...
## $ Daily.Steps : num 4200 10000 10000 3000 3000 3000 3500 8000 8000 8000 ...
## $ Sleep.Disorder : Factor w/ 3 levels "Insomnia","None",..: 2 2 2 3 3 1 1 2 2 2 ...
# Treat the sleep-quality score as a categorical variable so that summary()
# reports a count per score rather than numeric quantiles.
Sleep[["Quality.of.Sleep"]] <- as.factor(Sleep[["Quality.of.Sleep"]])

# Per-column overview: quantiles for numeric columns, counts for factors.
summary(Sleep)
## Person.ID Gender Age Occupation Sleep.Duration
## Min. : 1.00 Female:185 Min. :27.00 Nurse :73 Min. :5.800
## 1st Qu.: 94.25 Male :189 1st Qu.:35.25 Doctor :71 1st Qu.:6.400
## Median :187.50 Median :43.00 Engineer :63 Median :7.200
## Mean :187.50 Mean :42.18 Lawyer :47 Mean :7.132
## 3rd Qu.:280.75 3rd Qu.:50.00 Teacher :40 3rd Qu.:7.800
## Max. :374.00 Max. :59.00 Accountant:37 Max. :8.500
## (Other) :43
## Quality.of.Sleep Physical.Activity.Level Stress.Level BMI.Category
## 4: 5 Min. :30.00 Min. :3.000 Normal :195
## 5: 7 1st Qu.:45.00 1st Qu.:4.000 Normal Weight: 21
## 6:105 Median :60.00 Median :5.000 Obese : 10
## 7: 77 Mean :59.17 Mean :5.385 Overweight :148
## 8:109 3rd Qu.:75.00 3rd Qu.:7.000
## 9: 71 Max. :90.00 Max. :8.000
##
## Blood.Pressure Heart.Rate Daily.Steps Sleep.Disorder
## 130/85 :99 Min. :65.00 Min. : 3000 Insomnia : 77
## 125/80 :65 1st Qu.:68.00 1st Qu.: 5600 None :219
## 140/95 :65 Median :70.00 Median : 7000 Sleep Apnea: 78
## 120/80 :45 Mean :70.17 Mean : 6817
## 115/75 :32 3rd Qu.:72.00 3rd Qu.: 8000
## 135/90 :27 Max. :86.00 Max. :10000
## (Other):41
###
# Sleep.Disorder may contain missing values; they are treated as the absence
# of a sleep disorder and recoded to "None", the same label that the binary
# indicator below compares against.
#
# A factor only accepts values that are already among its levels: assigning an
# unknown label produces NA together with an "invalid factor level" warning
# (even when no element is selected). Therefore make sure the column is a
# factor and that "None" is a registered level before filling the gaps.
Sleep$Sleep.Disorder <- as.factor(Sleep$Sleep.Disorder)
if (!"None" %in% levels(Sleep$Sleep.Disorder)) {
  levels(Sleep$Sleep.Disorder) <- c(levels(Sleep$Sleep.Disorder), "None")
}
Sleep$Sleep.Disorder[is.na(Sleep$Sleep.Disorder)] <- "None"

# Split blood pressure ("systolic/diastolic") into two numeric columns.
# vapply() guarantees one character value per row regardless of the input.
bp_split <- strsplit(as.character(Sleep$Blood.Pressure), "/", fixed = TRUE)
Sleep$Systolic <- as.numeric(vapply(bp_split, `[`, character(1), 1))
Sleep$Diastolic <- as.numeric(vapply(bp_split, `[`, character(1), 2))

# Auxiliary binary variable for logistic modelling:
# 0 = no disorder ("None"), 1 = any sleep disorder.
Sleep$Sleep.Disorder.Binary <- as.factor(
  ifelse(Sleep$Sleep.Disorder == "None", 0, 1)
)
# Descriptive statistics for the Age column ----

# Number of observations in the dataset.
n <- nrow(Sleep)

# Largest and smallest age.
max(Sleep[["Age"]])
## [1] 59
min(Sleep[["Age"]])
## [1] 27

# Central tendency: mean (one decimal), median and mode.
round(mean(Sleep[["Age"]]), digits = 1)
## [1] 42.2
median(Sleep[["Age"]])
## [1] 43
# moda() is not base R; presumably provided by the estadistica package.
as.numeric(moda(Sleep$Age))
## [1] 43

# Dispersion: width of the range, variance and standard deviation.
diff(range(Sleep[["Age"]]))
## [1] 32
var(Sleep[["Age"]])
## [1] 75.22324
sd(Sleep[["Age"]])
## [1] 8.673133

# Coefficient of variation, scaled to a percentage.
coeficiente.variacion(Sleep$Age) * 100
## variable
## 1 20.53

# Quartiles, followed by the statistics a boxplot is built from
# (whisker ends and hinges, sample size, notch interval, outliers).
quantile(Sleep[["Age"]])
## 0% 25% 50% 75% 100%
## 27.00 35.25 43.00 50.00 59.00
boxplot.stats(Sleep[["Age"]])
## $stats
## [1] 27 35 43 50 59
##
## $n
## [1] 374
##
## $conf
## [1] 41.7745 44.2255
##
## $out
## numeric(0)
# Horizontal boxplot of age with default styling.
ggplot(data = Sleep, mapping = aes(x = Age)) +
  geom_boxplot() +
  labs(
    title = "Diagrama de boxplot de las edades",
    x = "Edades de los pacientes"
  )

# Same boxplot, filled in red.
ggplot(data = Sleep, mapping = aes(x = Age)) +
  geom_boxplot(fill = "red") +
  labs(
    title = "Diagrama de boxplot de las edades",
    x = "Edades de los pacientes"
  )

# One boxplot per gender, distinguished by fill colour.
ggplot(data = Sleep, mapping = aes(x = Age, fill = Gender)) +
  geom_boxplot() +
  labs(
    title = "Diagrama de boxplot de las edades",
    x = "Edades de los pacientes"
  )

# List the available columns, including the derived ones.
names(Sleep)
## [1] "Person.ID" "Gender"
## [3] "Age" "Occupation"
## [5] "Sleep.Duration" "Quality.of.Sleep"
## [7] "Physical.Activity.Level" "Stress.Level"
## [9] "BMI.Category" "Blood.Pressure"
## [11] "Heart.Rate" "Daily.Steps"
## [13] "Sleep.Disorder" "Systolic"
## [15] "Diastolic" "Sleep.Disorder.Binary"

# Age boxplots split by the binary sleep-disorder indicator.
ggplot(
  data = Sleep,
  mapping = aes(x = Age, fill = Sleep.Disorder.Binary)
) +
  geom_boxplot() +
  labs(
    title = "Diagrama de boxplot de las edades",
    x = "Edades de los pacientes"
  )

# Histogram of age using ggplot2's default binning (see the message below).
ggplot(data = Sleep, mapping = aes(x = Age)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value `binwidth`.

# Histogram with a bin count guided by Sturges' rule, k = 1 + 3.33 * log10(n),
# where n is the number of rows computed earlier in the script.
Clase <- 1 + 3.33 * log10(n)
Clase
## [1] 9.567662

# The rule suggests about 9.57 classes, so 10 bins are used.
ggplot(data = Sleep, mapping = aes(x = Age)) +
  geom_histogram(
    bins = 10,
    color = "white",
    fill = "red"
  ) +
  labs(
    title = "Histograma de edad de los pacientes",
    x = "Edad",
    y = "Frecuencia Absoluta"
  )

# Age histogram (10 bins) with bars filled by the binary sleep-disorder
# indicator.
ggplot(
  data = Sleep,
  mapping = aes(x = Age, fill = Sleep.Disorder.Binary)
) +
  geom_histogram(bins = 10, color = "white") +
  labs(
    title = "Histograma de edad de los pacientes",
    x = "Edad",
    y = "Frecuencia Absoluta"
  )

# Age histogram by sleep-disorder indicator with vertical reference lines for
# the mean (blue) and the median (yellow).
#
# The reference values are computed from the data instead of being typed in,
# so they stay correct if the dataset changes. They are passed as plain
# parameters rather than inside aes(): a constant wrapped in aes() is
# evaluated against the inherited plot data and draws one overlapping line
# per row. The duplicated median layer of the earlier version was dropped; in
# the recorded run the mode equals the median (43), so a separate mode line
# would coincide with the yellow one.
ggplot(Sleep, aes(x = Age, fill = Sleep.Disorder.Binary)) +
  geom_histogram(bins = 10, color = "white") +
  labs(
    title = "Histograma de edad de los pacientes",
    x = "Edad",
    y = "Frecuencia Absoluta"
  ) +
  geom_vline(xintercept = mean(Sleep$Age), color = "blue") +
  geom_vline(xintercept = median(Sleep$Age), color = "yellow")

# Overlapping age density curves, one per BMI category, with default colours.
ggplot(data = Sleep, mapping = aes(x = Age, fill = BMI.Category)) +
  geom_density(alpha = 0.3) +
  labs(
    title = "Densidad de edad de los pacientes",
    x = "Edad",
    y = "Densidad"
  )

# Same densities, slightly more transparent and with a manual palette
# (one colour for each of the four BMI.Category levels).
ggplot(data = Sleep, mapping = aes(x = Age, fill = BMI.Category)) +
  geom_density(alpha = 0.2) +
  labs(
    title = "Densidad de edad de los pacientes",
    x = "Edad",
    y = "Densidad"
  ) +
  scale_fill_manual(values = c("red", "blue", "yellow", "cyan"))
