library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr) #Para leer archivos .csv
# Load the medical-costs dataset into a tibble; column types are guessed by
# readr (see the column specification printed below).
# NOTE(review): absolute, machine-specific Windows path -- the script only runs
# where this exact drive/folder exists; confirm before sharing.
medical_costs <- read_csv(
  file = "E:\\2023_Capacitaciones\\BIG DATA UAO\\medical_costs.csv"
)
## Rows: 10000 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Sex, Smoker, Region
## dbl (4): Age, BMI, Children, Medical Cost
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first five rows to sanity-check the import.
head(medical_costs, n = 5)
## # A tibble: 5 × 7
## Age Sex BMI Children Smoker Region `Medical Cost`
## <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl>
## 1 58 male 15.6 2 yes northwest 17908.
## 2 24 male 29.8 0 yes northeast 16313.
## 3 50 male 29 5 no northwest 6819.
## 4 35 male 34 1 no southeast 5248.
## 5 31 female 17.6 3 yes southeast 17525.
# Mean medical cost per region, restricted to male patients.
# The expression is not assigned, so the summary tibble is auto-printed.
medical_costs %>%
  filter(Sex == "male") %>%
  select(Sex:Region, Costo = `Medical Cost`) %>%
  group_by(Region) %>%
  summarise(Avg_Costo = mean(Costo))
## # A tibble: 4 × 2
## Region Avg_Costo
## <chr> <dbl>
## 1 northeast 11959.
## 2 northwest 11737.
## 3 southeast 11910.
## 4 southwest 11910.
# Mean medical cost per region, restricted to female patients.
# The expression is not assigned, so the summary tibble is auto-printed.
medical_costs %>%
  filter(Sex == "female") %>%
  select(Sex:Region, Costo = `Medical Cost`) %>%
  group_by(Region) %>%
  summarise(Avg_Costo = mean(Costo))
## # A tibble: 4 × 2
## Region Avg_Costo
## <chr> <dbl>
## 1 northeast 11870.
## 2 northwest 11976.
## 3 southeast 12088.
## 4 southwest 11737.
# Mean medical cost for each age, over all patients.
datos <- medical_costs %>%
  select(Age, Costo = `Medical Cost`) %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(Costo))

# Plot the two-column summary: Age against Avg_Costo.
plot(datos)
## Medical costs by age for people who smoke
datos2 <- medical_costs %>%
  filter(Smoker == "yes") %>%
  select(Age:Smoker, Costo = `Medical Cost`) %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(Costo))

# Plot the two-column summary: Age against Avg_Costo (smokers only).
plot(datos2)
## Medical costs by age for people who do not smoke
datos3 <- medical_costs %>%
  filter(Smoker == "no") %>%
  select(Age:Smoker, Costo = `Medical Cost`) %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(Costo))

# Plot the two-column summary: Age against Avg_Costo (non-smokers only).
plot(datos3)
# Mean medical cost for each smoking status (one row per Smoker value).
datos4 <- medical_costs %>%
  select(Smoker, Costo = `Medical Cost`) %>%
  group_by(Smoker) %>%
  summarise(Avg_Costo = mean(Costo))

# Bar chart comparing the average cost between the smoking-status groups.
# The title spelling is corrected ("Injerencia", not "Ingerencia") and axis
# labels are added so the plotted quantity is identified on the chart.
barplot(
  height = datos4$Avg_Costo,
  names.arg = datos4$Smoker,
  main = "Injerencia de consumo de tabaco en costos medicos",
  xlab = "Fumador",
  ylab = "Costo medico promedio"
)