Library dplyr

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr) # Provides read_csv() for reading .csv files

# Load the medical costs dataset into a tibble.
# The location can be overridden with the MEDICAL_COSTS_CSV environment
# variable so the script is not tied to one machine's drive layout; when the
# variable is not set, the original absolute path is used unchanged.
medical_costs <- read_csv(
  Sys.getenv(
    "MEDICAL_COSTS_CSV",
    unset = "E:\\2023_Capacitaciones\\BIG DATA UAO\\medical_costs.csv"
  )
)
## Rows: 10000 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): Sex, Smoker, Region
## dbl (4): Age, BMI, Children, Medical Cost
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first five rows of the dataset.
head(medical_costs, n = 5)
## # A tibble: 5 × 7
##     Age Sex      BMI Children Smoker Region    `Medical Cost`
##   <dbl> <chr>  <dbl>    <dbl> <chr>  <chr>              <dbl>
## 1    58 male    15.6        2 yes    northwest         17908.
## 2    24 male    29.8        0 yes    northeast         16313.
## 3    50 male    29          5 no     northwest          6819.
## 4    35 male    34          1 no     southeast          5248.
## 5    31 female  17.6        3 yes    southeast         17525.

Promedio de costos médicos de hombres agrupados por región

# Mean medical cost per region, restricted to rows where Sex is "male".
# Only Region and the cost column reach the summary, so the cost column is
# read directly instead of being selected and renamed first.
medical_costs %>%
  filter(Sex == "male") %>%
  group_by(Region) %>%
  summarise(Avg_Costo = mean(`Medical Cost`))
## # A tibble: 4 × 2
##   Region    Avg_Costo
##   <chr>         <dbl>
## 1 northeast    11959.
## 2 northwest    11737.
## 3 southeast    11910.
## 4 southwest    11910.

Promedio de costos médicos de mujeres agrupados por región

# Mean medical cost per region, restricted to rows where Sex is "female".
medical_costs %>%
  filter(Sex == "female") %>%
  rename(Costo = `Medical Cost`) %>%
  group_by(Region) %>%
  summarise(Avg_Costo = mean(Costo))
## # A tibble: 4 × 2
##   Region    Avg_Costo
##   <chr>         <dbl>
## 1 northeast    11870.
## 2 northwest    11976.
## 3 southeast    12088.
## 4 southwest    11737.

Costos médicos según edad

# Mean medical cost for each distinct age (one row per Age value).
datos <- medical_costs %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(`Medical Cost`))

# The table has exactly two columns (Age, Avg_Costo), so plot() draws the
# first against the second.
plot(datos)

## Costos médicos según edad en personas que fuman

# Mean medical cost for each age, keeping only rows where Smoker is "yes".
datos2 <- medical_costs %>%
  filter(Smoker == "yes") %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(`Medical Cost`))

# Two-column table (Age, Avg_Costo): plotted as Age against average cost.
plot(datos2)

## Costos médicos según edad en personas que no fuman

# Mean medical cost for each age, keeping only rows where Smoker is "no".
datos3 <- medical_costs %>%
  filter(Smoker == "no") %>%
  group_by(Age) %>%
  summarise(Avg_Costo = mean(`Medical Cost`))

# Two-column table (Age, Avg_Costo): plotted as Age against average cost.
plot(datos3)

Costos médicos según consumo de tabaco

# Mean medical cost for each value of Smoker.
datos4 <- medical_costs %>%
  group_by(Smoker) %>%
  summarise(Avg_Costo = mean(`Medical Cost`))

# One bar per Smoker value; bar height is that group's mean cost.
with(
  datos4,
  barplot(
    Avg_Costo,
    names.arg = Smoker,
    main = "Ingerencia de consumo de tabaco en costos medicos"
  )
)