I found an interesting dataset on Kaggle: Medical Cost Personal Datasets.
library(tidyverse)## -- Attaching packages ---------------------------------- tidyverse 1.2.1 --
## <U+221A> ggplot2 2.2.1 <U+221A> purrr 0.2.4
## <U+221A> tibble 1.3.4 <U+221A> dplyr 0.7.4
## <U+221A> tidyr 0.7.2 <U+221A> stringr 1.3.0
## <U+221A> readr 1.1.1 <U+221A> forcats 0.3.0
## -- Conflicts ------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(gganimate)
library(data.table)##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
library(plotly)##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the insurance CSV as a plain data.frame (data.table = FALSE) and make
# `smoker` a factor so it can drive the fill/colour grouping in the plots below.
insurance <- fread(
  "https://s3-ap-southeast-2.amazonaws.com/koki25ando/insurance.csv",
  data.table = FALSE
)
insurance$smoker <- as.factor(insurance$smoker)

# Violin plot of charges, one violin per smoker level.
insurance %>%
  ggplot(aes(x = smoker, y = charges, fill = smoker, colour = smoker)) +
  geom_violin()

# Charges against bmi, coloured by smoker, with a linear smoother
# (method = "lm") drawn on top of the points.
insurance %>%
  ggplot(aes(x = bmi, y = charges, colour = smoker)) +
  geom_point() +
  geom_smooth(method = "lm")

# These two visualizations show us there are huge differences between smokers
# and non-smokers. So after the next animated plot, I'm going to focus on the
# bmi variable of smokers.
# Animated plotly scatter of charges against bmi: marker colour follows
# `smoker`, marker size follows `children`, and `age` is the animation frame.
# TRUE is spelled out because `T` is an ordinary variable that can be reassigned.
insurance %>%
  plot_ly(
    x = ~bmi, y = ~charges, color = ~smoker, size = ~children, frame = ~age,
    type = "scatter", mode = "markers", showlegend = TRUE
  )
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
# Keep only the rows where smoker is "yes", then regress charges on bmi
# within that subset and print the fitted model.
insurance.smoker <- filter(insurance, smoker == "yes")
fit <- lm(
  formula = charges ~ bmi,
  data = insurance.smoker
)
print(fit)
##
## Call:
## lm(formula = charges ~ bmi, data = insurance.smoker)
##
## Coefficients:
## (Intercept) bmi
## -13187 1473
Example 1) bmi = 30
# Worked example: plug bmi = 30 into the fitted line, using the rounded
# coefficients from the printed model (slope 1473, intercept -13187).
x <- 30
charge.amount <- 1473 * x - 13187
charge.amount
## [1] 31003