# Load the insurance data set from the Excel workbook and inspect its structure.
insurance <- read_xlsx("insurance.xlsx")
str(insurance)
## tibble [1,338 × 7] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:1338] 19 18 28 33 32 31 46 37 37 60 ...
## $ sex : chr [1:1338] "female" "male" "male" "male" ...
## $ bmi : num [1:1338] 27.9 33.8 33 22.7 28.9 ...
## $ children: num [1:1338] 0 1 3 0 0 0 1 3 2 0 ...
## $ smoker : chr [1:1338] "yes" "no" "no" "no" ...
## $ region : chr [1:1338] "southwest" "southeast" "southeast" "northwest" ...
## $ charges : num [1:1338] 16885 1726 4449 21984 3867 ...

# The character columns are categorical, so convert them to factors in one pass.
insurance[c("sex", "smoker", "region")] <- lapply(
  insurance[c("sex", "smoker", "region")],
  as.factor
)
# Charges against number of children. Points are jittered and semi-transparent
# so that overlapping observations stay visible.
ggplot(insurance, aes(x = children, y = charges)) +
  geom_jitter(alpha = 0.5) +
  labs(
    title = "# of children and Medical Costs",
    x = "# of children",
    y = "Medical Costs"
  ) +
  theme_bw()
# Charges by region. Region is categorical, so points are jittered and
# semi-transparent to keep overlapping observations visible.
ggplot(data = insurance, aes(x = region, y = charges)) +
  geom_jitter(alpha = 0.5) +
  theme_bw() +
  # The x aesthetic is `region`, so the axis label must say so.
  labs(x = "Region", y = "Medical Costs", title = "Region and Medical Costs")
# Scatter plot of charges against BMI for the whole data set.
ggplot(insurance, aes(x = bmi, y = charges)) +
  geom_point() +
  labs(
    title = "BMI and Medical Costs",
    x = "BMI",
    y = "Medical Costs"
  ) +
  theme_bw()
# Scatter plot of BMI against age.
ggplot(insurance, aes(x = age, y = bmi)) +
  geom_point() +
  labs(
    title = "BMI and Age",
    x = "Age",
    y = "BMI"
  ) +
  theme_bw()
# Charges against BMI, drawn in one panel per level of `sex`.
ggplot(insurance, aes(x = bmi, y = charges)) +
  geom_point() +
  facet_wrap(~ sex) +
  labs(
    title = "BMI and Medical Costs by gender",
    x = "BMI",
    y = "Medical Costs"
  ) +
  theme_bw()
#scale_fill_viridis_d(option = "D")
# Charges against BMI, drawn in one panel per region.
ggplot(insurance, aes(x = bmi, y = charges)) +
  geom_point() +
  facet_wrap(~ region) +
  labs(
    title = "BMI and Medical Costs by regions",
    x = "BMI",
    y = "Medical Costs"
  ) +
  theme_bw()
# Charges for each smoker category, drawn as raw points.
ggplot(insurance, aes(x = smoker, y = charges)) +
  geom_point() +
  labs(
    title = "Smoking and Medical Costs",
    x = "Smoker",
    y = "Medical Costs"
  ) +
  theme_bw()
# Charges for each smoker category, one panel per level of `sex`.
# The boxplot is added after the points, so it is drawn on top of them;
# alpha = 0.5 keeps the points underneath visible.
ggplot(insurance, aes(x = smoker, y = charges)) +
  geom_point() +
  geom_boxplot(alpha = 0.5) +
  facet_wrap(~ sex) +
  labs(
    title = "Smoking and Medical Costs by gender",
    x = "Smoker",
    y = "Medical Costs"
  ) +
  theme_bw()
# Charges for each smoker category, one panel per region.
# The boxplot is added after the points, so it is drawn on top of them;
# alpha = 0.5 keeps the points underneath visible.
ggplot(insurance, aes(x = smoker, y = charges)) +
  geom_point() +
  geom_boxplot(alpha = 0.5) +
  facet_wrap(~ region) +
  labs(
    title = "Smoking and Medical Costs by region",
    x = "Smoker",
    y = "Medical Costs"
  ) +
  theme_bw()
# Charges against BMI in a single panel, with point colour mapped to `smoker`.
ggplot(insurance, aes(x = bmi, y = charges)) +
  geom_point(mapping = aes(color = smoker)) +
  labs(
    title = "BMI and Medical Costs by smoker",
    x = "BMI",
    y = "Medical Costs"
  ) +
  theme_bw()
# To be continued…