To generate a report for the insurance dataset with bmi as the response variable, call create_report(dataname, y = "bmi"):
# Report generation with bmi as the response is left commented out.
# create_report(dt, y = "bmi")

# Wrap the dataset in a list and plot its structure.
data_list <- list(dt)
plot_str(data = data_list)

# Dataset-wide overview plots: basic information, missing values, and
# histograms.
plot_intro(data = dt)
plot_missing(data = dt)
plot_histogram(data = dt)
# Scatterplots of bmi against all other continuous features. Only the
# continuous columns returned by split_columns() are passed in.
plot_scatterplot(split_columns(dt)$continuous, by = "bmi")
# QQ plots: first for the dataset as a whole, then broken down by bmi.
plot_qq(data = dt)
plot_qq(data = dt, by = "bmi")

# Bar charts with the category limit set to 20, built in parallel.
plot_bar(data = dt, maxcat = 20, parallel = TRUE)

# Boxplots broken down by sex, smoker, and region in turn, laid out in
# two columns.
plot_boxplot(data = dt, by = "sex", ncol = 2)
plot_boxplot(data = dt, by = "smoker", ncol = 2)
plot_boxplot(data = dt, by = "region", ncol = 2)

# Correlation plot for the dataset.
plot_correlation(data = dt)
# Dummify the insurance dataset and preview the first rows.
head(dummify(data = dt))
## age bmi children charges sex_female sex_male smoker_no smoker_yes
## 1 19 27.900 0 16884.924 1 0 0 1
## 2 18 33.770 1 1725.552 0 1 1 0
## 3 28 33.000 3 4449.462 0 1 1 0
## 4 33 22.705 0 21984.471 0 1 1 0
## 5 32 28.880 0 3866.855 0 1 1 0
## 6 31 25.740 0 3756.622 1 0 1 0
## region_northeast region_northwest region_southeast region_southwest
## 1 0 0 0 1
## 2 0 0 1 0
## 3 0 0 1 0
## 4 0 1 0 0
## 5 0 1 0 0
## 6 0 0 1 0
# Apply as.factor to the sex, smoker, and region columns and preview the
# first rows.
head(
  update_columns(
    data = dt,
    ind = c("sex", "smoker", "region"),
    what = as.factor
  )
)
## age sex bmi children smoker region charges
## 1 19 female 27.900 0 yes southwest 16884.924
## 2 18 male 33.770 1 no southeast 1725.552
## 3 28 male 33.000 3 no southeast 4449.462
## 4 33 male 22.705 0 no northwest 21984.471
## 5 32 male 28.880 0 no northwest 3866.855
## 6 31 female 25.740 0 no southeast 3756.622
# Drop the first two columns by position and preview the first rows.
head(drop_columns(data = dt, ind = 1:2))
## bmi children smoker region charges
## 1 27.900 0 yes southwest 16884.924
## 2 33.770 1 no southeast 1725.552
## 3 33.000 3 no southeast 4449.462
## 4 22.705 0 no northwest 21984.471
## 5 28.880 0 no northwest 3866.855
## 6 25.740 0 no southeast 3756.622