# Load the insurance data set; text columns are converted to factors
insurance <- read.csv(
  file = "insurance.csv",
  stringsAsFactors = TRUE
)
# Show the type and the first few values of every column
str(insurance)
'data.frame': 1338 obs. of 7 variables:
$ age : int 19 18 28 33 32 31 46 37 37 60 ...
$ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
$ bmi : num 27.9 33.8 33 22.7 28.9 25.7 33.4 27.7 29.8 25.8 ...
$ children: int 0 1 3 0 0 0 1 3 2 0 ...
$ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
$ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
$ expenses: num 16885 1726 4449 21984 3867 ...
# I did the first steps of activity 7, then used the MLR solution that we built during class
# quartiles, mean and range of the expenses (insurance charges) column
summary(insurance[["expenses"]])
Min. 1st Qu. Median Mean 3rd Qu. Max.
1122 4740 9382 13270 16640 63770
# histogram of insurance charges (the `expenses` column)
# hist() builds its default title and x-axis label from the argument
# expression as written, so both will read "insurance$expenses"
hist(insurance$expenses)

# number of observations in each region
table(insurance[["region"]])
northeast northwest southeast southwest
324 325 364 325
# pairwise correlations between the numeric columns, including the outcome
cor(insurance[, c("age", "bmi", "children", "expenses")])
age bmi children expenses
age 1.0000000 0.10934101 0.04246900 0.29900819
bmi 0.1093410 1.00000000 0.01264471 0.19857626
children 0.0424690 0.01264471 1.00000000 0.06799823
expenses 0.2990082 0.19857626 0.06799823 1.00000000
# Baseline multiple regression: expenses on every predictor, spelled out
ins_model <- lm(
  formula = expenses ~ age + children + bmi + sex + smoker + region,
  data = insurance
)
# Shorthand for the same fit: `.` stands for all columns other than expenses.
# This overwrites the model above, and it is only equivalent because it runs
# before the age2 and bmi30 columns are added to `insurance`.
ins_model <- lm(formula = expenses ~ ., data = insurance)
# add a higher-order "age" term (age squared) as an extra predictor
insurance$age2 <- insurance$age^2
# add a 0/1 indicator for BMI >= 30
insurance$bmi30 <- ifelse(insurance$bmi >= 30, 1, 0)
# create final model (a single fit is enough; refitting the same formula on
# the same data gives the same model)
# bmi30 * smoker expands to bmi30 + smoker + bmi30:smoker, i.e. both main
# effects plus their interaction
ins_model2 <- lm(expenses ~ age + age2 + children + bmi + sex +
                   bmi30 * smoker + region, data = insurance)
# The engineered predictors (age2, bmi30) are derived from age and bmi with
# the same expressions used when they were added to `insurance`, so they
# cannot drift out of sync with the raw inputs of a case.

# Predict for Case 1
case1 <- predict(
  ins_model2,
  newdata = within(
    data.frame(age = 22, children = 3, bmi = 24, sex = "female",
               smoker = "no", region = "northwest"),
    {
      age2 <- age^2
      bmi30 <- ifelse(bmi >= 30, 1, 0)
    }
  )
)
# Predict for Case 2
case2 <- predict(
  ins_model2,
  newdata = within(
    data.frame(age = 22, children = 1, bmi = 27, sex = "male",
               smoker = "yes", region = "southeast"),
    {
      age2 <- age^2
      bmi30 <- ifelse(bmi >= 30, 1, 0)
    }
  )
)
# Print results
# cat() separates its arguments with a space, so a space precedes the newline
cat("Case 1 Prediction:", case1, "\n")
Case 1 Prediction: 5858.241
# Print the predicted expenses for Case 2 on its own line
cat("Case 2 Prediction:", case2, "\n")
Case 2 Prediction: 17219.31
LS0tCnRpdGxlOiAiSW4tY2xhc3MgYWN0aXZpdHkgIzgiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCgpgYGB7cn0KI0ZpcnN0LCBJIGltcG9ydGVkIHRoZSBpbnN1cmFuY2UgZGF0YXNldCBpbiBSCmluc3VyYW5jZSA8LSByZWFkLmNzdigiaW5zdXJhbmNlLmNzdiIsIHN0cmluZ3NBc0ZhY3RvcnMgPSBUUlVFKQpzdHIoaW5zdXJhbmNlKQoKI0kgZGlkIHRoZSBmaXJzdCBzdGVwdHMgb2YgYWN0aXZpdHkgNywgdGhlbiB1c2VkIHRoZSBNTFIgc29sdXRpb24gdGhhdCB3ZSBidWlsdCBkdXJpbmcgY2xhc3MgCgojIHN1bW1hcml6ZSB0aGUgY2hhcmdlcyB2YXJpYWJsZQpzdW1tYXJ5KGluc3VyYW5jZSRleHBlbnNlcykKCiMgaGlzdG9ncmFtIG9mIGluc3VyYW5jZSBjaGFyZ2VzCmhpc3QoaW5zdXJhbmNlJGV4cGVuc2VzKQoKIyB0YWJsZSBvZiByZWdpb24KdGFibGUoaW5zdXJhbmNlJHJlZ2lvbikKCiMgZXhwbG9yaW5nIHJlbGF0aW9uc2hpcHMgYW1vbmcgZmVhdHVyZXM6IGNvcnJlbGF0aW9uIG1hdHJpeApjb3IoaW5zdXJhbmNlW2MoImFnZSIsICJibWkiLCAiY2hpbGRyZW4iLCAiZXhwZW5zZXMiKV0pCgppbnNfbW9kZWwgPC0gbG0oZXhwZW5zZXMgfiBhZ2UgKyBjaGlsZHJlbiArIGJtaSArIHNleCArIHNtb2tlciArIHJlZ2lvbiwKICAgICAgICAgICAgICAgIGRhdGEgPSBpbnN1cmFuY2UpCmluc19tb2RlbCA8LSBsbShleHBlbnNlcyB+IC4sIGRhdGEgPSBpbnN1cmFuY2UpICMgdGhpcyBpcyBlcXVpdmFsZW50IHRvIGFib3ZlCgojIGFkZCBhIGhpZ2hlci1vcmRlciAiYWdlIiB0ZXJtCmluc3VyYW5jZSRhZ2UyIDwtIGluc3VyYW5jZSRhZ2VeMgojIGFkZCBhbiBpbmRpY2F0b3IgZm9yIEJNSSA+PSAzMAppbnN1cmFuY2UkYm1pMzAgPC0gaWZlbHNlKGluc3VyYW5jZSRibWkgPj0gMzAsIDEsIDApCiMgY3JlYXRlIGZpbmFsIG1vZGVsCmluc19tb2RlbDIgPC0gbG0oZXhwZW5zZXMgfiBhZ2UgKyBhZ2UyICsgY2hpbGRyZW4gKyBibWkgKyBzZXggKwogICAgICAgICAgICAgICAgICAgYm1pMzAqc21va2VyICsgcmVnaW9uLCBkYXRhID0gaW5zdXJhbmNlKQoKaW5zX21vZGVsMiA8LSBsbShleHBlbnNlcyB+IGFnZSArIGFnZTIgKyBjaGlsZHJlbiArIGJtaSArIHNleCArCiAgICAgICAgICAgICAgICAgICBibWkzMCpzbW9rZXIgKyByZWdpb24sIGRhdGEgPSBpbnN1cmFuY2UpCgojIFByZWRpY3QgZm9yIENhc2UgMQpjYXNlMSA8LSBwcmVkaWN0KGluc19tb2RlbDIsCiAgICAgICAgZGF0YS5mcmFtZShhZ2UgPSAyMiwgYWdlMiA9IDIyXjIsIGNoaWxkcmVuID0gMywKICAgICAgICAgICAgICAgICAgIGJtaSA9IDI0LCBzZXggPSAiZmVtYWxlIiwgYm1pMzAgPSAwLAogICAgICAgICAgICAgICAgICAgc21va2VyID0gIm5vIiwgcmVnaW9uID0gIm5vcnRod2VzdCIpKQoKIyBQcmVkaWN0IGZvciBDYXNlIDIKY2FzZTIgPC0gcHJlZGljdChpbnNfbW9kZWwyLAogICAgICAgIGRhdGEuZnJhbWUoYWdlID0gMjIsIGFnZTIgPSAyMl4yLCBjaGlsZHJl
biA9IDEsCiAgICAgICAgICAgICAgICAgICBibWkgPSAyNywgc2V4ID0gIm1hbGUiLCBibWkzMCA9IDAsCiAgICAgICAgICAgICAgICAgICBzbW9rZXIgPSAieWVzIiwgcmVnaW9uID0gInNvdXRoZWFzdCIpKQoKIyBQcmludCByZXN1bHRzCmNhdCgiQ2FzZSAxIFByZWRpY3Rpb246IiwgY2FzZTEsICJcbiIpCmNhdCgiQ2FzZSAyIFByZWRpY3Rpb246IiwgY2FzZTIsICJcbiIpCmBgYAoK