# Install the required packages only when they are missing, so re-running
# the script does not download and reinstall them every time.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
# Loading the required libraries
library(openxlsx)
# NOTE(review): no caret function is called in the visible part of this
# script; confirm whether this dependency is still needed.
library(caret)
# Read the first sheet of the insurance workbook into a data frame
data <- read.xlsx(
  xlsxFile = "C:\\Users\\Study\\Desktop\\r files\\DekutR session\\R assignment\\insurance_data.xlsx",
  sheet = 1
)
# Preview the first six rows of the imported data
head(data, n = 6L)
# Fit a multiple linear regression of charges on the six predictors
model <- lm(
  formula = charges ~ age + children + sex + bmi + smoker + region,
  data = data
)
summary(model)
Call:
lm(formula = charges ~ age + children + sex + bmi + smoker +
region, data = data)
Residuals:
Min 1Q Median 3Q Max
-11304.9 -2848.1 -982.1 1393.9 29992.8
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -11938.5 987.8 -12.086 < 2e-16 ***
age 256.9 11.9 21.587 < 2e-16 ***
children 475.5 137.8 3.451 0.000577 ***
sexmale -131.3 332.9 -0.394 0.693348
bmi 339.2 28.6 11.860 < 2e-16 ***
smokeryes 23848.5 413.1 57.723 < 2e-16 ***
regionnorthwest -353.0 476.3 -0.741 0.458769
regionsoutheast -1035.0 478.7 -2.162 0.030782 *
regionsouthwest -960.0 477.9 -2.009 0.044765 *
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6062 on 1329 degrees of freedom
Multiple R-squared: 0.7509, Adjusted R-squared: 0.7494
F-statistic: 500.8 on 8 and 1329 DF, p-value: < 2.2e-16
# Turn the three categorical columns into factors in a single step
data[c("sex", "region", "smoker")] <- lapply(
  data[c("sex", "region", "smoker")],
  as.factor
)
print(data)
# Show the categories of each factor, starting with sex
levels(data[["sex"]])
[1] "female" "male"
levels(data[["region"]])
[1] "northeast" "northwest" "southeast" "southwest"
levels(data[["smoker"]])
[1] "no" "yes"
# Creating new data for clients aged 20 and 45
# Each column holds exactly one value per client (two rows in total).
# The values go into factor() as a single vector, and `levels` is passed
# inside the factor() call; the levels are taken from the fitted model so
# the new rows are encoded the same way as the data the model was fitted on.
new_data <- data.frame(
  age = c(20, 45),
  children = c(0, 0),
  sex = factor(c("male", "female"), levels = model$xlevels[["sex"]]),
  bmi = c(26.22, 39.6),
  smoker = factor(c("yes", "no"), levels = model$xlevels[["smoker"]]),
  region = factor(
    c("northeast", "northwest"),
    levels = model$xlevels[["region"]]
  )
)
# Fail early if a value did not match a known level (e.g. a misspelt
# region): such a value becomes NA and would silently give an NA prediction.
stopifnot(nrow(new_data) == 2L, all(complete.cases(new_data)))
# making predictions
predictions <- predict(model, newdata = new_data)
head(predictions)
1 2 3 4
NA NA NA NA
LS0tDQp0aXRsZTogIlJlYWRpbmcgZXhjZWwgaW50byBSIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJvcGVueGxzeCIpDQppbnN0YWxsLnBhY2thZ2VzKCJjYXJldCIpDQpgYGANCg0KDQpgYGB7cn0NCiMgTG9hZGluZyB0aGUgcmVxdWlyZWQgbGlicmFyaWVzDQpsaWJyYXJ5KG9wZW54bHN4KQ0KbGlicmFyeShjYXJldCkNCmBgYA0KDQoNCmBgYHtyfQ0KZGF0YTwtIHJlYWQueGxzeCgiQzpcXFVzZXJzXFxTdHVkeVxcRGVza3RvcFxcciBmaWxlc1xcRGVrdXRSIHNlc3Npb25cXFIgYXNzaWdubWVudFxcaW5zdXJhbmNlX2RhdGEueGxzeCIsIHNoZWV0PTEpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KIyBDaGVja2luZyBkYXRhDQpoZWFkKGRhdGEpDQpgYGANCg0KDQpgYGB7cn0NCiMgQ3JlYXRpbmcgYSBtdWx0aXBsZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbA0KbW9kZWwgPC0gbG0oY2hhcmdlcyB+IGFnZSArIGNoaWxkcmVuICsgc2V4ICsgYm1pICtzbW9rZXIgKyByZWdpb24sIGRhdGEgKSAgDQpzdW1tYXJ5KG1vZGVsKSAgIyBWaWV3aW5nIHRoZSBzdW1tYXJ5IG9mIHRoZSBtb2RlbA0KYGBgDQoNCmBgYHtyfQ0KIyBDb252ZXJ0aW5nIGEgY29sdW1uIGluIHRoZSBkYXRhIGludG8gYSBmYWN0b3INCmRhdGEkc2V4IDwtIGFzLmZhY3RvcihkYXRhJHNleCkNCmRhdGEkcmVnaW9uIDwtIGFzLmZhY3RvcihkYXRhJHJlZ2lvbikNCmRhdGEkc21va2VyIDwtIGFzLmZhY3RvcihkYXRhJHNtb2tlcikNCnByaW50KGRhdGEpDQpsZXZlbHMoZGF0YSRzZXgpDQpsZXZlbHMoZGF0YSRyZWdpb24pDQpsZXZlbHMoZGF0YSRzbW9rZXIpDQpgYGANCg0KYGBge3J9DQojIENyZWF0aW5nIG5ldyBkYXRhIGZvciBjbGllbnRzIGFnZWQgMjAgYW5kIDQ1DQpuZXdfZGF0YSA8LSBkYXRhLmZyYW1lKGFnZSA9IGMoMjAsNDUpLA0KICAgICAgICAgICAgICAgICAgICAgICBjaGlsZHJlbiA9IGMoMCwwKSwNCiAgICAgICAgICAgICAgICAgICAgICAgc2V4ID0gZmFjdG9yKCJtYWxlIiwgImZlbWFsZSIpLGxldmVscyA9IGxldmVscyhkYXRhJHNleCksDQogICAgICAgICAgICAgICAgICAgICAgIGJtaSA9IGMoMjYuMjIsIDM5LjYpLCANCiAgICAgICAgICAgICAgICAgICAgICAgc21va2VyID0gZmFjdG9yKCJ5ZXMiLCAibm8iKSwgbGV2ZWxzID0gbGV2ZWxzKGRhdGEkc21va2VyKSwNCiAgICAgICAgICAgICAgICAgICAgICAgcmVnaW9uID0gZmFjdG9yKCJub3J0aHRoZWFzdCIsICJub3J0aHdlc3QiKSwgbGV2ZWxzKGRhdGEkcmVnaW9uKSkNCg0KYGBgDQoNCg0KYGBge3J9DQojIG1ha2luZyBwcmVkaWN0aW9ucw0KcHJlZGljdGlvbnMgPC0gcHJlZGljdChtb2RlbCwgbmV3X2RhdGEpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KaGVhZChwcmVkaWN0aW9ucykNCmBgYA0KDQo=