# Install each required package only when it is not already available.
# An unconditional install.packages() call re-downloads the package on every
# run, needs network access, and stops in a non-interactive session when no
# CRAN mirror has been configured.
if (!requireNamespace("openxlsx", quietly = TRUE)) {
  install.packages("openxlsx")
}
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
# Loading the required libraries
library(openxlsx)
# NOTE(review): no caret function is called in the code visible here —
# confirm the package is still needed.
library(caret)
# Import the first worksheet of the insurance workbook.
# NOTE(review): absolute, machine-specific path — the script only runs where
# the workbook exists at exactly this location.
data <- read.xlsx(
  xlsxFile = "C:\\Users\\Study\\Desktop\\r files\\DekutR session\\R assignment\\insurance_data.xlsx",
  sheet = 1
)
# Preview the first rows of the imported data
head(data)
# Fit a multiple linear regression of charges on all six predictors
model <- lm(
  formula = charges ~ age + children + sex + bmi + smoker + region,
  data = data
)
# Coefficient table and overall fit statistics
summary(model)

Call:
lm(formula = charges ~ age + children + sex + bmi + smoker + 
    region, data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-11304.9  -2848.1   -982.1   1393.9  29992.8 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)     -11938.5      987.8 -12.086  < 2e-16 ***
age                256.9       11.9  21.587  < 2e-16 ***
children           475.5      137.8   3.451 0.000577 ***
sexmale           -131.3      332.9  -0.394 0.693348    
bmi                339.2       28.6  11.860  < 2e-16 ***
smokeryes        23848.5      413.1  57.723  < 2e-16 ***
regionnorthwest   -353.0      476.3  -0.741 0.458769    
regionsoutheast  -1035.0      478.7  -2.162 0.030782 *  
regionsouthwest   -960.0      477.9  -2.009 0.044765 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 6062 on 1329 degrees of freedom
Multiple R-squared:  0.7509,    Adjusted R-squared:  0.7494 
F-statistic: 500.8 on 8 and 1329 DF,  p-value: < 2.2e-16
# Turn the three categorical columns into factors in a single assignment
data[c("sex", "region", "smoker")] <- lapply(
  data[c("sex", "region", "smoker")],
  as.factor
)
# Show the converted data frame
print(data)
# List the categories of each factor
levels(data[["sex"]])
[1] "female" "male"  
levels(data[["region"]])
[1] "northeast" "northwest" "southeast" "southwest"
levels(data[["smoker"]])
[1] "no"  "yes"
# Build the prediction inputs for two clients, aged 20 and 45.
#
# Each categorical column is created with factor(c(...), levels = ...):
#   * the values must be wrapped in c(), because the second positional
#     argument of factor() is `levels`, so factor("male", "female") yields a
#     single NA instead of two values;
#   * `levels` must be given inside the factor() call; placed outside it,
#     data.frame() treats it as an additional column and recycles every
#     other column to the longest one (four rows instead of two).
new_data <- data.frame(
  age = c(20, 45),
  children = c(0, 0),
  sex = factor(c("male", "female"), levels = levels(data$sex)),
  bmi = c(26.22, 39.6),
  smoker = factor(c("yes", "no"), levels = levels(data$smoker)),
  region = factor(c("northeast", "northwest"), levels = levels(data$region))
)
# A value that is not one of the supplied levels silently becomes NA and
# would give an NA prediction, so stop early if that happens.
stopifnot(nrow(new_data) == 2L, all(complete.cases(new_data)))

# Predict the charges for the two new clients
predictions <- predict(model, newdata = new_data)
head(predictions)
# Output recorded with the earlier, malformed four-row new_data:
#  1  2  3  4 
# NA NA NA NA 
LS0tDQp0aXRsZTogIlJlYWRpbmcgZXhjZWwgaW50byBSIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJvcGVueGxzeCIpDQppbnN0YWxsLnBhY2thZ2VzKCJjYXJldCIpDQpgYGANCg0KDQpgYGB7cn0NCiMgTG9hZGluZyB0aGUgcmVxdWlyZWQgbGlicmFyaWVzDQpsaWJyYXJ5KG9wZW54bHN4KQ0KbGlicmFyeShjYXJldCkNCmBgYA0KDQoNCmBgYHtyfQ0KZGF0YTwtIHJlYWQueGxzeCgiQzpcXFVzZXJzXFxTdHVkeVxcRGVza3RvcFxcciBmaWxlc1xcRGVrdXRSIHNlc3Npb25cXFIgYXNzaWdubWVudFxcaW5zdXJhbmNlX2RhdGEueGxzeCIsIHNoZWV0PTEpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KIyBDaGVja2luZyBkYXRhDQpoZWFkKGRhdGEpDQpgYGANCg0KDQpgYGB7cn0NCiMgQ3JlYXRpbmcgYSBtdWx0aXBsZSBsaW5lYXIgcmVncmVzc2lvbiBtb2RlbA0KbW9kZWwgPC0gbG0oY2hhcmdlcyB+IGFnZSArIGNoaWxkcmVuICsgc2V4ICsgYm1pICtzbW9rZXIgKyByZWdpb24sIGRhdGEgKSAgDQpzdW1tYXJ5KG1vZGVsKSAgIyBWaWV3aW5nIHRoZSBzdW1tYXJ5IG9mIHRoZSBtb2RlbA0KYGBgDQoNCmBgYHtyfQ0KIyBDb252ZXJ0aW5nIGEgY29sdW1uIGluIHRoZSBkYXRhIGludG8gYSBmYWN0b3INCmRhdGEkc2V4IDwtIGFzLmZhY3RvcihkYXRhJHNleCkNCmRhdGEkcmVnaW9uIDwtIGFzLmZhY3RvcihkYXRhJHJlZ2lvbikNCmRhdGEkc21va2VyIDwtIGFzLmZhY3RvcihkYXRhJHNtb2tlcikNCnByaW50KGRhdGEpDQpsZXZlbHMoZGF0YSRzZXgpDQpsZXZlbHMoZGF0YSRyZWdpb24pDQpsZXZlbHMoZGF0YSRzbW9rZXIpDQpgYGANCg0KYGBge3J9DQojIENyZWF0aW5nIG5ldyBkYXRhIGZvciBjbGllbnRzIGFnZWQgMjAgYW5kIDQ1DQpuZXdfZGF0YSA8LSBkYXRhLmZyYW1lKGFnZSA9IGMoMjAsNDUpLA0KICAgICAgICAgICAgICAgICAgICAgICBjaGlsZHJlbiA9IGMoMCwwKSwNCiAgICAgICAgICAgICAgICAgICAgICAgc2V4ID0gZmFjdG9yKCJtYWxlIiwgImZlbWFsZSIpLGxldmVscyA9IGxldmVscyhkYXRhJHNleCksDQogICAgICAgICAgICAgICAgICAgICAgIGJtaSA9IGMoMjYuMjIsIDM5LjYpLCANCiAgICAgICAgICAgICAgICAgICAgICAgc21va2VyID0gZmFjdG9yKCJ5ZXMiLCAibm8iKSwgbGV2ZWxzID0gbGV2ZWxzKGRhdGEkc21va2VyKSwNCiAgICAgICAgICAgICAgICAgICAgICAgcmVnaW9uID0gZmFjdG9yKCJub3J0aHRoZWFzdCIsICJub3J0aHdlc3QiKSwgbGV2ZWxzKGRhdGEkcmVnaW9uKSkNCg0KYGBgDQoNCg0KYGBge3J9DQojIG1ha2luZyBwcmVkaWN0aW9ucw0KcHJlZGljdGlvbnMgPC0gcHJlZGljdChtb2RlbCwgbmV3X2RhdGEpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KaGVhZChwcmVkaWN0aW9ucykNCmBgYA0KDQo=