This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Per-column summary statistics of the `cars` data set (columns: speed, dist).
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
# Load the age/height data from the Excel workbook.
library(readxl)
ageandheight <- read_excel("AgeHeight.xlsx")
# View() opens a spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the document is knitted or run in batch.
if (interactive()) {
  View(ageandheight)
}
# Simple linear regression: height explained by age.
lmHeight <- lm(height ~ age, data = ageandheight)
# Residual summary, coefficient table and goodness-of-fit statistics.
summary(lmHeight)
##
## Call:
## lm(formula = height ~ age, data = ageandheight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.27238 -0.24248 -0.02762 0.16014 0.47238
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.9283 0.5084 127.71 < 2e-16 ***
## age 0.6350 0.0214 29.66 4.43e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.256 on 10 degrees of freedom
## Multiple R-squared: 0.9888, Adjusted R-squared: 0.9876
## F-statistic: 880 on 1 and 10 DF, p-value: 4.428e-11
# Reading the simple-regression output: residuals are the errors, and
# (Intercept) is 'a' in the assumed formula height = a + b * age, i.e.
# height = 64.93 + (0.635 * age).
# MULTIPLE LINEAR REGRESSION
# Fit height on two predictors: age and number of siblings.
lmHeight2 <- lm(
  formula = height ~ age + no_siblings,
  data = ageandheight
)
# Residual summary, coefficient table and goodness-of-fit statistics.
summary(lmHeight2)
##
## Call:
## lm(formula = height ~ age + no_siblings, data = ageandheight)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.26297 -0.22462 -0.02021 0.16102 0.49752
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 64.90554 0.53526 121.260 8.96e-16 ***
## age 0.63751 0.02340 27.249 5.85e-10 ***
## no_siblings -0.01772 0.04735 -0.374 0.717
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2677 on 9 degrees of freedom
## Multiple R-squared: 0.9889, Adjusted R-squared: 0.9865
## F-statistic: 402.2 on 2 and 9 DF, p-value: 1.576e-09
# INSURANCE COMPANY'S BUSINESS MODEL
library(readr)
# The file is read with base read.csv(); stringsAsFactors = TRUE turns the
# text columns (sex, smoker, region) into factors.
insuranceDS <- read.csv("~/insurance.csv", stringsAsFactors = TRUE)
# View() opens a spreadsheet-style data viewer, which only makes sense in an
# interactive session; skip it when the document is knitted or run in batch.
if (interactive()) {
  View(insuranceDS)
}
# Per-column summary: quartiles for numeric columns, counts for factors.
summary(insuranceDS)
## age sex bmi children smoker
## Min. :18.00 female:662 Min. :15.96 Min. :0.000 no :1064
## 1st Qu.:27.00 male :676 1st Qu.:26.30 1st Qu.:0.000 yes: 274
## Median :39.00 Median :30.40 Median :1.000
## Mean :39.21 Mean :30.66 Mean :1.095
## 3rd Qu.:51.00 3rd Qu.:34.69 3rd Qu.:2.000
## Max. :64.00 Max. :53.13 Max. :5.000
## region charges
## northeast:324 Min. : 1122
## northwest:325 1st Qu.: 4740
## southeast:364 Median : 9382
## southwest:325 Mean :13270
## 3rd Qu.:16640
## Max. :63770
# Compact overview of every column: type plus the first few values.
str(object = insuranceDS)
## 'data.frame': 1338 obs. of 7 variables:
## $ age : int 19 18 28 33 32 31 46 37 37 60 ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
## $ bmi : num 27.9 33.8 33 22.7 28.9 ...
## $ children: int 0 1 3 0 0 0 1 3 2 0 ...
## $ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
## $ charges : num 16885 1726 4449 21984 3867 ...
# Number of rows and columns.
dim(x = insuranceDS)
## [1] 1338 7
# Class of each column.
sapply(X = insuranceDS, FUN = class)
## age sex bmi children smoker region charges
## "integer" "factor" "numeric" "integer" "factor" "factor" "numeric"
# For a symmetric (e.g. normal) distribution the mean and median are close
# to each other; compare the two in the output below.
summary(insuranceDS[["charges"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1122 4740 9382 13270 16640 63770
# Histogram of charges for a closer look at the distribution.
# (The argument is left as insuranceDS$charges because hist() builds the
# plot title and x-axis label from the expression it is given.)
hist(insuranceDS$charges)
# Number of observations per region.
summary(insuranceDS[["region"]])
## northeast northwest southeast southwest
## 324 325 364 325
# Frequency counts for each factor variable.
table(insuranceDS[["region"]])
##
## northeast northwest southeast southwest
## 324 325 364 325
table(insuranceDS[["smoker"]])
##
## no yes
## 1064 274
table(insuranceDS[["sex"]])
##
## female male
## 662 676
# Correlation matrix of the numeric columns.
numeric_vars <- c("age", "bmi", "children", "charges")
cor(insuranceDS[numeric_vars])
## age bmi children charges
## age 1.0000000 0.1092719 0.04246900 0.29900819
## bmi 0.1092719 1.0000000 0.01275890 0.19834097
## children 0.0424690 0.0127589 1.00000000 0.06799823
## charges 0.2990082 0.1983410 0.06799823 1.00000000
# Scatter-plot matrix of the same columns.
pairs(insuranceDS[numeric_vars])
library(psych)
## Warning: package 'psych' was built under R version 4.3.3
# Enhanced scatter-plot matrix from the psych package.
pairs.panels(insuranceDS[numeric_vars])
# Baseline model: charges on all six original predictors. With only the
# original columns in insuranceDS, lm(charges ~ ., data = insuranceDS) fits
# the same model (terms are then listed in column order).
ins_model <- lm(
  formula = charges ~ age + children + bmi + sex + smoker + region,
  data = insuranceDS
)
# Fitted equation starts: charges = -11938.5 + (256.9 * age) + ...
ins_model
##
## Call:
## lm(formula = charges ~ age + children + bmi + sex + smoker +
## region, data = insuranceDS)
##
## Coefficients:
## (Intercept) age children bmi
## -11938.5 256.9 475.5 339.2
## sexmale smokeryes regionnorthwest regionsoutheast
## -131.3 23848.5 -353.0 -1035.0
## regionsouthwest
## -960.1
# Understand the model results: residual summary, coefficient table with
# significance tests, and goodness-of-fit statistics for the baseline model.
summary(ins_model)
##
## Call:
## lm(formula = charges ~ age + children + bmi + sex + smoker +
## region, data = insuranceDS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11304.9 -2848.1 -982.1 1393.9 29992.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11938.5 987.8 -12.086 < 2e-16 ***
## age 256.9 11.9 21.587 < 2e-16 ***
## children 475.5 137.8 3.451 0.000577 ***
## bmi 339.2 28.6 11.860 < 2e-16 ***
## sexmale -131.3 332.9 -0.394 0.693348
## smokeryes 23848.5 413.1 57.723 < 2e-16 ***
## regionnorthwest -353.0 476.3 -0.741 0.458769
## regionsoutheast -1035.0 478.7 -2.162 0.030782 *
## regionsouthwest -960.0 477.9 -2.009 0.044765 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6062 on 1329 degrees of freedom
## Multiple R-squared: 0.7509, Adjusted R-squared: 0.7494
## F-statistic: 500.8 on 8 and 1329 DF, p-value: < 2.2e-16
# Residuals: TrueValue - PredictedValue
# Let's train the model with some additional features.
# Squared age, so the model can capture a non-linear effect of age.
insuranceDS[["age2"]] <- insuranceDS[["age"]]^2
str(object = insuranceDS)
## 'data.frame': 1338 obs. of 8 variables:
## $ age : int 19 18 28 33 32 31 46 37 37 60 ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
## $ bmi : num 27.9 33.8 33 22.7 28.9 ...
## $ children: int 0 1 3 0 0 0 1 3 2 0 ...
## $ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
## $ charges : num 16885 1726 4449 21984 3867 ...
## $ age2 : num 361 324 784 1089 1024 ...
# Indicator feature: 1 when bmi is at least 30, otherwise 0.
insuranceDS[["bmi30"]] <- as.numeric(insuranceDS[["bmi"]] >= 30)
str(object = insuranceDS)
## 'data.frame': 1338 obs. of 9 variables:
## $ age : int 19 18 28 33 32 31 46 37 37 60 ...
## $ sex : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
## $ bmi : num 27.9 33.8 33 22.7 28.9 ...
## $ children: int 0 1 3 0 0 0 1 3 2 0 ...
## $ smoker : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
## $ region : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
## $ charges : num 16885 1726 4449 21984 3867 ...
## $ age2 : num 361 324 784 1089 1024 ...
## $ bmi30 : num 0 1 1 0 0 0 1 0 0 0 ...
# Refit using every other column of insuranceDS as a predictor; `.` now
# also picks up the derived features age2 and bmi30.
ins_model3 <- lm(formula = charges ~ ., data = insuranceDS)
ins_model3
##
## Call:
## lm(formula = charges ~ ., data = insuranceDS)
##
## Coefficients:
## (Intercept) age sexmale bmi
## -2943.176 -28.533 -166.295 153.905
## children smokeryes regionnorthwest regionsoutheast
## 630.402 23857.543 -400.518 -888.533
## regionsouthwest age2 bmi30
## -947.681 3.603 2727.552
# Adding interaction effects
# Interaction-only variant, kept for reference (not run). The data column is
# `smoker`; `smokeryes` is only the name lm() gives the fitted coefficient.
#ins_model4<-lm(charges~bmi30+smoker+bmi30:smoker, data=insuranceDS) # can be written as: lm(charges~bmi30*smoker, data=insuranceDS)
#ins_model4
# Full model plus the bmi30 x smoker interaction. `.` already contains bmi30
# and smoker, so `bmi30*smoker` only adds the interaction term; this is NOT
# the same as lm(charges~bmi30*smoker, ...), which drops all other predictors.
ins_model5<-lm(charges~.+bmi30*smoker, data=insuranceDS) # same fit as: lm(charges~.+bmi30:smoker, data=insuranceDS)
ins_model5
##
## Call:
## lm(formula = charges ~ . + bmi30 * smoker, data = insuranceDS)
##
## Coefficients:
## (Intercept) age sexmale bmi
## 134.251 -32.685 -496.824 120.020
## children smokeryes regionnorthwest regionsoutheast
## 678.561 13404.687 -279.204 -828.547
## regionsouthwest age2 bmi30 smokeryes:bmi30
## -1222.644 3.732 -1000.140 19810.753
# Residual summary, coefficient table with significance tests, and
# goodness-of-fit statistics for the interaction model.
summary(ins_model5)
##
## Call:
## lm(formula = charges ~ . + bmi30 * smoker, data = insuranceDS)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17296.4 -1656.0 -1263.3 -722.1 24160.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 134.2509 1362.7511 0.099 0.921539
## age -32.6851 59.8242 -0.546 0.584915
## sexmale -496.8245 244.3659 -2.033 0.042240 *
## bmi 120.0196 34.2660 3.503 0.000476 ***
## children 678.5612 105.8831 6.409 2.04e-10 ***
## smokeryes 13404.6866 439.9491 30.469 < 2e-16 ***
## regionnorthwest -279.2038 349.2746 -0.799 0.424212
## regionsoutheast -828.5467 351.6352 -2.356 0.018604 *
## regionsouthwest -1222.6437 350.5285 -3.488 0.000503 ***
## age2 3.7316 0.7463 5.000 6.50e-07 ***
## bmi30 -1000.1403 422.8402 -2.365 0.018159 *
## smokeryes:bmi30 19810.7533 604.6567 32.764 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4445 on 1326 degrees of freedom
## Multiple R-squared: 0.8664, Adjusted R-squared: 0.8653
## F-statistic: 781.7 on 11 and 1326 DF, p-value: < 2.2e-16
# Predict charges for one hypothetical customer: a 30-year-old male
# non-smoker from the northeast with two children and a BMI of 30.
# The derived features must be supplied as well: age2 = age^2, and
# bmi30 = 1 because bmi >= 30.
new_customer <- data.frame(
  age = 30, age2 = 30^2, children = 2, bmi = 30, sex = "male",
  bmi30 = 1, smoker = "no", region = "northeast"
)
# Keep the single prediction in its own variable. Assigning it to a column of
# insuranceDS would recycle the value over all 1338 rows and add a constant
# column that any later `charges ~ .` fit picks up as an inestimable (NA)
# predictor.
predicted_charge <- predict(ins_model5, newdata = new_customer)
predicted_charge
## 1
## 5972.859
# Split the dataset into 70% training and 30% testing.
# Fix the RNG seed so the split, and every number derived from it, is
# reproducible from one run to the next.
set.seed(42)
# A `predict` column, if present, holds one prediction recycled over all
# rows; a constant regressor is collinear with the intercept, so leave it out.
model_data <- insuranceDS[, setdiff(names(insuranceDS), "predict"), drop = FALSE]
n_obs <- nrow(model_data)
# seq_len() is safe for zero rows; floor() makes the sample size an integer.
train_indices <- sample(seq_len(n_obs), floor(0.7 * n_obs))
train <- model_data[train_indices, ]
test <- model_data[-train_indices, ]
# `.` expands to every other column of train, i.e. the six original
# predictors plus the derived features age2 and bmi30.
ins_model2 <- lm(charges ~ ., data = train)
ins_model2
##
## Call:
## lm(formula = charges ~ ., data = train)
##
## Coefficients:
## (Intercept) age sexmale bmi
## -4974.46 27.79 77.35 184.53
## children smokeryes regionnorthwest regionsoutheast
## 536.08 24225.41 -385.91 -724.16
## regionsouthwest age2 bmi30 predict
## -656.23 2.82 2818.93 NA
# Residual summary, coefficient table and goodness-of-fit statistics for the
# model fitted on the training split.
summary(ins_model2)
##
## Call:
## lm(formula = charges ~ ., data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12175.6 -3448.2 332.5 1761.2 28605.6
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -4974.457 2125.851 -2.340 0.01950 *
## age 27.793 92.807 0.299 0.76465
## sexmale 77.353 386.620 0.200 0.84147
## bmi 184.534 55.608 3.319 0.00094 ***
## children 536.076 165.067 3.248 0.00121 **
## smokeryes 24225.407 465.278 52.067 < 2e-16 ***
## regionnorthwest -385.908 546.731 -0.706 0.48046
## regionsoutheast -724.162 551.464 -1.313 0.18945
## regionsouthwest -656.233 553.467 -1.186 0.23605
## age2 2.820 1.156 2.440 0.01487 *
## bmi30 2818.932 648.139 4.349 1.52e-05 ***
## predict NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5854 on 925 degrees of freedom
## Multiple R-squared: 0.7792, Adjusted R-squared: 0.7768
## F-statistic: 326.5 on 10 and 925 DF, p-value: < 2.2e-16
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.