R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Summarise each column of the cars data frame (min, quartiles, mean and max)
summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

# Load the age/height data from the Excel workbook

library(readxl)
ageandheight <- read_excel("AgeHeight.xlsx")
# View() opens an interactive data viewer, so only call it in an interactive
# session; it has nothing useful to show when the document is rendered.
if (interactive()) View(ageandheight)

# Create the linear regression of height on age
lmHeight <- lm(height ~ age, data = ageandheight)

# Understand the results: coefficient estimates, residuals and fit statistics
summary(lmHeight)
## 
## Call:
## lm(formula = height ~ age, data = ageandheight)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.27238 -0.24248 -0.02762  0.16014  0.47238 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  64.9283     0.5084  127.71  < 2e-16 ***
## age           0.6350     0.0214   29.66 4.43e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.256 on 10 degrees of freedom
## Multiple R-squared:  0.9888, Adjusted R-squared:  0.9876 
## F-statistic:   880 on 1 and 10 DF,  p-value: 4.428e-11
# Residuals are the errors (observed height minus fitted height). (Intercept)
# is 'a' and the age coefficient is 'b' in the assumed formula
# height = a + b*age.
# Fitted line: height = 64.93 + (0.635 * age)

# MULTIPLE LINEAR REGRESSION
# Create a linear regression with two predictors: age and no_siblings
lmHeight2<-lm(height~age+no_siblings, data=ageandheight)
# Understand the results
summary(lmHeight2)
## 
## Call:
## lm(formula = height ~ age + no_siblings, data = ageandheight)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.26297 -0.22462 -0.02021  0.16102  0.49752 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 64.90554    0.53526 121.260 8.96e-16 ***
## age          0.63751    0.02340  27.249 5.85e-10 ***
## no_siblings -0.01772    0.04735  -0.374    0.717    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2677 on 9 degrees of freedom
## Multiple R-squared:  0.9889, Adjusted R-squared:  0.9865 
## F-statistic: 402.2 on 2 and 9 DF,  p-value: 1.576e-09
# INSURANCE COMPANY'S BUSINESS MODEL

# NOTE: readr is loaded here, but the read.csv() call below is base R.
library(readr)
# Read the insurance data; text columns are converted to factors.
insuranceDS <- read.csv("~/insurance.csv", stringsAsFactors = TRUE)
# View() opens an interactive data viewer, so only call it in an interactive
# session; it has nothing useful to show when the document is rendered.
if (interactive()) View(insuranceDS)
# Per-column summary: quantiles for numeric columns, level counts for factors
summary(insuranceDS)
##       age            sex           bmi           children     smoker    
##  Min.   :18.00   female:662   Min.   :15.96   Min.   :0.000   no :1064  
##  1st Qu.:27.00   male  :676   1st Qu.:26.30   1st Qu.:0.000   yes: 274  
##  Median :39.00                Median :30.40   Median :1.000             
##  Mean   :39.21                Mean   :30.66   Mean   :1.095             
##  3rd Qu.:51.00                3rd Qu.:34.69   3rd Qu.:2.000             
##  Max.   :64.00                Max.   :53.13   Max.   :5.000             
##        region       charges     
##  northeast:324   Min.   : 1122  
##  northwest:325   1st Qu.: 4740  
##  southeast:364   Median : 9382  
##  southwest:325   Mean   :13270  
##                  3rd Qu.:16640  
##                  Max.   :63770
# View the structure: column names, types and the first few values
str(insuranceDS)
## 'data.frame':    1338 obs. of  7 variables:
##  $ age     : int  19 18 28 33 32 31 46 37 37 60 ...
##  $ sex     : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
##  $ bmi     : num  27.9 33.8 33 22.7 28.9 ...
##  $ children: int  0 1 3 0 0 0 1 3 2 0 ...
##  $ smoker  : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
##  $ region  : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
##  $ charges : num  16885 1726 4449 21984 3867 ...
# Display the dimensions of the dataset (rows, columns)
dim(insuranceDS)
## [1] 1338    7
# List the class of each column
sapply(insuranceDS, class)
##       age       sex       bmi  children    smoker    region   charges 
## "integer"  "factor" "numeric" "integer"  "factor"  "factor" "numeric"
# For a roughly symmetric (e.g. normal) distribution the mean and median are
# close to each other; here the mean (13270) is well above the median (9382),
# so charges are right-skewed.
summary(insuranceDS$charges)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1122    4740    9382   13270   16640   63770
# Histogram of charges to examine the distribution in more detail
hist(insuranceDS$charges)

# Number of records in each region
summary(insuranceDS$region)
## northeast northwest southeast southwest 
##       324       325       364       325
# Frequency counts (not proportions) for each level of the factor variables
table(insuranceDS$region)
## 
## northeast northwest southeast southwest 
##       324       325       364       325
# Counts of non-smokers and smokers
table(insuranceDS$smoker)
## 
##   no  yes 
## 1064  274
# Counts of records by sex
table(insuranceDS$sex)
## 
## female   male 
##    662    676
# Correlation matrix of the numeric variables
cor(insuranceDS[c('age','bmi','children','charges')])
##                age       bmi   children    charges
## age      1.0000000 0.1092719 0.04246900 0.29900819
## bmi      0.1092719 1.0000000 0.01275890 0.19834097
## children 0.0424690 0.0127589 1.00000000 0.06799823
## charges  0.2990082 0.1983410 0.06799823 1.00000000
# Scatter-plot matrix of the numeric variables
pairs(insuranceDS[c('age','bmi','children','charges')])
# psych supplies pairs.panels(), used for the enhanced plot that follows
library(psych)
## Warning: package 'psych' was built under R version 4.3.3

# Enhanced scatter-plot matrix of the same numeric variables
pairs.panels(insuranceDS[c('age','bmi','children','charges')])

# Regress charges on all six predictors. While insuranceDS holds only these
# columns, lm(charges~., data=insuranceDS) fits the same model.
ins_model<-lm(charges~age+children+bmi+sex+smoker+region, data=insuranceDS)
# Fitted equation (coefficients from the printout below):
# charges = -11938.5 + 256.9*age + 475.5*children + 339.2*bmi - 131.3*sexmale
#           + 23848.5*smokeryes - 353.0*regionnorthwest
#           - 1035.0*regionsoutheast - 960.1*regionsouthwest
ins_model
## 
## Call:
## lm(formula = charges ~ age + children + bmi + sex + smoker + 
##     region, data = insuranceDS)
## 
## Coefficients:
##     (Intercept)              age         children              bmi  
##        -11938.5            256.9            475.5            339.2  
##         sexmale        smokeryes  regionnorthwest  regionsoutheast  
##          -131.3          23848.5           -353.0          -1035.0  
## regionsouthwest  
##          -960.1
# Understand the model results: estimates, standard errors, p-values and
# R-squared
summary(ins_model)
## 
## Call:
## lm(formula = charges ~ age + children + bmi + sex + smoker + 
##     region, data = insuranceDS)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11304.9  -2848.1   -982.1   1393.9  29992.8 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -11938.5      987.8 -12.086  < 2e-16 ***
## age                256.9       11.9  21.587  < 2e-16 ***
## children           475.5      137.8   3.451 0.000577 ***
## bmi                339.2       28.6  11.860  < 2e-16 ***
## sexmale           -131.3      332.9  -0.394 0.693348    
## smokeryes        23848.5      413.1  57.723  < 2e-16 ***
## regionnorthwest   -353.0      476.3  -0.741 0.458769    
## regionsoutheast  -1035.0      478.7  -2.162 0.030782 *  
## regionsouthwest   -960.0      477.9  -2.009 0.044765 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6062 on 1329 degrees of freedom
## Multiple R-squared:  0.7509, Adjusted R-squared:  0.7494 
## F-statistic: 500.8 on 8 and 1329 DF,  p-value: < 2.2e-16
# Residuals: true value - predicted value


# Train the model with some additional features.
# age2 is the square of age, giving the model a quadratic age term.
insuranceDS$age2<-insuranceDS$age^2
str(insuranceDS)
## 'data.frame':    1338 obs. of  8 variables:
##  $ age     : int  19 18 28 33 32 31 46 37 37 60 ...
##  $ sex     : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
##  $ bmi     : num  27.9 33.8 33 22.7 28.9 ...
##  $ children: int  0 1 3 0 0 0 1 3 2 0 ...
##  $ smoker  : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
##  $ region  : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
##  $ charges : num  16885 1726 4449 21984 3867 ...
##  $ age2    : num  361 324 784 1089 1024 ...
# Indicator column: 1 when bmi is at least 30, otherwise 0 (stored as numeric)
insuranceDS[["bmi30"]] <- as.numeric(insuranceDS[["bmi"]] >= 30)
# Confirm the new column was added
str(insuranceDS)
## 'data.frame':    1338 obs. of  9 variables:
##  $ age     : int  19 18 28 33 32 31 46 37 37 60 ...
##  $ sex     : Factor w/ 2 levels "female","male": 1 2 2 2 2 1 1 1 2 1 ...
##  $ bmi     : num  27.9 33.8 33 22.7 28.9 ...
##  $ children: int  0 1 3 0 0 0 1 3 2 0 ...
##  $ smoker  : Factor w/ 2 levels "no","yes": 2 1 1 1 1 1 1 1 1 1 ...
##  $ region  : Factor w/ 4 levels "northeast","northwest",..: 4 3 3 2 2 3 3 2 1 2 ...
##  $ charges : num  16885 1726 4449 21984 3867 ...
##  $ age2    : num  361 324 784 1089 1024 ...
##  $ bmi30   : num  0 1 1 0 0 0 1 0 0 0 ...
# Refit using every other column as a predictor; `.` now also picks up the
# age2 and bmi30 columns added to insuranceDS.
ins_model3<-lm(charges~., data=insuranceDS)
ins_model3
## 
## Call:
## lm(formula = charges ~ ., data = insuranceDS)
## 
## Coefficients:
##     (Intercept)              age          sexmale              bmi  
##       -2943.176          -28.533         -166.295          153.905  
##        children        smokeryes  regionnorthwest  regionsoutheast  
##         630.402        23857.543         -400.518         -888.533  
## regionsouthwest             age2            bmi30  
##        -947.681            3.603         2727.552
# Adding interaction effects
# Earlier attempt, left disabled: `smokeryes` is a coefficient label, not a
# column of insuranceDS (the column is `smoker`). With the column name fixed it
# can be written as: lm(charges~bmi30*smoker, data=insuranceDS)
#ins_model4<-lm(charges~bmi30+smokeryes+bmi30:smokeryes, data=insuranceDS)
#ins_model4

# `. + bmi30*smoker` keeps every column as a main effect and adds the
# bmi30:smoker interaction, i.e. the same model as charges~.+bmi30:smoker.
# It is not the same as lm(charges~bmi30*smoker, data=insuranceDS), which
# would drop the other predictors.
ins_model5<-lm(charges~.+bmi30*smoker, data=insuranceDS)
ins_model5
## 
## Call:
## lm(formula = charges ~ . + bmi30 * smoker, data = insuranceDS)
## 
## Coefficients:
##     (Intercept)              age          sexmale              bmi  
##         134.251          -32.685         -496.824          120.020  
##        children        smokeryes  regionnorthwest  regionsoutheast  
##         678.561        13404.687         -279.204         -828.547  
## regionsouthwest             age2            bmi30  smokeryes:bmi30  
##       -1222.644            3.732        -1000.140        19810.753
# Full summary of the interaction model (standard errors, p-values, R-squared)
summary(ins_model5)
## 
## Call:
## lm(formula = charges ~ . + bmi30 * smoker, data = insuranceDS)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -17296.4  -1656.0  -1263.3   -722.1  24160.2 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       134.2509  1362.7511   0.099 0.921539    
## age               -32.6851    59.8242  -0.546 0.584915    
## sexmale          -496.8245   244.3659  -2.033 0.042240 *  
## bmi               120.0196    34.2660   3.503 0.000476 ***
## children          678.5612   105.8831   6.409 2.04e-10 ***
## smokeryes       13404.6866   439.9491  30.469  < 2e-16 ***
## regionnorthwest  -279.2038   349.2746  -0.799 0.424212    
## regionsoutheast  -828.5467   351.6352  -2.356 0.018604 *  
## regionsouthwest -1222.6437   350.5285  -3.488 0.000503 ***
## age2                3.7316     0.7463   5.000 6.50e-07 ***
## bmi30           -1000.1403   422.8402  -2.365 0.018159 *  
## smokeryes:bmi30 19810.7533   604.6567  32.764  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4445 on 1326 degrees of freedom
## Multiple R-squared:  0.8664, Adjusted R-squared:  0.8653 
## F-statistic: 781.7 on 11 and 1326 DF,  p-value: < 2.2e-16
# Predict the charge for one hypothetical customer. The result is a single
# number, so it is kept in its own object rather than written to a column of
# insuranceDS: as a column it would be recycled to every row and then picked
# up as a constant predictor (NA coefficient) by later `charges ~ .` models.
new_customer <- data.frame(
  age = 30, age2 = 30^2, children = 2, bmi = 30, sex = "male",
  bmi30 = 1, smoker = "no", region = "northeast"
)
predicted_charge <- predict(ins_model5, newdata = new_customer)
predicted_charge
##        1 
## 5972.859
# Split the dataset into 70% training and 30% testing.
# NOTE: no seed is set, so the split (and the numbers shown below) change on
# every run; call set.seed() before sample() for a reproducible split.
n_obs <- nrow(insuranceDS)
train_indices <- sample(seq_len(n_obs), floor(0.7 * n_obs))
train <- insuranceDS[train_indices, ]
# Held-out rows, available for evaluating the model on unseen data
test <- insuranceDS[-train_indices, ]

# `.` expands to every other column of train: the six original predictors
# plus the engineered age2 and bmi30 columns.
ins_model2 <- lm(charges ~ ., data = train)
ins_model2
## 
## Call:
## lm(formula = charges ~ ., data = train)
## 
## Coefficients:
##     (Intercept)              age          sexmale              bmi  
##        -4974.46            27.79            77.35           184.53  
##        children        smokeryes  regionnorthwest  regionsoutheast  
##          536.08         24225.41          -385.91          -724.16  
## regionsouthwest             age2            bmi30  
##         -656.23             2.82          2818.93
summary(ins_model2)
## 
## Call:
## lm(formula = charges ~ ., data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12175.6  -3448.2    332.5   1761.2  28605.6 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     -4974.457   2125.851  -2.340  0.01950 *  
## age                27.793     92.807   0.299  0.76465    
## sexmale            77.353    386.620   0.200  0.84147    
## bmi               184.534     55.608   3.319  0.00094 ***
## children          536.076    165.067   3.248  0.00121 ** 
## smokeryes       24225.407    465.278  52.067  < 2e-16 ***
## regionnorthwest  -385.908    546.731  -0.706  0.48046    
## regionsoutheast  -724.162    551.464  -1.313  0.18945    
## regionsouthwest  -656.233    553.467  -1.186  0.23605    
## age2                2.820      1.156   2.440  0.01487 *  
## bmi30            2818.932    648.139   4.349 1.52e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5854 on 925 degrees of freedom
## Multiple R-squared:  0.7792, Adjusted R-squared:  0.7768 
## F-statistic: 326.5 on 10 and 925 DF,  p-value: < 2.2e-16

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.