Lendo o conjunto de dados

# Load the insurance records from the CSV file in the working directory.
dataset <- read.csv(file = 'insurance.csv')

Codificando atributos como fatores

# Encode the categorical columns as numbers.
# as.numeric() applied directly to a factor returns the internal level codes
# (1, 2, ...), not the labels, so the labels are recovered through
# as.character() before the numeric conversion.
# Values that are not among the listed levels become NA.

# sex: 'female' -> 1, 'male' -> 2
dataset$sex <- as.numeric(as.character(factor(dataset$sex,
                                              levels = c('female', 'male'),
                                              labels = c(1, 2))))

# smoker: 'no' -> 0, 'yes' -> 1
dataset$smoker <- as.numeric(as.character(factor(dataset$smoker,
                                                 levels = c('no', 'yes'),
                                                 labels = c(0, 1))))

Dividindo o conjunto de dados em treinamento e teste

# Partition the rows into a training set and a test set (needs caTools).
# install.packages('caTools')
library(caTools)
# sample.split() yields a logical vector (SplitRatio = 0.8); TRUE marks the
# rows that go to the training set, FALSE the rows kept for testing.
split <- sample.split(dataset$charges, SplitRatio = 0.8)
training_set <- dataset[split, ]
test_set <- dataset[!split, ]

Regressão linear simples no conjunto de treinamento

# Fit charges as a linear function of age on the training rows only,
# then print the coefficient table and fit statistics.
regressor <- lm(charges ~ age, data = training_set)
summary(regressor)
## 
## Call:
## lm(formula = charges ~ age, data = training_set)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -8231  -6836  -6110   5423  47598 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3026.6     1079.1   2.805  0.00513 ** 
## age            266.0       26.2  10.152  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11870 on 1068 degrees of freedom
## Multiple R-squared:  0.08801,    Adjusted R-squared:  0.08716 
## F-statistic: 103.1 on 1 and 1068 DF,  p-value: < 2.2e-16
# Predicted charges for the held-out test rows.
y_pred <- predict(regressor, newdata = test_set)

library(ggplot2)
# Training observations as red points, with the fitted regression line
# (model predictions on the same training rows) drawn in blue.
ggplot(data = training_set, mapping = aes(x = age)) +
  geom_point(aes(y = charges), colour = 'red') +
  geom_line(aes(y = predict(regressor, newdata = training_set)),
            colour = 'blue') +
  ggtitle('Custos x Idade') +
  xlab('Idade') +
  ylab('Custos')