library(readr) # Para importar datos
library(dplyr) # Para filtrar
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr) # Para datos tabulares
library(ggplot2) # Para visualizar
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(caret) # Para particionar
## Loading required package: lattice
library(Metrics) # Para determinar rmse
##
## Attaching package: 'Metrics'
## The following objects are masked from 'package:caret':
##
## precision, recall
library(rpart) # Para árbol
library(rpart.plot) # Para árbol
# Read the Advertising CSV straight from its GitHub raw URL into a data frame
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/Analisis-Inteligente-de-datos/main/datos/Advertising.csv"
)
# Preview the first 10 rows
head(datos, n = 10)
## X TV Radio Newspaper Sales
## 1 1 230.1 37.8 69.2 22.1
## 2 2 44.5 39.3 45.1 10.4
## 3 3 17.2 45.9 69.3 9.3
## 4 4 151.5 41.3 58.5 18.5
## 5 5 180.8 10.8 58.4 12.9
## 6 6 8.7 48.9 75.0 7.2
## 7 7 57.5 32.8 23.5 11.8
## 8 8 120.2 19.6 11.6 13.2
## 9 9 8.6 2.1 1.0 4.8
## 10 10 199.8 2.6 21.2 10.6
# Per-column descriptive statistics (min, quartiles, mean, max)
summary(object = datos)
## X TV Radio Newspaper
## Min. : 1.00 Min. : 0.70 Min. : 0.000 Min. : 0.30
## 1st Qu.: 50.75 1st Qu.: 74.38 1st Qu.: 9.975 1st Qu.: 12.75
## Median :100.50 Median :149.75 Median :22.900 Median : 25.75
## Mean :100.50 Mean :147.04 Mean :23.264 Mean : 30.55
## 3rd Qu.:150.25 3rd Qu.:218.82 3rd Qu.:36.525 3rd Qu.: 45.10
## Max. :200.00 Max. :296.40 Max. :49.600 Max. :114.00
## Sales
## Min. : 1.60
## 1st Qu.:10.38
## Median :12.90
## Mean :14.02
## 3rd Qu.:17.40
## Max. :27.00
# Compact view of the data frame structure: dimensions and column types
str(object = datos)
## 'data.frame': 200 obs. of 5 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ TV : num 230.1 44.5 17.2 151.5 180.8 ...
## $ Radio : num 37.8 39.3 45.9 41.3 10.8 48.9 32.8 19.6 2.1 2.6 ...
## $ Newspaper: num 69.2 45.1 69.3 58.5 58.4 75 23.5 11.6 1 21.2 ...
## $ Sales : num 22.1 10.4 9.3 18.5 12.9 7.2 11.8 13.2 4.8 10.6 ...
# Keep only the three advertising channels and the response variable;
# this drops the row-index column X read from the CSV
datos <- select(datos, TV, Radio, Newspaper, Sales)
# Print the full data frame
datos
## TV Radio Newspaper Sales
## 1 230.1 37.8 69.2 22.1
## 2 44.5 39.3 45.1 10.4
## 3 17.2 45.9 69.3 9.3
## 4 151.5 41.3 58.5 18.5
## 5 180.8 10.8 58.4 12.9
## 6 8.7 48.9 75.0 7.2
## 7 57.5 32.8 23.5 11.8
## 8 120.2 19.6 11.6 13.2
## 9 8.6 2.1 1.0 4.8
## 10 199.8 2.6 21.2 10.6
## 11 66.1 5.8 24.2 8.6
## 12 214.7 24.0 4.0 17.4
## 13 23.8 35.1 65.9 9.2
## 14 97.5 7.6 7.2 9.7
## 15 204.1 32.9 46.0 19.0
## 16 195.4 47.7 52.9 22.4
## 17 67.8 36.6 114.0 12.5
## 18 281.4 39.6 55.8 24.4
## 19 69.2 20.5 18.3 11.3
## 20 147.3 23.9 19.1 14.6
## 21 218.4 27.7 53.4 18.0
## 22 237.4 5.1 23.5 12.5
## 23 13.2 15.9 49.6 5.6
## 24 228.3 16.9 26.2 15.5
## 25 62.3 12.6 18.3 9.7
## 26 262.9 3.5 19.5 12.0
## 27 142.9 29.3 12.6 15.0
## 28 240.1 16.7 22.9 15.9
## 29 248.8 27.1 22.9 18.9
## 30 70.6 16.0 40.8 10.5
## 31 292.9 28.3 43.2 21.4
## 32 112.9 17.4 38.6 11.9
## 33 97.2 1.5 30.0 9.6
## 34 265.6 20.0 0.3 17.4
## 35 95.7 1.4 7.4 9.5
## 36 290.7 4.1 8.5 12.8
## 37 266.9 43.8 5.0 25.4
## 38 74.7 49.4 45.7 14.7
## 39 43.1 26.7 35.1 10.1
## 40 228.0 37.7 32.0 21.5
## 41 202.5 22.3 31.6 16.6
## 42 177.0 33.4 38.7 17.1
## 43 293.6 27.7 1.8 20.7
## 44 206.9 8.4 26.4 12.9
## 45 25.1 25.7 43.3 8.5
## 46 175.1 22.5 31.5 14.9
## 47 89.7 9.9 35.7 10.6
## 48 239.9 41.5 18.5 23.2
## 49 227.2 15.8 49.9 14.8
## 50 66.9 11.7 36.8 9.7
## 51 199.8 3.1 34.6 11.4
## 52 100.4 9.6 3.6 10.7
## 53 216.4 41.7 39.6 22.6
## 54 182.6 46.2 58.7 21.2
## 55 262.7 28.8 15.9 20.2
## 56 198.9 49.4 60.0 23.7
## 57 7.3 28.1 41.4 5.5
## 58 136.2 19.2 16.6 13.2
## 59 210.8 49.6 37.7 23.8
## 60 210.7 29.5 9.3 18.4
## 61 53.5 2.0 21.4 8.1
## 62 261.3 42.7 54.7 24.2
## 63 239.3 15.5 27.3 15.7
## 64 102.7 29.6 8.4 14.0
## 65 131.1 42.8 28.9 18.0
## 66 69.0 9.3 0.9 9.3
## 67 31.5 24.6 2.2 9.5
## 68 139.3 14.5 10.2 13.4
## 69 237.4 27.5 11.0 18.9
## 70 216.8 43.9 27.2 22.3
## 71 199.1 30.6 38.7 18.3
## 72 109.8 14.3 31.7 12.4
## 73 26.8 33.0 19.3 8.8
## 74 129.4 5.7 31.3 11.0
## 75 213.4 24.6 13.1 17.0
## 76 16.9 43.7 89.4 8.7
## 77 27.5 1.6 20.7 6.9
## 78 120.5 28.5 14.2 14.2
## 79 5.4 29.9 9.4 5.3
## 80 116.0 7.7 23.1 11.0
## 81 76.4 26.7 22.3 11.8
## 82 239.8 4.1 36.9 12.3
## 83 75.3 20.3 32.5 11.3
## 84 68.4 44.5 35.6 13.6
## 85 213.5 43.0 33.8 21.7
## 86 193.2 18.4 65.7 15.2
## 87 76.3 27.5 16.0 12.0
## 88 110.7 40.6 63.2 16.0
## 89 88.3 25.5 73.4 12.9
## 90 109.8 47.8 51.4 16.7
## 91 134.3 4.9 9.3 11.2
## 92 28.6 1.5 33.0 7.3
## 93 217.7 33.5 59.0 19.4
## 94 250.9 36.5 72.3 22.2
## 95 107.4 14.0 10.9 11.5
## 96 163.3 31.6 52.9 16.9
## 97 197.6 3.5 5.9 11.7
## 98 184.9 21.0 22.0 15.5
## 99 289.7 42.3 51.2 25.4
## 100 135.2 41.7 45.9 17.2
## 101 222.4 4.3 49.8 11.7
## 102 296.4 36.3 100.9 23.8
## 103 280.2 10.1 21.4 14.8
## 104 187.9 17.2 17.9 14.7
## 105 238.2 34.3 5.3 20.7
## 106 137.9 46.4 59.0 19.2
## 107 25.0 11.0 29.7 7.2
## 108 90.4 0.3 23.2 8.7
## 109 13.1 0.4 25.6 5.3
## 110 255.4 26.9 5.5 19.8
## 111 225.8 8.2 56.5 13.4
## 112 241.7 38.0 23.2 21.8
## 113 175.7 15.4 2.4 14.1
## 114 209.6 20.6 10.7 15.9
## 115 78.2 46.8 34.5 14.6
## 116 75.1 35.0 52.7 12.6
## 117 139.2 14.3 25.6 12.2
## 118 76.4 0.8 14.8 9.4
## 119 125.7 36.9 79.2 15.9
## 120 19.4 16.0 22.3 6.6
## 121 141.3 26.8 46.2 15.5
## 122 18.8 21.7 50.4 7.0
## 123 224.0 2.4 15.6 11.6
## 124 123.1 34.6 12.4 15.2
## 125 229.5 32.3 74.2 19.7
## 126 87.2 11.8 25.9 10.6
## 127 7.8 38.9 50.6 6.6
## 128 80.2 0.0 9.2 8.8
## 129 220.3 49.0 3.2 24.7
## 130 59.6 12.0 43.1 9.7
## 131 0.7 39.6 8.7 1.6
## 132 265.2 2.9 43.0 12.7
## 133 8.4 27.2 2.1 5.7
## 134 219.8 33.5 45.1 19.6
## 135 36.9 38.6 65.6 10.8
## 136 48.3 47.0 8.5 11.6
## 137 25.6 39.0 9.3 9.5
## 138 273.7 28.9 59.7 20.8
## 139 43.0 25.9 20.5 9.6
## 140 184.9 43.9 1.7 20.7
## 141 73.4 17.0 12.9 10.9
## 142 193.7 35.4 75.6 19.2
## 143 220.5 33.2 37.9 20.1
## 144 104.6 5.7 34.4 10.4
## 145 96.2 14.8 38.9 11.4
## 146 140.3 1.9 9.0 10.3
## 147 240.1 7.3 8.7 13.2
## 148 243.2 49.0 44.3 25.4
## 149 38.0 40.3 11.9 10.9
## 150 44.7 25.8 20.6 10.1
## 151 280.7 13.9 37.0 16.1
## 152 121.0 8.4 48.7 11.6
## 153 197.6 23.3 14.2 16.6
## 154 171.3 39.7 37.7 19.0
## 155 187.8 21.1 9.5 15.6
## 156 4.1 11.6 5.7 3.2
## 157 93.9 43.5 50.5 15.3
## 158 149.8 1.3 24.3 10.1
## 159 11.7 36.9 45.2 7.3
## 160 131.7 18.4 34.6 12.9
## 161 172.5 18.1 30.7 14.4
## 162 85.7 35.8 49.3 13.3
## 163 188.4 18.1 25.6 14.9
## 164 163.5 36.8 7.4 18.0
## 165 117.2 14.7 5.4 11.9
## 166 234.5 3.4 84.8 11.9
## 167 17.9 37.6 21.6 8.0
## 168 206.8 5.2 19.4 12.2
## 169 215.4 23.6 57.6 17.1
## 170 284.3 10.6 6.4 15.0
## 171 50.0 11.6 18.4 8.4
## 172 164.5 20.9 47.4 14.5
## 173 19.6 20.1 17.0 7.6
## 174 168.4 7.1 12.8 11.7
## 175 222.4 3.4 13.1 11.5
## 176 276.9 48.9 41.8 27.0
## 177 248.4 30.2 20.3 20.2
## 178 170.2 7.8 35.2 11.7
## 179 276.7 2.3 23.7 11.8
## 180 165.6 10.0 17.6 12.6
## 181 156.6 2.6 8.3 10.5
## 182 218.5 5.4 27.4 12.2
## 183 56.2 5.7 29.7 8.7
## 184 287.6 43.0 71.8 26.2
## 185 253.8 21.3 30.0 17.6
## 186 205.0 45.1 19.6 22.6
## 187 139.5 2.1 26.6 10.3
## 188 191.1 28.7 18.2 17.3
## 189 286.0 13.9 3.7 15.9
## 190 18.7 12.1 23.4 6.7
## 191 39.5 41.1 5.8 10.8
## 192 75.5 10.8 6.0 9.9
## 193 17.2 4.1 31.6 5.9
## 194 166.8 42.0 3.6 19.6
## 195 149.7 35.6 6.0 17.3
## 196 38.2 3.7 13.8 7.6
## 197 94.2 4.9 8.1 9.7
## 198 177.0 9.3 6.4 12.8
## 199 283.6 42.0 66.2 25.5
## 200 232.1 8.6 8.7 13.4
# First 20 observations
head(datos, n = 20)
## TV Radio Newspaper Sales
## 1 230.1 37.8 69.2 22.1
## 2 44.5 39.3 45.1 10.4
## 3 17.2 45.9 69.3 9.3
## 4 151.5 41.3 58.5 18.5
## 5 180.8 10.8 58.4 12.9
## 6 8.7 48.9 75.0 7.2
## 7 57.5 32.8 23.5 11.8
## 8 120.2 19.6 11.6 13.2
## 9 8.6 2.1 1.0 4.8
## 10 199.8 2.6 21.2 10.6
## 11 66.1 5.8 24.2 8.6
## 12 214.7 24.0 4.0 17.4
## 13 23.8 35.1 65.9 9.2
## 14 97.5 7.6 7.2 9.7
## 15 204.1 32.9 46.0 19.0
## 16 195.4 47.7 52.9 22.4
## 17 67.8 36.6 114.0 12.5
## 18 281.4 39.6 55.8 24.4
## 19 69.2 20.5 18.3 11.3
## 20 147.3 23.9 19.1 14.6
# Last 20 observations
tail(datos, n = 20)
## TV Radio Newspaper Sales
## 181 156.6 2.6 8.3 10.5
## 182 218.5 5.4 27.4 12.2
## 183 56.2 5.7 29.7 8.7
## 184 287.6 43.0 71.8 26.2
## 185 253.8 21.3 30.0 17.6
## 186 205.0 45.1 19.6 22.6
## 187 139.5 2.1 26.6 10.3
## 188 191.1 28.7 18.2 17.3
## 189 286.0 13.9 3.7 15.9
## 190 18.7 12.1 23.4 6.7
## 191 39.5 41.1 5.8 10.8
## 192 75.5 10.8 6.0 9.9
## 193 17.2 4.1 31.6 5.9
## 194 166.8 42.0 3.6 19.6
## 195 149.7 35.6 6.0 17.3
## 196 38.2 3.7 13.8 7.6
## 197 94.2 4.9 8.1 9.7
## 198 177.0 9.3 6.4 12.8
## 199 283.6 42.0 66.2 25.5
## 200 232.1 8.6 8.7 13.4
# Number of observations in the dataset
n <- nrow(datos)

# Fix the RNG seed so the 70/30 partition below is reproducible
set.seed(1550)
entrena <- createDataPartition(datos$Sales, times = 1, p = 0.70, list = FALSE)

# Training data: the rows chosen by the partition [rows, columns]
datos.entrenamiento <- datos[entrena, ]
# Validation data: every row not chosen by the partition
datos.validacion <- datos[-entrena, ]

# Show the first 20 training rows as a table
datos.entrenamiento %>%
  head(20) %>%
  kable(caption = "Datos de Entrenamiento. Primeros 20 registros")
| | TV | Radio | Newspaper | Sales |
|---|---|---|---|---|
| 3 | 17.2 | 45.9 | 69.3 | 9.3 |
| 4 | 151.5 | 41.3 | 58.5 | 18.5 |
| 5 | 180.8 | 10.8 | 58.4 | 12.9 |
| 7 | 57.5 | 32.8 | 23.5 | 11.8 |
| 9 | 8.6 | 2.1 | 1.0 | 4.8 |
| 10 | 199.8 | 2.6 | 21.2 | 10.6 |
| 15 | 204.1 | 32.9 | 46.0 | 19.0 |
| 16 | 195.4 | 47.7 | 52.9 | 22.4 |
| 17 | 67.8 | 36.6 | 114.0 | 12.5 |
| 23 | 13.2 | 15.9 | 49.6 | 5.6 |
| 24 | 228.3 | 16.9 | 26.2 | 15.5 |
| 25 | 62.3 | 12.6 | 18.3 | 9.7 |
| 26 | 262.9 | 3.5 | 19.5 | 12.0 |
| 27 | 142.9 | 29.3 | 12.6 | 15.0 |
| 28 | 240.1 | 16.7 | 22.9 | 15.9 |
| 29 | 248.8 | 27.1 | 22.9 | 18.9 |
| 31 | 292.9 | 28.3 | 43.2 | 21.4 |
| 32 | 112.9 | 17.4 | 38.6 | 11.9 |
| 33 | 97.2 | 1.5 | 30.0 | 9.6 |
| 35 | 95.7 | 1.4 | 7.4 | 9.5 |
# Show the last 20 training rows as a table.
# Caption typo fixed: "ültimos" -> "últimos".
kable(tail(datos.entrenamiento, 20), caption = "Datos de entrenamiento últimos 20 registros")
| | TV | Radio | Newspaper | Sales |
|---|---|---|---|---|
| 179 | 276.7 | 2.3 | 23.7 | 11.8 |
| 180 | 165.6 | 10.0 | 17.6 | 12.6 |
| 181 | 156.6 | 2.6 | 8.3 | 10.5 |
| 184 | 287.6 | 43.0 | 71.8 | 26.2 |
| 185 | 253.8 | 21.3 | 30.0 | 17.6 |
| 186 | 205.0 | 45.1 | 19.6 | 22.6 |
| 187 | 139.5 | 2.1 | 26.6 | 10.3 |
| 188 | 191.1 | 28.7 | 18.2 | 17.3 |
| 189 | 286.0 | 13.9 | 3.7 | 15.9 |
| 190 | 18.7 | 12.1 | 23.4 | 6.7 |
| 191 | 39.5 | 41.1 | 5.8 | 10.8 |
| 192 | 75.5 | 10.8 | 6.0 | 9.9 |
| 193 | 17.2 | 4.1 | 31.6 | 5.9 |
| 194 | 166.8 | 42.0 | 3.6 | 19.6 |
| 195 | 149.7 | 35.6 | 6.0 | 17.3 |
| 196 | 38.2 | 3.7 | 13.8 | 7.6 |
| 197 | 94.2 | 4.9 | 8.1 | 9.7 |
| 198 | 177.0 | 9.3 | 6.4 | 12.8 |
| 199 | 283.6 | 42.0 | 66.2 | 25.5 |
| 200 | 232.1 | 8.6 | 8.7 | 13.4 |
# Show the first 20 validation rows as a table
datos.validacion %>%
  head(20) %>%
  kable(caption = "Datos de Validación Primeros 20 registros")
| | TV | Radio | Newspaper | Sales |
|---|---|---|---|---|
| 1 | 230.1 | 37.8 | 69.2 | 22.1 |
| 2 | 44.5 | 39.3 | 45.1 | 10.4 |
| 6 | 8.7 | 48.9 | 75.0 | 7.2 |
| 8 | 120.2 | 19.6 | 11.6 | 13.2 |
| 11 | 66.1 | 5.8 | 24.2 | 8.6 |
| 12 | 214.7 | 24.0 | 4.0 | 17.4 |
| 13 | 23.8 | 35.1 | 65.9 | 9.2 |
| 14 | 97.5 | 7.6 | 7.2 | 9.7 |
| 18 | 281.4 | 39.6 | 55.8 | 24.4 |
| 19 | 69.2 | 20.5 | 18.3 | 11.3 |
| 20 | 147.3 | 23.9 | 19.1 | 14.6 |
| 21 | 218.4 | 27.7 | 53.4 | 18.0 |
| 22 | 237.4 | 5.1 | 23.5 | 12.5 |
| 30 | 70.6 | 16.0 | 40.8 | 10.5 |
| 34 | 265.6 | 20.0 | 0.3 | 17.4 |
| 38 | 74.7 | 49.4 | 45.7 | 14.7 |
| 40 | 228.0 | 37.7 | 32.0 | 21.5 |
| 41 | 202.5 | 22.3 | 31.6 | 16.6 |
| 43 | 293.6 | 27.7 | 1.8 | 20.7 |
| 45 | 25.1 | 25.7 | 43.3 | 8.5 |
# Show the last 20 validation rows as a table.
# Caption typo fixed: "validació" -> "validación".
kable(tail(datos.validacion, 20), caption = "Datos de validación últimos 20 registros")
| | TV | Radio | Newspaper | Sales |
|---|---|---|---|---|
| 109 | 13.1 | 0.4 | 25.6 | 5.3 |
| 110 | 255.4 | 26.9 | 5.5 | 19.8 |
| 114 | 209.6 | 20.6 | 10.7 | 15.9 |
| 116 | 75.1 | 35.0 | 52.7 | 12.6 |
| 121 | 141.3 | 26.8 | 46.2 | 15.5 |
| 130 | 59.6 | 12.0 | 43.1 | 9.7 |
| 131 | 0.7 | 39.6 | 8.7 | 1.6 |
| 132 | 265.2 | 2.9 | 43.0 | 12.7 |
| 134 | 219.8 | 33.5 | 45.1 | 19.6 |
| 138 | 273.7 | 28.9 | 59.7 | 20.8 |
| 144 | 104.6 | 5.7 | 34.4 | 10.4 |
| 153 | 197.6 | 23.3 | 14.2 | 16.6 |
| 154 | 171.3 | 39.7 | 37.7 | 19.0 |
| 158 | 149.8 | 1.3 | 24.3 | 10.1 |
| 165 | 117.2 | 14.7 | 5.4 | 11.9 |
| 168 | 206.8 | 5.2 | 19.4 | 12.2 |
| 170 | 284.3 | 10.6 | 6.4 | 15.0 |
| 175 | 222.4 | 3.4 | 13.1 | 11.5 |
| 182 | 218.5 | 5.4 | 27.4 | 12.2 |
| 183 | 56.2 | 5.7 | 29.7 | 8.7 |
# Fit a regression tree on the training data: Sales explained by the
# three advertising channels, using rpart's default settings
modelo_ar <- rpart(
  formula = Sales ~ TV + Radio + Newspaper,
  data = datos.entrenamiento
)
# Print the node / split listing of the fitted tree
modelo_ar
## n= 142
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 142 3930.13500 14.061970
## 2) TV< 101.55 50 335.17780 9.262000
## 4) TV< 30.05 19 44.27684 6.726316 *
## 5) TV>=30.05 31 93.86194 10.816130
## 10) Radio< 29.75 21 27.26952 10.004760 *
## 11) Radio>=29.75 10 23.73600 12.520000 *
## 3) TV>=101.55 92 1816.89100 16.670650
## 6) Radio< 21.2 45 132.65110 13.044440
## 12) Radio< 10.05 24 20.75958 11.820830 *
## 13) Radio>=10.05 21 34.89143 14.442860 *
## 7) Radio>=21.2 47 525.97490 20.142550
## 14) TV< 179.8 14 41.55214 16.764290 *
## 15) TV>=179.8 33 256.86060 21.575760
## 30) Radio< 35.85 14 21.70857 18.871430 *
## 31) Radio>=35.85 19 57.32105 23.568420 *
# Detailed report of the tree: CP table, variable importance and per-node splits
summary(object = modelo_ar)
## Call:
## rpart(formula = Sales ~ TV + Radio + Newspaper, data = datos.entrenamiento)
## n= 142
##
## CP nsplit rel error xerror xstd
## 1 0.45241862 0 1.00000000 1.0058898 0.10594717
## 2 0.29471376 1 0.54758138 0.6479956 0.06268588
## 3 0.05790187 2 0.25286762 0.3348100 0.03642435
## 4 0.05013544 3 0.19496575 0.3168308 0.03257395
## 5 0.04524806 4 0.14483031 0.2803904 0.02836161
## 6 0.01959223 5 0.09958225 0.2108761 0.02470429
## 7 0.01090457 6 0.07999002 0.1587899 0.01951078
## 8 0.01000000 7 0.06908546 0.1497569 0.01685195
##
## Variable importance
## TV Radio Newspaper
## 53 36 11
##
## Node number 1: 142 observations, complexity param=0.4524186
## mean=14.06197, MSE=27.677
## left son=2 (50 obs) right son=3 (92 obs)
## Primary splits:
## TV < 101.55 to the left, improve=0.4524186, (0 missing)
## Radio < 41.2 to the left, improve=0.3184424, (0 missing)
## Newspaper < 51 to the left, improve=0.1033655, (0 missing)
## Surrogate splits:
## Radio < 1.7 to the left, agree=0.683, adj=0.10, (0 split)
## Newspaper < 2.3 to the left, agree=0.669, adj=0.06, (0 split)
##
## Node number 2: 50 observations, complexity param=0.05013544
## mean=9.262, MSE=6.703556
## left son=4 (19 obs) right son=5 (31 obs)
## Primary splits:
## TV < 30.05 to the left, improve=0.58786420, (0 missing)
## Radio < 31.35 to the left, improve=0.19123480, (0 missing)
## Newspaper < 33.75 to the left, improve=0.08279888, (0 missing)
## Surrogate splits:
## Newspaper < 40.15 to the right, agree=0.66, adj=0.105, (0 split)
##
## Node number 3: 92 observations, complexity param=0.2947138
## mean=16.67065, MSE=19.74881
## left son=6 (45 obs) right son=7 (47 obs)
## Primary splits:
## Radio < 21.2 to the left, improve=0.6374983, (0 missing)
## TV < 181.7 to the left, improve=0.2020322, (0 missing)
## Newspaper < 37.35 to the left, improve=0.1365813, (0 missing)
## Surrogate splits:
## Newspaper < 37.35 to the left, agree=0.652, adj=0.289, (0 split)
## TV < 189.75 to the left, agree=0.598, adj=0.178, (0 split)
##
## Node number 4: 19 observations
## mean=6.726316, MSE=2.33036
##
## Node number 5: 31 observations, complexity param=0.01090457
## mean=10.81613, MSE=3.027804
## left son=10 (21 obs) right son=11 (10 obs)
## Primary splits:
## Radio < 29.75 to the left, improve=0.4565899, (0 missing)
## Newspaper < 32.25 to the left, improve=0.4512462, (0 missing)
## TV < 53.75 to the left, improve=0.1207227, (0 missing)
## Surrogate splits:
## Newspaper < 44.1 to the left, agree=0.774, adj=0.3, (0 split)
## TV < 38.1 to the right, agree=0.710, adj=0.1, (0 split)
##
## Node number 6: 45 observations, complexity param=0.01959223
## mean=13.04444, MSE=2.947802
## left son=12 (24 obs) right son=13 (21 obs)
## Primary splits:
## Radio < 10.05 to the left, improve=0.58047080, (0 missing)
## TV < 171.35 to the left, improve=0.27832210, (0 missing)
## Newspaper < 21.3 to the left, improve=0.02044107, (0 missing)
## Surrogate splits:
## Newspaper < 21.3 to the left, agree=0.622, adj=0.190, (0 split)
## TV < 196.5 to the right, agree=0.600, adj=0.143, (0 split)
##
## Node number 7: 47 observations, complexity param=0.05790187
## mean=20.14255, MSE=11.19096
## left son=14 (14 obs) right son=15 (33 obs)
## Primary splits:
## TV < 179.8 to the left, improve=0.4326483, (0 missing)
## Radio < 41.4 to the left, improve=0.4245020, (0 missing)
## Newspaper < 18.35 to the left, improve=0.1337402, (0 missing)
## Surrogate splits:
## Newspaper < 8.85 to the left, agree=0.723, adj=0.071, (0 split)
##
## Node number 10: 21 observations
## mean=10.00476, MSE=1.298549
##
## Node number 11: 10 observations
## mean=12.52, MSE=2.3736
##
## Node number 12: 24 observations
## mean=11.82083, MSE=0.8649826
##
## Node number 13: 21 observations
## mean=14.44286, MSE=1.661497
##
## Node number 14: 14 observations
## mean=16.76429, MSE=2.96801
##
## Node number 15: 33 observations, complexity param=0.04524806
## mean=21.57576, MSE=7.783655
## left son=30 (14 obs) right son=31 (19 obs)
## Primary splits:
## Radio < 35.85 to the left, improve=0.69232490, (0 missing)
## TV < 257.55 to the left, improve=0.35640150, (0 missing)
## Newspaper < 31.9 to the left, improve=0.08238753, (0 missing)
## Surrogate splits:
## TV < 238.65 to the left, agree=0.606, adj=0.071, (0 split)
## Newspaper < 18.35 to the left, agree=0.606, adj=0.071, (0 split)
##
## Node number 30: 14 observations
## mean=18.87143, MSE=1.550612
##
## Node number 31: 19 observations
## mean=23.56842, MSE=3.016898
# Draw the fitted tree
rpart.plot(x = modelo_ar)

# Predict Sales for the validation rows with the fitted tree
predicciones <- predict(modelo_ar, newdata = datos.validacion)

# Place the observed validation data and the predictions side by side
comparaciones <- data.frame(datos.validacion, predicciones = predicciones)
comparaciones
## TV Radio Newspaper Sales predicciones
## 1 230.1 37.8 69.2 22.1 23.568421
## 2 44.5 39.3 45.1 10.4 12.520000
## 6 8.7 48.9 75.0 7.2 6.726316
## 8 120.2 19.6 11.6 13.2 14.442857
## 11 66.1 5.8 24.2 8.6 10.004762
## 12 214.7 24.0 4.0 17.4 18.871429
## 13 23.8 35.1 65.9 9.2 6.726316
## 14 97.5 7.6 7.2 9.7 10.004762
## 18 281.4 39.6 55.8 24.4 23.568421
## 19 69.2 20.5 18.3 11.3 10.004762
## 20 147.3 23.9 19.1 14.6 16.764286
## 21 218.4 27.7 53.4 18.0 18.871429
## 22 237.4 5.1 23.5 12.5 11.820833
## 30 70.6 16.0 40.8 10.5 10.004762
## 34 265.6 20.0 0.3 17.4 14.442857
## 38 74.7 49.4 45.7 14.7 12.520000
## 40 228.0 37.7 32.0 21.5 23.568421
## 41 202.5 22.3 31.6 16.6 18.871429
## 43 293.6 27.7 1.8 20.7 18.871429
## 45 25.1 25.7 43.3 8.5 6.726316
## 46 175.1 22.5 31.5 14.9 16.764286
## 50 66.9 11.7 36.8 9.7 10.004762
## 53 216.4 41.7 39.6 22.6 23.568421
## 55 262.7 28.8 15.9 20.2 18.871429
## 58 136.2 19.2 16.6 13.2 14.442857
## 61 53.5 2.0 21.4 8.1 10.004762
## 71 199.1 30.6 38.7 18.3 18.871429
## 73 26.8 33.0 19.3 8.8 6.726316
## 77 27.5 1.6 20.7 6.9 6.726316
## 83 75.3 20.3 32.5 11.3 10.004762
## 87 76.3 27.5 16.0 12.0 10.004762
## 95 107.4 14.0 10.9 11.5 14.442857
## 96 163.3 31.6 52.9 16.9 16.764286
## 97 197.6 3.5 5.9 11.7 11.820833
## 99 289.7 42.3 51.2 25.4 23.568421
## 100 135.2 41.7 45.9 17.2 16.764286
## 105 238.2 34.3 5.3 20.7 18.871429
## 108 90.4 0.3 23.2 8.7 10.004762
## 109 13.1 0.4 25.6 5.3 6.726316
## 110 255.4 26.9 5.5 19.8 18.871429
## 114 209.6 20.6 10.7 15.9 14.442857
## 116 75.1 35.0 52.7 12.6 12.520000
## 121 141.3 26.8 46.2 15.5 16.764286
## 130 59.6 12.0 43.1 9.7 10.004762
## 131 0.7 39.6 8.7 1.6 6.726316
## 132 265.2 2.9 43.0 12.7 11.820833
## 134 219.8 33.5 45.1 19.6 18.871429
## 138 273.7 28.9 59.7 20.8 18.871429
## 144 104.6 5.7 34.4 10.4 11.820833
## 153 197.6 23.3 14.2 16.6 18.871429
## 154 171.3 39.7 37.7 19.0 16.764286
## 158 149.8 1.3 24.3 10.1 11.820833
## 165 117.2 14.7 5.4 11.9 14.442857
## 168 206.8 5.2 19.4 12.2 11.820833
## 170 284.3 10.6 6.4 15.0 14.442857
## 175 222.4 3.4 13.1 11.5 11.820833
## 182 218.5 5.4 27.4 12.2 11.820833
## 183 56.2 5.7 29.7 8.7 10.004762
# Root-mean-squared error between observed and predicted Sales.
# The result is stored under the same name as the function, so the call is
# namespace-qualified to make clear which `rmse` is being invoked.
rmse <- Metrics::rmse(
  actual = comparaciones$Sales,
  predicted = comparaciones$predicciones
)
rmse
## [1] 1.649371
# Line chart of observed (blue) vs predicted (red) Sales, one x position per
# row of `comparaciones`.
# seq_len() replaces 1:nrow(): for a zero-row data frame 1:0 would yield
# c(1, 0) instead of an empty index.
# Title typo fixed: "Adverstising" -> "Advertising".
ggplot(data = comparaciones) +
  geom_line(aes(x = seq_len(nrow(comparaciones)), y = Sales), colour = "blue") +
  geom_line(aes(x = seq_len(nrow(comparaciones)), y = predicciones), colour = "red") +
  ggtitle(label = "Valores reales vs predichos Advertising", subtitle = "Arbol de Regresión")
# New advertising budgets to be scored with the fitted tree (two scenarios).
# The columns are created inside data.frame() rather than as free-standing
# vectors: global objects named TV / Radio / Newspaper share their names with
# the model variables and could be picked up silently during formula
# evaluation if a column were ever missing from a data frame.
nuevos <- data.frame(
  TV = c(140, 160),
  Radio = c(60, 40),
  Newspaper = c(80, 90)
)
nuevos
## TV Radio Newspaper
## 1 140 60 80
## 2 160 40 90
# Predict Sales for the new budget scenarios with the fitted tree
Y.predicciones <- predict(modelo_ar, newdata = nuevos)
# Show the predicted values
Y.predicciones
## 1 2
## 16.76429 16.76429
Usando la semilla 1550 y una partición 70/30 de los datos en entrenamiento y validación, obtuve los siguientes resultados:
# Draw the fitted tree again so it can be read next to the interpretation
rpart.plot(x = modelo_ar)
El punto de inflexión parte de lo invertido en TV: según los datos recolectados, los casos en los que se invertía menos de 102 millones (corte exacto del árbol: 101.55) resultaban, en promedio, en ventas menores a las 14 unidades (media de 9.26 en esa rama), siendo la media general de ventas de 14.02. Esta división deja un 35% de los datos de entrenamiento (50 de 142) en inversiones menores a 102 y un 65% (92 de 142) en inversiones mayores o iguales a 102; esto considerando exclusivamente la inversión en TV.
Para igualar el peor dato de ventas, la única variable importante es la de TV, ya que basta con invertir menos de 30 en TV para tener una venta media de 6.7, con un índice de dispersión (RMSE) de 1.649371 por arriba o por debajo. Esta condición la cumple un 13% de los datos de entrenamiento (19 de 142).
Mientras que, para alcanzar el mayor dato de ventas, son indispensables dos variables: la de TV y la de Radio. Se necesita una inversión mínima de 180 en TV y de 36 en Radio, respectivamente.
Por último, obtuve que el índice de dispersión (RMSE, raíz del error cuadrático medio) del modelo sobre los datos de validación es de 1.649371.
# Show the RMSE value computed earlier
print(rmse)
## [1] 1.649371