Objetivo

Construir un modelo de árbol de regresión con datos atmosféricos de la cd. de NY con la finalidad de explicar la variable dependiente temperatura a partir de las variables explicativas (ozono, radiación, velocidad del viento y mes) y realizar predicciones.

Librerías

library(readr)
library(ggplot2)

library(rpart) # Para crear arboles
library(tree) # Para crear arboles
library(rpart.plot) # Para visualizar arbol

Datos

# Read the NY atmospheric-conditions CSV straight from its GitHub URL
# into a data frame, then display every row.
datos <- read.csv(
  file = "https://raw.githubusercontent.com/rpizarrog/Industrias-4.0/main/datos/temperatura%20condiciones%20NY.csv"
)

datos
##       X Ozono Radiacion Vel.viento Temperatura Mes Dia
## 1     1  41.0       190        7.4          67   5   1
## 2     2  36.0       118        8.0          72   5   2
## 3     3  12.0       149       12.6          74   5   3
## 4     4  18.0       313       11.5          62   5   4
## 5     5  31.5       205       14.3          56   5   5
## 6     6  28.0       205       14.9          66   5   6
## 7     7  23.0       299        8.6          65   5   7
## 8     8  19.0        99       13.8          59   5   8
## 9     9   8.0        19       20.1          61   5   9
## 10   10  31.5       194        8.6          69   5  10
## 11   11   7.0       205        6.9          74   5  11
## 12   12  16.0       256        9.7          69   5  12
## 13   13  11.0       290        9.2          66   5  13
## 14   14  14.0       274       10.9          68   5  14
## 15   15  18.0        65       13.2          58   5  15
## 16   16  14.0       334       11.5          64   5  16
## 17   17  34.0       307       12.0          66   5  17
## 18   18   6.0        78       18.4          57   5  18
## 19   19  30.0       322       11.5          68   5  19
## 20   20  11.0        44        9.7          62   5  20
## 21   21   1.0         8        9.7          59   5  21
## 22   22  11.0       320       16.6          73   5  22
## 23   23   4.0        25        9.7          61   5  23
## 24   24  32.0        92       12.0          61   5  24
## 25   25  31.5        66       16.6          57   5  25
## 26   26  31.5       266       14.9          58   5  26
## 27   27  31.5       205        8.0          57   5  27
## 28   28  23.0        13       12.0          67   5  28
## 29   29  45.0       252       14.9          81   5  29
## 30   30 115.0       223        5.7          79   5  30
## 31   31  37.0       279        7.4          76   5  31
## 32   32  31.5       286        8.6          78   6   1
## 33   33  31.5       287        9.7          74   6   2
## 34   34  31.5       242       16.1          67   6   3
## 35   35  31.5       186        9.2          84   6   4
## 36   36  31.5       220        8.6          85   6   5
## 37   37  31.5       264       14.3          79   6   6
## 38   38  29.0       127        9.7          82   6   7
## 39   39  31.5       273        6.9          87   6   8
## 40   40  71.0       291       13.8          90   6   9
## 41   41  39.0       323       11.5          87   6  10
## 42   42  31.5       259       10.9          93   6  11
## 43   43  31.5       250        9.2          92   6  12
## 44   44  23.0       148        8.0          82   6  13
## 45   45  31.5       332       13.8          80   6  14
## 46   46  31.5       322       11.5          79   6  15
## 47   47  21.0       191       14.9          77   6  16
## 48   48  37.0       284       20.7          72   6  17
## 49   49  20.0        37        9.2          65   6  18
## 50   50  12.0       120       11.5          73   6  19
## 51   51  13.0       137       10.3          76   6  20
## 52   52  31.5       150        6.3          77   6  21
## 53   53  31.5        59        1.7          76   6  22
## 54   54  31.5        91        4.6          76   6  23
## 55   55  31.5       250        6.3          76   6  24
## 56   56  31.5       135        8.0          75   6  25
## 57   57  31.5       127        8.0          78   6  26
## 58   58  31.5        47       10.3          73   6  27
## 59   59  31.5        98       11.5          80   6  28
## 60   60  31.5        31       14.9          77   6  29
## 61   61  31.5       138        8.0          83   6  30
## 62   62 135.0       269        4.1          84   7   1
## 63   63  49.0       248        9.2          85   7   2
## 64   64  32.0       236        9.2          81   7   3
## 65   65  31.5       101       10.9          84   7   4
## 66   66  64.0       175        4.6          83   7   5
## 67   67  40.0       314       10.9          83   7   6
## 68   68  77.0       276        5.1          88   7   7
## 69   69  97.0       267        6.3          92   7   8
## 70   70  97.0       272        5.7          92   7   9
## 71   71  85.0       175        7.4          89   7  10
## 72   72  31.5       139        8.6          82   7  11
## 73   73  10.0       264       14.3          73   7  12
## 74   74  27.0       175       14.9          81   7  13
## 75   75  31.5       291       14.9          91   7  14
## 76   76   7.0        48       14.3          80   7  15
## 77   77  48.0       260        6.9          81   7  16
## 78   78  35.0       274       10.3          82   7  17
## 79   79  61.0       285        6.3          84   7  18
## 80   80  79.0       187        5.1          87   7  19
## 81   81  63.0       220       11.5          85   7  20
## 82   82  16.0         7        6.9          74   7  21
## 83   83  31.5       258        9.7          81   7  22
## 84   84  31.5       295       11.5          82   7  23
## 85   85  80.0       294        8.6          86   7  24
## 86   86 108.0       223        8.0          85   7  25
## 87   87  20.0        81        8.6          82   7  26
## 88   88  52.0        82       12.0          86   7  27
## 89   89  82.0       213        7.4          88   7  28
## 90   90  50.0       275        7.4          86   7  29
## 91   91  64.0       253        7.4          83   7  30
## 92   92  59.0       254        9.2          81   7  31
## 93   93  39.0        83        6.9          81   8   1
## 94   94   9.0        24       13.8          81   8   2
## 95   95  16.0        77        7.4          82   8   3
## 96   96  78.0       205        6.9          86   8   4
## 97   97  35.0       205        7.4          85   8   5
## 98   98  66.0       205        4.6          87   8   6
## 99   99 122.0       255        4.0          89   8   7
## 100 100  89.0       229       10.3          90   8   8
## 101 101 110.0       207        8.0          90   8   9
## 102 102  31.5       222        8.6          92   8  10
## 103 103  31.5       137       11.5          86   8  11
## 104 104  44.0       192       11.5          86   8  12
## 105 105  28.0       273       11.5          82   8  13
## 106 106  65.0       157        9.7          80   8  14
## 107 107  31.5        64       11.5          79   8  15
## 108 108  22.0        71       10.3          77   8  16
## 109 109  59.0        51        6.3          79   8  17
## 110 110  23.0       115        7.4          76   8  18
## 111 111  31.0       244       10.9          78   8  19
## 112 112  44.0       190       10.3          78   8  20
## 113 113  21.0       259       15.5          77   8  21
## 114 114   9.0        36       14.3          72   8  22
## 115 115  31.5       255       12.6          75   8  23
## 116 116  45.0       212        9.7          79   8  24
## 117 117 168.0       238        3.4          81   8  25
## 118 118  73.0       215        8.0          86   8  26
## 119 119  31.5       153        5.7          88   8  27
## 120 120  76.0       203        9.7          97   8  28
## 121 121 118.0       225        2.3          94   8  29
## 122 122  84.0       237        6.3          96   8  30
## 123 123  85.0       188        6.3          94   8  31
## 124 124  96.0       167        6.9          91   9   1
## 125 125  78.0       197        5.1          92   9   2
## 126 126  73.0       183        2.8          93   9   3
## 127 127  91.0       189        4.6          93   9   4
## 128 128  47.0        95        7.4          87   9   5
## 129 129  32.0        92       15.5          84   9   6
## 130 130  20.0       252       10.9          80   9   7
## 131 131  23.0       220       10.3          78   9   8
## 132 132  21.0       230       10.9          75   9   9
## 133 133  24.0       259        9.7          73   9  10
## 134 134  44.0       236       14.9          81   9  11
## 135 135  21.0       259       15.5          76   9  12
## 136 136  28.0       238        6.3          77   9  13
## 137 137   9.0        24       10.9          71   9  14
## 138 138  13.0       112       11.5          71   9  15
## 139 139  46.0       237        6.9          78   9  16
## 140 140  18.0       224       13.8          67   9  17
## 141 141  13.0        27       10.3          76   9  18
## 142 142  24.0       238       10.3          68   9  19
## 143 143  16.0       201        8.0          82   9  20
## 144 144  13.0       238       12.6          64   9  21
## 145 145  23.0        14        9.2          71   9  22
## 146 146  36.0       139       10.3          81   9  23
## 147 147   7.0        49       10.3          69   9  24
## 148 148  14.0        20       16.6          63   9  25
## 149 149  30.0       193        6.9          70   9  26
## 150 150  31.5       145       13.2          77   9  27
## 151 151  14.0       191       14.3          75   9  28
## 152 152  18.0       131        8.0          76   9  29
## 153 153  20.0       223       11.5          68   9  30

Se cargan los datos de las condiciones del ambiente de la cd. de NY.

Se describen los datos con summary() y str()

# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = datos)
##        X           Ozono          Radiacion       Vel.viento    
##  Min.   :  1   Min.   :  1.00   Min.   :  7.0   Min.   : 1.700  
##  1st Qu.: 39   1st Qu.: 21.00   1st Qu.:120.0   1st Qu.: 7.400  
##  Median : 77   Median : 31.50   Median :205.0   Median : 9.700  
##  Mean   : 77   Mean   : 39.56   Mean   :186.8   Mean   : 9.958  
##  3rd Qu.:115   3rd Qu.: 46.00   3rd Qu.:256.0   3rd Qu.:11.500  
##  Max.   :153   Max.   :168.00   Max.   :334.0   Max.   :20.700  
##   Temperatura         Mes             Dia      
##  Min.   :56.00   Min.   :5.000   Min.   : 1.0  
##  1st Qu.:72.00   1st Qu.:6.000   1st Qu.: 8.0  
##  Median :79.00   Median :7.000   Median :16.0  
##  Mean   :77.88   Mean   :6.993   Mean   :15.8  
##  3rd Qu.:85.00   3rd Qu.:8.000   3rd Qu.:23.0  
##  Max.   :97.00   Max.   :9.000   Max.   :31.0
# Compact structure listing: dimensions plus the type of each column.
str(object = datos)
## 'data.frame':    153 obs. of  7 variables:
##  $ X          : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ Ozono      : num  41 36 12 18 31.5 28 23 19 8 31.5 ...
##  $ Radiacion  : int  190 118 149 313 205 205 299 99 19 194 ...
##  $ Vel.viento : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
##  $ Temperatura: int  67 72 74 62 56 66 65 59 61 69 ...
##  $ Mes        : int  5 5 5 5 5 5 5 5 5 5 ...
##  $ Dia        : int  1 2 3 4 5 6 7 8 9 10 ...

Construir modelo de árbol de regresión

# Fit a regression tree that models Temperatura from Ozono, Radiacion,
# Vel.viento and Mes, leaving every other rpart() setting at its default.
modelo <- rpart(
  formula = Temperatura ~ Ozono + Radiacion + Vel.viento + Mes,
  data = datos
)
# modelo

# summary() prints the detailed node-by-node report as a side effect;
# its return value is kept in `sm` so it can be displayed afterwards.
sm <- summary(modelo)
## Call:
## rpart(formula = Temperatura ~ Ozono + Radiacion + Vel.viento + 
##     Mes, data = datos)
##   n= 153 
## 
##           CP nsplit rel error    xerror       xstd
## 1 0.43429937      0 1.0000000 1.0072851 0.10212141
## 2 0.17012108      1 0.5657006 0.5712855 0.05934825
## 3 0.07642747      2 0.3955795 0.4614733 0.04783078
## 4 0.03100834      3 0.3191521 0.3714267 0.04001974
## 5 0.01566744      4 0.2881437 0.3905500 0.04289275
## 6 0.01000000      5 0.2724763 0.3829833 0.04210009
## 
## Variable importance
##        Mes      Ozono Vel.viento  Radiacion 
##         49         32         11          8 
## 
## Node number 1: 153 observations,    complexity param=0.4342994
##   mean=77.88235, MSE=89.00577 
##   left son=2 (31 obs) right son=3 (122 obs)
##   Primary splits:
##       Mes        < 5.5   to the left,  improve=0.4342994, (0 missing)
##       Ozono      < 46.5  to the left,  improve=0.3322210, (0 missing)
##       Vel.viento < 8.9   to the right, improve=0.1498241, (0 missing)
##       Radiacion  < 79.5  to the left,  improve=0.1405230, (0 missing)
##   Surrogate splits:
##       Ozono      < 6.5   to the left,  agree=0.817, adj=0.097, (0 split)
##       Radiacion  < 297   to the right, agree=0.810, adj=0.065, (0 split)
##       Vel.viento < 16.35 to the right, agree=0.810, adj=0.065, (0 split)
## 
## Node number 2: 31 observations,    complexity param=0.03100834
##   mean=65.54839, MSE=45.47347 
##   left son=4 (10 obs) right son=5 (21 obs)
##   Primary splits:
##       Radiacion  < 108.5 to the left,  improve=0.2995493, (0 missing)
##       Ozono      < 31.75 to the left,  improve=0.2438493, (0 missing)
##       Vel.viento < 8.9   to the right, improve=0.1431859, (0 missing)
##   Surrogate splits:
##       Ozono      < 6.5   to the left,  agree=0.774, adj=0.3, (0 split)
##       Vel.viento < 15.75 to the right, agree=0.742, adj=0.2, (0 split)
## 
## Node number 3: 122 observations,    complexity param=0.1701211
##   mean=81.01639, MSE=51.5899 
##   left son=6 (97 obs) right son=7 (25 obs)
##   Primary splits:
##       Ozono      < 65.5  to the left,  improve=0.3680809, (0 missing)
##       Vel.viento < 8.9   to the right, improve=0.1509717, (0 missing)
##       Radiacion  < 50    to the left,  improve=0.1474241, (0 missing)
##       Mes        < 8.5   to the right, improve=0.1071032, (0 missing)
##   Surrogate splits:
##       Vel.viento < 5.4   to the right, agree=0.852, adj=0.28, (0 split)
## 
## Node number 4: 10 observations
##   mean=60.2, MSE=7.96 
## 
## Node number 5: 21 observations
##   mean=68.09524, MSE=43.22902 
## 
## Node number 6: 97 observations,    complexity param=0.07642747
##   mean=78.80412, MSE=37.27091 
##   left son=12 (33 obs) right son=13 (64 obs)
##   Primary splits:
##       Ozono      < 25.5  to the left,  improve=0.2878838, (0 missing)
##       Mes        < 8.5   to the right, improve=0.1787796, (0 missing)
##       Radiacion  < 50    to the left,  improve=0.1426820, (0 missing)
##       Vel.viento < 12.3  to the right, improve=0.0680503, (0 missing)
##   Surrogate splits:
##       Mes        < 8.5   to the right, agree=0.763, adj=0.303, (0 split)
##       Radiacion  < 50    to the left,  agree=0.742, adj=0.242, (0 split)
##       Vel.viento < 13.5  to the right, agree=0.670, adj=0.030, (0 split)
## 
## Node number 7: 25 observations
##   mean=89.6, MSE=14.48 
## 
## Node number 12: 33 observations,    complexity param=0.01566744
##   mean=74.24242, MSE=27.94123 
##   left son=24 (26 obs) right son=25 (7 obs)
##   Primary splits:
##       Vel.viento < 8.9   to the right, improve=0.23139180, (0 missing)
##       Mes        < 8.5   to the right, improve=0.14754940, (0 missing)
##       Radiacion  < 42.5  to the left,  improve=0.07846079, (0 missing)
##       Ozono      < 15    to the left,  improve=0.06158346, (0 missing)
## 
## Node number 13: 64 observations
##   mean=81.15625, MSE=25.81934 
## 
## Node number 24: 26 observations
##   mean=72.92308, MSE=24.22485 
## 
## Node number 25: 7 observations
##   mean=79.14286, MSE=11.26531
# Show the object returned by summary(modelo).
print(sm)
## n= 153 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 153 13617.88000 77.88235  
##    2) Mes< 5.5 31  1409.67700 65.54839  
##      4) Radiacion< 108.5 10    79.60000 60.20000 *
##      5) Radiacion>=108.5 21   907.80950 68.09524 *
##    3) Mes>=5.5 122  6293.96700 81.01639  
##      6) Ozono< 65.5 97  3615.27800 78.80412  
##       12) Ozono< 25.5 33   922.06060 74.24242  
##         24) Vel.viento>=8.9 26   629.84620 72.92308 *
##         25) Vel.viento< 8.9 7    78.85714 79.14286 *
##       13) Ozono>=25.5 64  1652.43800 81.15625 *
##      7) Ozono>=65.5 25   362.00000 89.60000 *

Visualización del árbol

Se muestra un árbol de regresión

# Plot the fitted regression tree.
prp(
  modelo,
  main = "Arbol de regresión",
  # --- overall layout ---
  nn = TRUE,                      # show the node numbers
  fallen.leaves = TRUE,           # line the leaves up at the bottom of the plot
  shadow.col = "gray",            # gray shadows under the leaves
  faclen = 0,                     # 0 = print factor names in full
  trace = 1,                      # print the auto-calculated cex, xlim, ylim
  # --- branches ---
  branch = 0.5,                   # angle of the branch lines
  branch.lty = 3,                 # dotted branch lines
  # --- split labels ---
  split.prefix = "is ",           # text placed before each split label
  split.suffix = "?",             # text placed after each split label
  split.cex = 1.2,                # split text larger than the node text
  split.box.col = "lightblue",    # light-blue split boxes (default is white)
  split.border.col = "darkgray",  # dark-gray border on split boxes
  split.round = 0.5               # slightly rounded split-box corners
)
## cex 1   xlim c(-0.2, 1.2)   ylim c(0, 1)

Predecir resultados

Se predicen temperaturas con nuevos datos de las variables independientes.

# One new observation of the explanatory variables to predict from.
# NOTE(review): 90 is above the maximum Vel.viento (20.7) and 4 is below the
# minimum Mes (5) reported by summary(datos), so this point lies outside the
# observed data -- confirm that is intended.
ozono <- 60
radiacion <- 80
velocidad.viento <- 90
mes <- 4

# Echo each value.
ozono
radiacion
velocidad.viento
mes
## [1] 60
## [1] 80
## [1] 90
## [1] 4

La predicción:

# Predict the temperature for the new observation; the column names of the
# new data frame match the predictor names used in the model formula.
predict(
  modelo,
  newdata = data.frame(
    Ozono = ozono,
    Radiacion = radiacion,
    Vel.viento = velocidad.viento,
    Mes = mes
  )
)
##    1 
## 60.2

La temperatura está en grados Fahrenheit.

¿Cuáles serían las condiciones de temperatura para varios nuevos registros?

# Four new observations, one element per observation in each vector.
# NOTE(review): every Vel.viento value here is above the maximum (20.7) and
# Mes = 4 is below the minimum (5) reported by summary(datos) -- confirm
# these out-of-range inputs are intended.
ozono <- c(60, 70, 90, 100)
radiacion <- c(80, 100, 150, 200)
velocidad.viento <- c(90, 60, 50, 40)
mes <- c(4, 4, 5, 6)

# Echo each vector.
ozono
radiacion
velocidad.viento
mes
## [1]  60  70  90 100
## [1]  80 100 150 200
## [1] 90 60 50 40
## [1] 4 4 5 6

Se generan las predicciones:

# Predict one temperature per new observation and keep the result.
predicciones <- predict(
  modelo,
  newdata = data.frame(
    Ozono = ozono,
    Radiacion = radiacion,
    Vel.viento = velocidad.viento,
    Mes = mes
  )
)
predicciones
##        1        2        3        4 
## 60.20000 60.20000 68.09524 89.60000

Formar un nuevo conjunto de datos mostrando los valores nuevos y las predicciones:

# Put the new input values and their predicted temperatures side by side:
# the inputs form a data frame and the predictions are bound on as the
# last column.
las.predicciones <- cbind(
  data.frame(
    Ozono = ozono,
    Radiacion = radiacion,
    Vel.viento = velocidad.viento,
    Mes = mes
  ),
  predicciones
)

las.predicciones
##   Ozono Radiacion Vel.viento Mes predicciones
## 1    60        80         90   4     60.20000
## 2    70       100         60   4     60.20000
## 3    90       150         50   5     68.09524
## 4   100       200         40   6     89.60000

FALTA VALORAR LA EFICIENCIA DEL MODELO O EL GRADO DE EXPLICACIÓN QUE TIENEN LAS VARIABLES INDEPENDIENTES CON RESPECTO A LA TEMPERATURA.

Pendiente…