Trabajo Práctico N° 2

Modulo 6

Relaciones lineales y no lineales

A Exploracion de datos

#install.packages("readxl")
#install.packages("tidyverse")
#install.packages("lmtest")
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

1 Cargamos la base de datos

# Read the CSV from the working directory into a tibble and print it.
maiz <- readr::read_csv(file = "MAIZ_FERTILIZANTE.csv")
## Rows: 50 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (2): DOSIS_N, DIAMETRO_TALLO
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
print(maiz)
## # A tibble: 50 × 2
##    DOSIS_N DIAMETRO_TALLO
##      <dbl>          <dbl>
##  1       0           8.37
##  2       0          11.5 
##  3       0          10.4 
##  4       0           7.74
##  5       0           9.13
##  6       0          12.5 
##  7       0           6.36
##  8       0           9.36
##  9       0          11.9 
## 10       0           8.70
## # ℹ 40 more rows

2 Exploracion estructura

# Compact column-wise overview of the data (types and first values).
dplyr::glimpse(maiz)
## Rows: 50
## Columns: 2
## $ DOSIS_N        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 50, 50, 50, 50, 50, 50, 5…
## $ DIAMETRO_TALLO <dbl> 8.371554, 11.496018, 10.424468, 7.740558, 9.132100, 12.…

3 Grafico de dispersion

# Scatter plot of DIAMETRO_TALLO against DOSIS_N, points coloured by DOSIS_N.
ggplot(
  data = maiz,
  mapping = aes(x = DOSIS_N, y = DIAMETRO_TALLO, colour = DOSIS_N)
) +
  geom_point() +
  ggtitle("Relación entre Dosis y Diametro tallo") +
  theme_minimal()

B Ajuste de modelos

4 Modelo lineal

# Model fit: first-degree (straight-line) regression of DIAMETRO_TALLO
# on DOSIS_N, followed by its coefficient / goodness-of-fit summary.
MODELO_lineal <- lm(formula = DIAMETRO_TALLO ~ DOSIS_N, data = maiz)
summary(object = MODELO_lineal)
## 
## Call:
## lm(formula = DIAMETRO_TALLO ~ DOSIS_N, data = maiz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.5481 -1.5960  0.2347  1.4300  4.3449 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 11.908075   0.598242  19.905  < 2e-16 ***
## DOSIS_N      0.041118   0.004885   8.418 5.18e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.442 on 48 degrees of freedom
## Multiple R-squared:  0.5962, Adjusted R-squared:  0.5878 
## F-statistic: 70.86 on 1 and 48 DF,  p-value: 5.182e-11
# Information criteria for the linear fit (lower values indicate a better
# trade-off between fit and complexity). The functions must be *called* on
# the model: a bare `AIC` / `BIC` only prints the function's source code.
AIC(MODELO_lineal)
## [1] 235.1473
BIC(MODELO_lineal)
## [1] 240.8834
# Visualisation: observed data with the fitted straight line in blue.
# The plot is built from the `maiz` data frame rather than from the `lm`
# object: handing a model to ggplot() relies on an implicit fortify()
# conversion, while geom_smooth(method = "lm") already refits the same line
# from the raw data.
ggplot(maiz, aes(x = DOSIS_N, y = DIAMETRO_TALLO)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "blue") +
  labs(title = "Modelo lineal",
       x = "Dosis",
       y = "Diámetro tallo") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

4 Modelo cuadratico

# Model fit: second-degree polynomial regression of DIAMETRO_TALLO on
# DOSIS_N. I() protects the power so that `^` is arithmetic, not formula
# syntax.
MODELO_cuad <- lm(
  formula = DIAMETRO_TALLO ~ DOSIS_N + I(DOSIS_N^2),
  data = maiz
)
summary(object = MODELO_cuad)
## 
## Call:
## lm(formula = DIAMETRO_TALLO ~ DOSIS_N + I(DOSIS_N^2), data = maiz)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2288 -1.1872 -0.1897  1.3319  3.3898 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   9.9979513  0.5467262  18.287  < 2e-16 ***
## DOSIS_N       0.1175233  0.0129528   9.073 6.71e-12 ***
## I(DOSIS_N^2) -0.0003820  0.0000621  -6.151 1.59e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.837 on 47 degrees of freedom
## Multiple R-squared:  0.7763, Adjusted R-squared:  0.7668 
## F-statistic: 81.54 on 2 and 47 DF,  p-value: 5.224e-16
# Information criteria for the quadratic fit (lower values indicate a better
# trade-off between fit and complexity). The functions must be *called* on
# the model: a bare `AIC` / `BIC` only prints the function's source code.
AIC(MODELO_cuad)
## [1] 207.6167
BIC(MODELO_cuad)
## [1] 215.2648
# Visualisation: observed data with the fitted quadratic curve in red.
ggplot(data = maiz) +
  aes(x = DOSIS_N, y = DIAMETRO_TALLO) +
  geom_point() +
  stat_smooth(
    method = "lm",
    formula = y ~ x + I(x^2),
    se = FALSE,
    color = "red"
  ) +
  ggtitle("Modelo cuadrático") +
  xlab("Dosis") +
  ylab("Diámetro tallo") +
  theme_minimal()

5 Informacion

Modelo lineal

Ecuación del modelo de Regresión lineal Y = 11,908 + 0,041 X

\[Diametro\ tallo = 11.908 + 0.041 \cdot Dosis\]

Coeficientes estimados: ordenada al origen 11,908 y pendiente de la recta 0,041 (positiva, es decir, recta creciente).

R cuadrado ajustado: 0,588, o sea, aproximadamente 59 %.

AIC 235.1473 y BIC 240.8834

Modelo cuadratico

Ecuación del modelo de regresión cuadrática: Y = 9,997 + 0,117 X − 0,00038 X²

\[Diametro\ tallo = 9.997 + 0.117 \cdot Dosis - 0.00038 \cdot Dosis^2\]

Coeficientes estimados: ordenada al origen 9,997, componente lineal 0,117 y componente cuadrática −0,000382 (negativa: la curva es cóncava hacia abajo).

R cuadrado ajustado: 0,767, o sea, aproximadamente 77 %.

AIC 207.6167 y BIC 215.2648

C Comparacion de modelos

6 Gráficamente podemos ver que el modelo cuadrático se ajusta mejor. Analíticamente, el modelo con menor AIC y BIC posee mejor ajuste; en este caso es el cuadrático (AIC 207,62 frente a 235,15; BIC 215,26 frente a 240,88).

Interpretacion y prediccion

7

# Overlay both fits on the observed data: linear in blue, quadratic in red.
# `linewidth` replaces `size` for line thickness (the `size` aesthetic for
# lines is deprecated since ggplot2 3.4.0). The linear formula is spelled
# out so geom_smooth() does not have to report the default it picked.
ggplot(maiz, aes(x = DOSIS_N, y = DIAMETRO_TALLO)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              color = "blue", linewidth = 0.8) +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              color = "red", linewidth = 0.8) +
  labs(title = "Modelo lineal vs Modelo Cuadrático",
       x = "Dosis",
       y = "Diámetro tallo") +
  theme_minimal()

Se observa en el gráfico que la curva roja (cuadrática) se ajusta mejor a la dispersión de las observaciones.

8

# Predicted value of the response (DIAMETRO_TALLO) at DOSIS_N = 125, computed
# by hand from the rounded coefficients printed by summary().
# NOTE: despite their names, these variables hold predicted stem diameters.
# Linear model: the intercept is 11.908 (it had been mistyped as 11.098).
Contenido_nitrogeno_modelo_lineal <- 11.908 + 0.041 * 125
Contenido_nitrogeno_modelo_lineal
## [1] 17.033
# Quadratic model: intercept + linear term + (negative) quadratic term.
Contenido_nitrogeno_modelo_cuadratico <- 9.997 + 0.117 * 125 - 0.000382 * 125^2
Contenido_nitrogeno_modelo_cuadratico
## [1] 18.65325

Diámetro de tallo predicho por el modelo lineal para una dosis de 125: 17,033 (= 11,908 + 0,041 × 125)

Diámetro de tallo predicho por el modelo cuadrático para una dosis de 125: 18,653

# Both fitted curves in black, with the quadratic-model prediction at
# dose 125 (y = 18.65325) highlighted as a single red point.
# annotate() draws the point exactly once; a geom_point() layer with constant
# aesthetics inherits the 50-row data and triggers a length-mismatch warning.
# `linewidth` replaces the deprecated `size` for line thickness, and the
# linear formula is explicit so geom_smooth() prints no message.
ggplot(maiz, aes(x = DOSIS_N, y = DIAMETRO_TALLO)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE,
              color = "black", linewidth = 0.8) +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE,
              color = "black", linewidth = 0.8) +
  annotate("point", x = 125, y = 18.65325, color = "red", size = 3.0)