#Introducción: Se fija una semilla de 22 y un tamaño de muestra n = 200. La función es f(x) = 2 + 1.5sin(x) + 0.07x^2, con los valores de x generados aleatoriamente entre 0 y 8, un vector de n errores con distribución normal estándar y y = f(x) + error. Se realizarán 5 gráficas con distinto número de nodos.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.1.3
## Warning: package 'ggplot2' was built under R version 4.1.3
## Warning: package 'tibble' was built under R version 4.1.3
## Warning: package 'tidyr' was built under R version 4.1.3
## Warning: package 'readr' was built under R version 4.1.3
## Warning: package 'purrr' was built under R version 4.1.3
## Warning: package 'dplyr' was built under R version 4.1.3
## Warning: package 'stringr' was built under R version 4.1.3
## Warning: package 'forcats' was built under R version 4.1.3
## Warning: package 'lubridate' was built under R version 4.1.3
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.1 v readr 2.1.4
## v forcats 1.0.0 v stringr 1.5.0
## v ggplot2 3.4.1 v tibble 3.2.1
## v lubridate 1.9.2 v tidyr 1.3.0
## v purrr 1.0.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(splines)
# Simulated data ----
# Fix the RNG seed so the sample below is reproducible.
set.seed(22)
n <- 200
# Function used to generate the response before noise is added.
f <- function(x) {
  2 + 1.5 * sin(x) + 0.07 * x^2
}
# n uniform draws on [0, 8], sorted in increasing order.
x <- sort(runif(n, min = 0, max = 8))
# n standard-normal noise terms (drawn after x, so the RNG order matters).
error <- rnorm(n)
y <- f(x) + error
datos <- tibble(x, y)
# Plot 1: cubic regression spline with 20 interior knots ----
nod <- 20
# nod + 2 equally spaced points on [0, 8]; dropping the first and last one
# leaves the nod interior points used as knots.
nodos <- seq(from = 0, to = 8, length.out = nod + 2)
nodos <- nodos[2:(nod + 1)]
# Least-squares fit of y on a degree-3 B-spline basis of x.
rspline <- lm(y ~ bs(x, knots = nodos, degree = 3))
# No newdata is given, so predictions are for the observations used in the fit.
rspred <- predict(rspline)
datos <- mutate(datos, rspred = rspred)
# Scatter of the sample with the fitted curve drawn on top in red.
plot1 <- ggplot(data = datos, mapping = aes(x = x, y = y)) +
  geom_point() +
  labs(title = "regression splines") +
  geom_line(mapping = aes(x = x, y = rspred), color = "red")
plot1
# Plot 2: cubic regression spline with 6 interior knots ----
nod2 <- 6
# nod2 + 2 equally spaced points on [0, 8]; the two endpoints are removed so
# only the nod2 interior points remain as knots.
nodos2 <- seq(0, 8, length.out = nod2 + 2)
nodos2 <- nodos2[-c(1, length(nodos2))]
# Least-squares fit of y on a degree-3 B-spline basis of x.
rspline2 <- lm(y ~ bs(x, knots = nodos2, degree = 3))
# No newdata is given, so predictions are for the observations used in the fit.
rspred2 <- predict(rspline2)
datos2 <- datos %>%
  mutate(rspred2 = rspred2)
# Plot from datos2 (not datos) so the curve is read from the rspred2 column of
# the plotted data instead of being looked up in the global environment.
plot2 <- ggplot(datos2, aes(x, y)) +
  geom_point() +
  labs(title = "regression splines") +
  geom_line(mapping = aes(x = x, y = rspred2), color = "blue")
plot2
# Plot 3: cubic regression spline with 14 interior knots ----
nod3 <- 14
# nod3 + 2 equally spaced points on [0, 8]; the two endpoints are removed so
# only the nod3 interior points remain as knots.
nodos3 <- seq(0, 8, length.out = nod3 + 2)
nodos3 <- nodos3[-c(1, length(nodos3))]
# Least-squares fit of y on a degree-3 B-spline basis of x.
rspline3 <- lm(y ~ bs(x, knots = nodos3, degree = 3))
# No newdata is given, so predictions are for the observations used in the fit.
rspred3 <- predict(rspline3)
# Store this model's own predictions; the previous code stored the 6-knot
# predictions (rspred2) here, a copy-paste slip that also made this section
# fail unless the 6-knot section had been run first.
datos3 <- datos %>%
  mutate(rspred3 = rspred3)
# Plot from datos3 so the curve is read from the rspred3 column of the plotted
# data instead of being looked up in the global environment.
plot3 <- ggplot(datos3, aes(x, y)) +
  geom_point() +
  labs(title = "regression splines") +
  geom_line(mapping = aes(x = x, y = rspred3), color = "green")
plot3
# Plot 4: cubic regression spline with 30 interior knots ----
nod4 <- 30
# nod4 + 2 equally spaced points on [0, 8]; the two endpoints are removed so
# only the nod4 interior points remain as knots.
nodos4 <- seq(0, 8, length.out = nod4 + 2)
nodos4 <- nodos4[-c(1, length(nodos4))]
# Least-squares fit of y on a degree-3 B-spline basis of x.
rspline4 <- lm(y ~ bs(x, knots = nodos4, degree = 3))
# No newdata is given, so predictions are for the observations used in the fit.
rspred4 <- predict(rspline4)
datos4 <- datos %>%
  mutate(rspred4 = rspred4)
# Plot from datos4 (not datos) so the curve is read from the rspred4 column of
# the plotted data instead of being looked up in the global environment.
plot4 <- ggplot(datos4, aes(x, y)) +
  geom_point() +
  labs(title = "regression splines") +
  geom_line(mapping = aes(x = x, y = rspred4), color = "red")
plot4
# Plot 5: cubic regression spline with 12 interior knots ----
nod5 <- 12
# nod5 + 2 equally spaced points on [0, 8]; the two endpoints are removed so
# only the nod5 interior points remain as knots.
nodos5 <- seq(0, 8, length.out = nod5 + 2)
nodos5 <- nodos5[-c(1, length(nodos5))]
# Least-squares fit of y on a degree-3 B-spline basis of x.
rspline5 <- lm(y ~ bs(x, knots = nodos5, degree = 3))
# No newdata is given, so predictions are for the observations used in the fit.
rspred5 <- predict(rspline5)
datos5 <- datos %>%
  mutate(rspred5 = rspred5)
# Plot from datos5 (not datos) so the curve is read from the rspred5 column of
# the plotted data instead of being looked up in the global environment.
plot5 <- ggplot(datos5, aes(x, y)) +
  geom_point() +
  labs(title = "regression splines") +
  geom_line(mapping = aes(x = x, y = rspred5), color = "yellow")
plot5
#CONCLUSIÓN
# A medida que aumenta el número de nodos, la curva ajustada se ajusta más a los puntos de datos individuales, lo que puede producir un sobreajuste del modelo.