library('knitr')
library('ggplot2')
library('gplots')
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library('forecast')
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
  1. Considerar los datos del archivo acath.sav, el cual se lee con la función load. Se trata de una muestra de 3504 pacientes que acudieron al centro con dolor en el pecho, para los que se recogieron diversas variables cuyo nombre en la base de datos y descripción es la siguiente: • sigdz : variable binaria que toma valores 1 y 0, indicando si el paciente presenta estrechamiento de alguna de las arterias coronarias de al menos un 75 % (sigdz = 1) o no (sigdz = 0). • tvdlm : lo mismo que la anterior pero corresponde a tres arterias con estrechamiento. • sex: variable categórica que indica el género del paciente: 0 corresponde al género masculino, 1 al género femenino. • age: variable continua que representa la edad en años del individuo. choleste: variable continua que expresa los Mg/dl de colesterol • duración: variable continua que recoge la duración, en días, de los síntomas de la enfermedad coronaria.
load('acath.sav')
head (acath)
##   sex age cad.dur choleste sigdz tvdlm
## 1   0  73     132      268     1     1
## 2   0  68      85      120     1     1
## 3   0  54      45       NA     1     0
## 4   1  58      86      245     0     0
## 5   1  56       7      269     0     0
## 6   0  64       0       NA     1     0
dim (acath)
## [1] 3504    6
attach (acath)
names (acath)
## [1] "sex"      "age"      "cad.dur"  "choleste" "sigdz"    "tvdlm"
str(acath) 
## 'data.frame':    3504 obs. of  6 variables:
##  $ sex     : 'labelled' int  0 0 0 1 1 0 0 0 0 0 ...
##  $ age     : 'labelled' int  73 68 54 58 56 64 65 41 68 52 ...
##   ..- attr(*, "label")= chr "Age"
##   ..- attr(*, "units")= chr "Year"
##  $ cad.dur : 'labelled' int  132 85 45 86 7 0 76 15 30 1 ...
##   ..- attr(*, "label")= chr "Duration of Symptoms of Coronary Artery Disease"
##  $ choleste: 'labelled' int  268 120 NA 245 269 NA NA 247 NA NA ...
##   ..- attr(*, "label")= chr "Cholesterol"
##   ..- attr(*, "units")= chr "mg %"
##  $ sigdz   : 'labelled' int  1 1 1 0 0 1 1 1 1 1 ...
##   ..- attr(*, "label")= chr "Significant Coronary Disease by Cardiac Cath"
##  $ tvdlm   : 'labelled' int  1 1 0 0 0 0 1 0 1 0 ...
##   ..- attr(*, "label")= chr "Three Vessel or Left Main Disease by Cardiac Cath"
##  - attr(*, "comment")= chr "Data from the Duke University Cardiovascular Disease Databank"
anyNA(acath)
## [1] TRUE
sum(is.na(acath))
## [1] 1249
apply(is.na(acath), 2, sum)
##      sex      age  cad.dur choleste    sigdz    tvdlm 
##        0        0        0     1246        0        3
  1. Realizar un modelo de regresión logística simple, siendo la variable explicada sigdz, considerando como única variable explicativa la variable colesterol. Escribir el modelo de regresión logística y calcular la probabilidad de que una persona tenga estrechamiento arterial si el colesterol es 199.
logit1<- glm(sigdz ~ choleste, data = acath, family = "binomial")
summary(logit1)
## 
## Call:
## glm(formula = sigdz ~ choleste, family = "binomial", data = acath)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2140  -1.3669   0.8247   0.9406   1.4279  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -0.7525280  0.2186516  -3.442 0.000578 ***
## choleste     0.0062268  0.0009525   6.538 6.25e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2895.3  on 2257  degrees of freedom
## Residual deviance: 2849.7  on 2256  degrees of freedom
##   (1246 observations deleted due to missingness)
## AIC: 2853.7
## 
## Number of Fisher Scoring iterations: 4

La fórmula de este modelo para calcular la variable dependiente es:

Pr(sigdz =1) = e^(-0.7525280 + 0.0062268 choleste)/(1 + e^(-0.7525280 + 0.0062268 choleste))

newdata = data.frame(choleste=c(199))
prediccion <- predict(logit1, newdata, type = 'response')
prediccion
##         1 
## 0.6193053
x = seq(1, 11, 0.1)
choleste = 199
y =  exp(-0.7525280 + 0.0062268 *199)/(1 + exp(-0.7525280 + 0.0062268 *199))

qplot(x , y , geom = "line", xlab = "sigdz",ylab = "Probabilidad de estrechamiento arterial", main = "Probabilidad de estrechamiento arterial con choleste = 199")

  1. Realizar un modelo de regresión logística considerando todas las variables no categóricas.
logit2<- glm(sigdz ~ choleste + age + cad.dur, data = acath, family = "binomial")
summary(logit2)
## 
## Call:
## glm(formula = sigdz ~ choleste + age + cad.dur, family = "binomial", 
##     data = acath)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1371  -1.2503   0.7331   0.9109   1.5662  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.385005   0.347923  -9.729  < 2e-16 ***
## choleste     0.006394   0.000976   6.551 5.72e-11 ***
## age          0.052507   0.005317   9.876  < 2e-16 ***
## cad.dur     -0.001007   0.000916  -1.099    0.272    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2895.3  on 2257  degrees of freedom
## Residual deviance: 2741.0  on 2254  degrees of freedom
##   (1246 observations deleted due to missingness)
## AIC: 2749
## 
## Number of Fisher Scoring iterations: 4

La fórmula de este modelo para calcular la variable dependiente es:

Pr(sigdz=1) = e ^ (-3.385005 + 0.006394 * choleste + 0.052507 * age - 0.001007 * cad. dur ) / (1 + e ^ (-3.385005 + 0.006394 * choleste + 0.052507 * age - 0.001007 * cad. dur ))

  1. Realizar el modelo anterior diferenciando mujeres y varones, o sea tomando la variable sex como factor.
acath$sex <- factor(acath$sex)
logit3<- glm(sigdz ~ choleste + age + cad.dur + sex, data = acath, family = "binomial")
summary(logit3)
## 
## Call:
## glm(formula = sigdz ~ choleste + age + cad.dur + sex, family = "binomial", 
##     data = acath)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5401  -0.8704   0.5282   0.7663   2.4125  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -4.286432   0.386800 -11.082   <2e-16 ***
## choleste     0.009123   0.001079   8.453   <2e-16 ***
## age          0.073032   0.006146  11.883   <2e-16 ***
## cad.dur     -0.001695   0.001014  -1.672   0.0945 .  
## sex1        -2.101717   0.113559 -18.508   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2895.3  on 2257  degrees of freedom
## Residual deviance: 2344.6  on 2253  degrees of freedom
##   (1246 observations deleted due to missingness)
## AIC: 2354.6
## 
## Number of Fisher Scoring iterations: 4

El modelo para sexo masculino es:

Pr(sigdz =1) = e^(-4.2864325 + 0.009123 choleste + 0,073032 age - 0,001695 cad.dur)/(1 + e^(-4.2864325 + 0.009123 choleste + 0,073032 age - 0,001695 cad.dur))

El modelo para sexo femenino es:

Pr(sigdz =1) = e^(-4.2864325 + 0.009123 choleste + 0,073032 age - 0,001695 cad.dur - 2.101717)/(1 + e^(-4.2864325 + 0.009123 choleste + 0,073032 age - 0,001695 cad.dur - 2.101717))

  1. Comentar los resultados de los modelos anteriores. Explicar los resultados que devuelve el comando summary.
  1. Resolver el siguiente problema usando un modelo de ANOVA de factor fijo. El departamento de psicología de la Universidad de Tres de Febrero ha realizado un estudio sobre la frecuencia con que los alumnos asisten a clases teóricas no obligatorias, pertenecientes a tres localidades del partido, tomadas durante un cuatrimestre. Se han utilizado los datos que tomaron los profesores a algunos estudiantes y queremos conocer si existen diferencias estadísticamente significativas entre la cantidad estudiantes universitarios que asisten a clase dependiendo de la localidad a la que pertenecen. Los datos son los siguientes: • Caseros: 11 14 7 15 11 13 11 16 10 15 18 12 9 9 10 10 15 10 14 10 10 12 14 12 15 7 13 6 10 15 20 10 13 10 6 14 8 10 8 11 • Santos Lugares: 13 10 12 7 5 10 10 16 9 7 7 2 6 9 9 8 8 10 3 6 5 2 9 3 4 5 10 8 5 9 10 8 13 10 0 2 1 1 0 4 • Pablo Podestá: 6 7 3 5 9 6 1 6 0 2 5 6 11 6 7 0 5 7 5 4 7 4 2 8 9 6 1 4 7 7 8 9 7 5 1 6 9 4 76
  1. Introducir los datos en R creando las 2 variables: una que incluya las frecuencias en la asistencia a clase y otra que sea un factor, que proporcione información sobre la localidad a la que pertenecen cada uno de los estudiantes.
Asistencia <- c(11, 14, 7, 15, 11, 13, 11, 16,  10, 15, 18, 12, 9, 9, 10, 10, 15, 10, 14, 10, 10, 12, 14, 12, 15, 7, 13, 6, 10, 15, 20, 10, 13, 10, 6, 14, 8, 10, 8, 11, 13, 10, 12, 7, 5, 10, 10, 16, 9, 7, 7, 2, 6, 9, 9, 8, 8, 10, 3, 6, 5, 2, 9, 3, 4, 5, 10, 8, 5, 9, 10, 8, 13, 10, 0, 2, 1, 1, 0, 4, 6, 7, 3, 5, 9, 6, 1, 6, 0, 2, 5, 6, 11, 6, 7, 0, 5, 7, 5, 4, 7, 4, 2, 8, 9, 6, 1, 4, 7, 7, 8, 9, 7, 5, 1, 6, 9, 4, 7, 6)

Localidad <- c (rep (1,40), rep(2,40), rep (3,40))
  1. Introducir los datos en R creando las 2 variables: una que incluya las frecuencias en la asistencia a clase y otra que sea un factor, que proporcione información sobre la localidad a la que pertenecen cada uno de los estudiantes.
datos<-data.frame (Localidad = factor(Localidad,labels=
                                c('Caseros','Santos Lugares','Pablo Podesta')), Asistencia)
head (datos)
##   Localidad Asistencia
## 1   Caseros         11
## 2   Caseros         14
## 3   Caseros          7
## 4   Caseros         15
## 5   Caseros         11
## 6   Caseros         13
dim (datos)
## [1] 120   2
names (datos)
## [1] "Localidad"  "Asistencia"
str(datos) 
## 'data.frame':    120 obs. of  2 variables:
##  $ Localidad : Factor w/ 3 levels "Caseros","Santos Lugares",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Asistencia: num  11 14 7 15 11 13 11 16 10 15 ...
anyNA(datos)
## [1] FALSE
  1. Analizar los datos de la muestra mediante gráficos y descriptivos. ¿Observa diferencias en los valores promedios por grupos?
plot(datos, main = 'Asistencia por localidad',  cex.main = 1, cex.sub= 0.5 , ylab = 'Variabilidad', xlab = 'Localidades', cex.lab=0.8, col = c("orange2", "blue", "green3"))

plotmeans(Asistencia~Localidad, col = "green3", main = 'Asistencia por localidad' )

  1. Realizar un test ANOVA para comparar las medias de las 3 poblaciones. ¿Cuáles serían las hipótesis nula y alternativa?
res<-aov(Asistencia~ Localidad, data=datos)
summary(res)
##              Df Sum Sq Mean Sq F value   Pr(>F)    
## Localidad     2  826.9   413.4   38.98 1.07e-13 ***
## Residuals   117 1241.1    10.6                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
  1. Describir los resultados obtenidos indicando el valor del p-valor.

  2. Extraer conclusiones en el contexto del problema.

tapply(datos$Asistencia,datos$Localidad,mean)
##        Caseros Santos Lugares  Pablo Podesta 
##          11.60           6.90           5.45
desviaciones <- tapply(res$residuals, Localidad, sd)
desviaciones
##        1        2        3 
## 3.160656 3.848410 2.650109
max(desviaciones)/min(desviaciones) 
## [1] 1.45217
  1. Si se han obtenido diferencias significativas entre los grupos, determinar cuales son esas diferencias utilizando el test HSD de Tukey. Representar gra ́ficamente las dife- rencias encontradas e intrepretar los resultados obtenidos.

Test de Tukey

print(TukeyHSD(res))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = Asistencia ~ Localidad, data = datos)
## 
## $Localidad
##                               diff       lwr        upr     p adj
## Santos Lugares-Caseros       -4.70 -6.428861 -2.9711395 0.0000000
## Pablo Podesta-Caseros        -6.15 -7.878861 -4.4211395 0.0000000
## Pablo Podesta-Santos Lugares -1.45 -3.178861  0.2788605 0.1189269
plot(TukeyHSD(res),cex.lab=0.8)

  1. A partir de los residuos obtenidos analizar la validez del modelo. ¿Es valido el modelo de ANOVA para estos datos?
plot(res$residuals, main = 'Independencia de resiudos', col = "blue", xlab = 'Índice',ylab = "Residuos")

dfr <- data.frame(Residuos = res$residuals)

ggplot(dfr, aes(x = Residuos)) + 
  geom_histogram(aes(y =..density..),
                 breaks = seq(-10, 10, by = 1), 
                 colour = "blue", 
                 fill = "skyblue") +  geom_density(size= 1.2,colour = "red") +
  stat_function(fun = dnorm,  args = list(mean = mean(res$residuals),
                                          sd = sd(res$residuals)),colour = "green3", size = 1.2)

qqnorm(res$residuals, main= "QQ plot de residuos")
qqline(res$residuals, col="blue", lwd = 2)

  1. El archivo de datos de R ’nottem’ contiene las temperaturas promedio por mes en la ciudad de Nottinham durante un período de tiempo.
data ("nottem")
nottem
##       Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec
## 1920 40.6 40.8 44.4 46.7 54.1 58.5 57.7 56.4 54.3 50.5 42.9 39.8
## 1921 44.2 39.8 45.1 47.0 54.1 58.7 66.3 59.9 57.0 54.2 39.7 42.8
## 1922 37.5 38.7 39.5 42.1 55.7 57.8 56.8 54.3 54.3 47.1 41.8 41.7
## 1923 41.8 40.1 42.9 45.8 49.2 52.7 64.2 59.6 54.4 49.2 36.3 37.6
## 1924 39.3 37.5 38.3 45.5 53.2 57.7 60.8 58.2 56.4 49.8 44.4 43.6
## 1925 40.0 40.5 40.8 45.1 53.8 59.4 63.5 61.0 53.0 50.0 38.1 36.3
## 1926 39.2 43.4 43.4 48.9 50.6 56.8 62.5 62.0 57.5 46.7 41.6 39.8
## 1927 39.4 38.5 45.3 47.1 51.7 55.0 60.4 60.5 54.7 50.3 42.3 35.2
## 1928 40.8 41.1 42.8 47.3 50.9 56.4 62.2 60.5 55.4 50.2 43.0 37.3
## 1929 34.8 31.3 41.0 43.9 53.1 56.9 62.5 60.3 59.8 49.2 42.9 41.9
## 1930 41.6 37.1 41.2 46.9 51.2 60.4 60.1 61.6 57.0 50.9 43.0 38.8
## 1931 37.1 38.4 38.4 46.5 53.5 58.4 60.6 58.2 53.8 46.6 45.5 40.6
## 1932 42.4 38.4 40.3 44.6 50.9 57.0 62.1 63.5 56.3 47.3 43.6 41.8
## 1933 36.2 39.3 44.5 48.7 54.2 60.8 65.5 64.9 60.1 50.2 42.1 35.8
## 1934 39.4 38.2 40.4 46.9 53.4 59.6 66.5 60.4 59.2 51.2 42.8 45.8
## 1935 40.0 42.6 43.5 47.1 50.0 60.5 64.6 64.0 56.8 48.6 44.2 36.4
## 1936 37.3 35.0 44.0 43.9 52.7 58.6 60.0 61.1 58.1 49.6 41.6 41.3
## 1937 40.8 41.0 38.4 47.4 54.1 58.6 61.4 61.8 56.3 50.9 41.4 37.1
## 1938 42.1 41.2 47.3 46.6 52.4 59.0 59.6 60.4 57.0 50.7 47.8 39.2
## 1939 39.4 40.9 42.4 47.8 52.4 58.0 60.7 61.8 58.2 46.7 46.6 37.8

Gráfico la temperatura en función del tiempo

Grafico la variación de temperatura en teniendo en cuenta los registros mensuales desde el año 1920 a 1940

plot(nottem, type="p", col = "red", main= "Temperatura en función del tiempo", xlab = 'Años', ylab = 'Temperatura') 
lines(nottem, type= "l", col = "green3" ) 
abline(reg=lm(nottem~time(nottem)), col = "blue")

seasonplot(nottem, col=rainbow(24), main = 'Variación mensual temperatura por año',  year.labels=TRUE, xlab = 'Meses', ylab = 'Temperatura')

print(nottem)
##       Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec
## 1920 40.6 40.8 44.4 46.7 54.1 58.5 57.7 56.4 54.3 50.5 42.9 39.8
## 1921 44.2 39.8 45.1 47.0 54.1 58.7 66.3 59.9 57.0 54.2 39.7 42.8
## 1922 37.5 38.7 39.5 42.1 55.7 57.8 56.8 54.3 54.3 47.1 41.8 41.7
## 1923 41.8 40.1 42.9 45.8 49.2 52.7 64.2 59.6 54.4 49.2 36.3 37.6
## 1924 39.3 37.5 38.3 45.5 53.2 57.7 60.8 58.2 56.4 49.8 44.4 43.6
## 1925 40.0 40.5 40.8 45.1 53.8 59.4 63.5 61.0 53.0 50.0 38.1 36.3
## 1926 39.2 43.4 43.4 48.9 50.6 56.8 62.5 62.0 57.5 46.7 41.6 39.8
## 1927 39.4 38.5 45.3 47.1 51.7 55.0 60.4 60.5 54.7 50.3 42.3 35.2
## 1928 40.8 41.1 42.8 47.3 50.9 56.4 62.2 60.5 55.4 50.2 43.0 37.3
## 1929 34.8 31.3 41.0 43.9 53.1 56.9 62.5 60.3 59.8 49.2 42.9 41.9
## 1930 41.6 37.1 41.2 46.9 51.2 60.4 60.1 61.6 57.0 50.9 43.0 38.8
## 1931 37.1 38.4 38.4 46.5 53.5 58.4 60.6 58.2 53.8 46.6 45.5 40.6
## 1932 42.4 38.4 40.3 44.6 50.9 57.0 62.1 63.5 56.3 47.3 43.6 41.8
## 1933 36.2 39.3 44.5 48.7 54.2 60.8 65.5 64.9 60.1 50.2 42.1 35.8
## 1934 39.4 38.2 40.4 46.9 53.4 59.6 66.5 60.4 59.2 51.2 42.8 45.8
## 1935 40.0 42.6 43.5 47.1 50.0 60.5 64.6 64.0 56.8 48.6 44.2 36.4
## 1936 37.3 35.0 44.0 43.9 52.7 58.6 60.0 61.1 58.1 49.6 41.6 41.3
## 1937 40.8 41.0 38.4 47.4 54.1 58.6 61.4 61.8 56.3 50.9 41.4 37.1
## 1938 42.1 41.2 47.3 46.6 52.4 59.0 59.6 60.4 57.0 50.7 47.8 39.2
## 1939 39.4 40.9 42.4 47.8 52.4 58.0 60.7 61.8 58.2 46.7 46.6 37.8
boxplot(nottem~cycle(nottem),col ="violet", main = 'Estacionalidad mensual temperaturas', xlab = 'Meses', ylab = 'Temperatura')

Para definir la serie correctamente escribimos:

Se indica la primer fecha del registro, que en este caso es el mes de enero de 1920 y la frecuencia es la cantidad de datos en este caso es mensual que ocurre en año. Por lo tanto en este caso es 12

nottem = ts(nottem, start = c(1920,1), frequency = 12)
nottem
##       Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec
## 1920 40.6 40.8 44.4 46.7 54.1 58.5 57.7 56.4 54.3 50.5 42.9 39.8
## 1921 44.2 39.8 45.1 47.0 54.1 58.7 66.3 59.9 57.0 54.2 39.7 42.8
## 1922 37.5 38.7 39.5 42.1 55.7 57.8 56.8 54.3 54.3 47.1 41.8 41.7
## 1923 41.8 40.1 42.9 45.8 49.2 52.7 64.2 59.6 54.4 49.2 36.3 37.6
## 1924 39.3 37.5 38.3 45.5 53.2 57.7 60.8 58.2 56.4 49.8 44.4 43.6
## 1925 40.0 40.5 40.8 45.1 53.8 59.4 63.5 61.0 53.0 50.0 38.1 36.3
## 1926 39.2 43.4 43.4 48.9 50.6 56.8 62.5 62.0 57.5 46.7 41.6 39.8
## 1927 39.4 38.5 45.3 47.1 51.7 55.0 60.4 60.5 54.7 50.3 42.3 35.2
## 1928 40.8 41.1 42.8 47.3 50.9 56.4 62.2 60.5 55.4 50.2 43.0 37.3
## 1929 34.8 31.3 41.0 43.9 53.1 56.9 62.5 60.3 59.8 49.2 42.9 41.9
## 1930 41.6 37.1 41.2 46.9 51.2 60.4 60.1 61.6 57.0 50.9 43.0 38.8
## 1931 37.1 38.4 38.4 46.5 53.5 58.4 60.6 58.2 53.8 46.6 45.5 40.6
## 1932 42.4 38.4 40.3 44.6 50.9 57.0 62.1 63.5 56.3 47.3 43.6 41.8
## 1933 36.2 39.3 44.5 48.7 54.2 60.8 65.5 64.9 60.1 50.2 42.1 35.8
## 1934 39.4 38.2 40.4 46.9 53.4 59.6 66.5 60.4 59.2 51.2 42.8 45.8
## 1935 40.0 42.6 43.5 47.1 50.0 60.5 64.6 64.0 56.8 48.6 44.2 36.4
## 1936 37.3 35.0 44.0 43.9 52.7 58.6 60.0 61.1 58.1 49.6 41.6 41.3
## 1937 40.8 41.0 38.4 47.4 54.1 58.6 61.4 61.8 56.3 50.9 41.4 37.1
## 1938 42.1 41.2 47.3 46.6 52.4 59.0 59.6 60.4 57.0 50.7 47.8 39.2
## 1939 39.4 40.9 42.4 47.8 52.4 58.0 60.7 61.8 58.2 46.7 46.6 37.8

Las funciones start y end nos indican comienzo y final de la serie de tiempo

start(nottem)
## [1] 1920    1
end(nottem)
## [1] 1939   12
frequency(nottem)
## [1] 12
cycle(nottem)
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1920   1   2   3   4   5   6   7   8   9  10  11  12
## 1921   1   2   3   4   5   6   7   8   9  10  11  12
## 1922   1   2   3   4   5   6   7   8   9  10  11  12
## 1923   1   2   3   4   5   6   7   8   9  10  11  12
## 1924   1   2   3   4   5   6   7   8   9  10  11  12
## 1925   1   2   3   4   5   6   7   8   9  10  11  12
## 1926   1   2   3   4   5   6   7   8   9  10  11  12
## 1927   1   2   3   4   5   6   7   8   9  10  11  12
## 1928   1   2   3   4   5   6   7   8   9  10  11  12
## 1929   1   2   3   4   5   6   7   8   9  10  11  12
## 1930   1   2   3   4   5   6   7   8   9  10  11  12
## 1931   1   2   3   4   5   6   7   8   9  10  11  12
## 1932   1   2   3   4   5   6   7   8   9  10  11  12
## 1933   1   2   3   4   5   6   7   8   9  10  11  12
## 1934   1   2   3   4   5   6   7   8   9  10  11  12
## 1935   1   2   3   4   5   6   7   8   9  10  11  12
## 1936   1   2   3   4   5   6   7   8   9  10  11  12
## 1937   1   2   3   4   5   6   7   8   9  10  11  12
## 1938   1   2   3   4   5   6   7   8   9  10  11  12
## 1939   1   2   3   4   5   6   7   8   9  10  11  12
summary(nottem)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   31.30   41.55   47.35   49.04   57.00   66.50
nott.ts.desc = decompose(nottem)
plot(nott.ts.desc, xlab='Años')

Serie observada

nott.ts.desc$x
##       Jan  Feb  Mar  Apr  May  Jun  Jul  Aug  Sep  Oct  Nov  Dec
## 1920 40.6 40.8 44.4 46.7 54.1 58.5 57.7 56.4 54.3 50.5 42.9 39.8
## 1921 44.2 39.8 45.1 47.0 54.1 58.7 66.3 59.9 57.0 54.2 39.7 42.8
## 1922 37.5 38.7 39.5 42.1 55.7 57.8 56.8 54.3 54.3 47.1 41.8 41.7
## 1923 41.8 40.1 42.9 45.8 49.2 52.7 64.2 59.6 54.4 49.2 36.3 37.6
## 1924 39.3 37.5 38.3 45.5 53.2 57.7 60.8 58.2 56.4 49.8 44.4 43.6
## 1925 40.0 40.5 40.8 45.1 53.8 59.4 63.5 61.0 53.0 50.0 38.1 36.3
## 1926 39.2 43.4 43.4 48.9 50.6 56.8 62.5 62.0 57.5 46.7 41.6 39.8
## 1927 39.4 38.5 45.3 47.1 51.7 55.0 60.4 60.5 54.7 50.3 42.3 35.2
## 1928 40.8 41.1 42.8 47.3 50.9 56.4 62.2 60.5 55.4 50.2 43.0 37.3
## 1929 34.8 31.3 41.0 43.9 53.1 56.9 62.5 60.3 59.8 49.2 42.9 41.9
## 1930 41.6 37.1 41.2 46.9 51.2 60.4 60.1 61.6 57.0 50.9 43.0 38.8
## 1931 37.1 38.4 38.4 46.5 53.5 58.4 60.6 58.2 53.8 46.6 45.5 40.6
## 1932 42.4 38.4 40.3 44.6 50.9 57.0 62.1 63.5 56.3 47.3 43.6 41.8
## 1933 36.2 39.3 44.5 48.7 54.2 60.8 65.5 64.9 60.1 50.2 42.1 35.8
## 1934 39.4 38.2 40.4 46.9 53.4 59.6 66.5 60.4 59.2 51.2 42.8 45.8
## 1935 40.0 42.6 43.5 47.1 50.0 60.5 64.6 64.0 56.8 48.6 44.2 36.4
## 1936 37.3 35.0 44.0 43.9 52.7 58.6 60.0 61.1 58.1 49.6 41.6 41.3
## 1937 40.8 41.0 38.4 47.4 54.1 58.6 61.4 61.8 56.3 50.9 41.4 37.1
## 1938 42.1 41.2 47.3 46.6 52.4 59.0 59.6 60.4 57.0 50.7 47.8 39.2
## 1939 39.4 40.9 42.4 47.8 52.4 58.0 60.7 61.8 58.2 46.7 46.6 37.8

Componente estacional

nott.ts.desc$seasonal
##             Jan        Feb        Mar        Apr        May        Jun
## 1920 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1921 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1922 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1923 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1924 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1925 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1926 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1927 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1928 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1929 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1930 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1931 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1932 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1933 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1934 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1935 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1936 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1937 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1938 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
## 1939 -9.3393640 -9.8998904 -6.9466009 -2.7573465  3.4533991  8.9865132
##             Jul        Aug        Sep        Oct        Nov        Dec
## 1920 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1921 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1922 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1923 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1924 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1925 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1926 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1927 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1928 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1929 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1930 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1931 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1932 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1933 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1934 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1935 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1936 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1937 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1938 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
## 1939 12.9672149 11.4591009  7.4001096  0.6547149 -6.6176535 -9.3601974
plot(nott.ts.desc$seasonal)

Componente tendencia

Componente Tendencia

nott.ts.desc$trend
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1920       NA       NA       NA       NA       NA       NA 49.04167 49.15000
## 1921 49.56667 50.07083 50.32917 50.59583 50.61667 50.60833 50.45417 50.12917
## 1922 48.87083 48.24167 47.89583 47.48750 47.27917 47.32083 47.45417 47.69167
## 1923 47.68333 48.21250 48.43750 48.52917 48.38750 47.98750 47.71250 47.50000
## 1924 47.59167 47.39167 47.41667 47.52500 47.88750 48.47500 48.75417 48.90833
## 1925 49.51250 49.74167 49.71667 49.58333 49.32917 48.76250 48.42500 48.51250
## 1926 48.64167 48.64167 48.87083 48.92083 48.92917 49.22083 49.37500 49.17917
## 1927 48.83750 48.68750 48.50833 48.54167 48.72083 48.55833 48.42500 48.59167
## 1928 48.63333 48.70833 48.73750 48.76250 48.78750 48.90417 48.74167 48.08333
## 1929 47.47917 47.48333 47.65833 47.80000 47.75417 47.94167 48.41667 48.94167
## 1930 49.48333 49.43750 49.37500 49.32917 49.40417 49.27917 48.96250 48.82917
## 1931 48.66250 48.54167 48.26667 47.95417 47.87917 48.05833 48.35417 48.57500
## 1932 48.30417 48.58750 48.91250 49.04583 48.99583 48.96667 48.75833 48.53750
## 1933 50.00000 50.20000 50.41667 50.69583 50.75417 50.44167 50.32500 50.41250
## 1934 49.75000 49.60417 49.37917 49.38333 49.45417 49.90000 50.34167 50.55000
## 1935 50.72083 50.79167 50.84167 50.63333 50.58333 50.25000 49.74583 49.31667
## 1936 48.65000 48.33750 48.27083 48.36667 48.30000 48.39583 48.74583 49.14167
## 1937 49.39167 49.47917 49.43333 49.41250 49.45833 49.27500 49.15417 49.21667
## 1938 49.71667 49.58333 49.55417 49.57500 49.83333 50.18750 50.16250 50.03750
## 1939 49.67917 49.78333 49.89167 49.77500 49.55833 49.45000       NA       NA
##           Sep      Oct      Nov      Dec
## 1920 49.13750 49.17917 49.19167 49.20000
## 1921 49.85000 49.41250 49.27500 49.30417
## 1922 47.89167 48.18750 48.07083 47.58750
## 1923 47.20000 46.99583 47.15000 47.52500
## 1924 49.13750 49.22500 49.23333 49.32917
## 1925 48.74167 49.00833 49.03333 48.79167
## 1926 49.05417 49.05833 49.02917 49.00000
## 1927 48.59583 48.50000 48.47500 48.50000
## 1928 47.60000 47.38333 47.33333 47.44583
## 1929 49.19167 49.32500 49.37083 49.43750
## 1930 48.76667 48.63333 48.71250 48.72500
## 1931 48.65417 48.65417 48.46667 48.30000
## 1932 48.75000 49.09583 49.40417 49.70000
## 1933 50.19583 49.95000 49.84167 49.75833
## 1934 50.86250 51.00000 50.86667 50.76250
## 1935 49.02083 48.90833 48.88750 48.92083
## 1936 49.15833 49.07083 49.27500 49.33333
## 1937 49.59583 49.93333 49.82917 49.77500
## 1938 49.82083 49.66667 49.71667 49.67500
## 1939       NA       NA       NA       NA
plot(nott.ts.desc$trend)

Componente random

nott.ts.desc$random
##               Jan          Feb          Mar          Apr          May
## 1920           NA           NA           NA           NA           NA
## 1921  3.972697368 -0.370942982  1.717434211 -0.838486842  0.029934211
## 1922 -2.031469298  0.358223684 -1.449232456 -2.630153509  4.967434211
## 1923  3.456030702  1.787390351  1.409100877  0.028179825 -2.640899123
## 1924  1.047697368  0.008223684 -2.170065789  0.732346491  1.859100877
## 1925 -0.173135965  0.658223684 -1.970065789 -1.725986842  1.017434211
## 1926 -0.102302632  4.658223684  1.475767544  2.736513158 -1.782565789
## 1927 -0.098135965 -0.287609649  3.738267544  1.315679825 -0.474232456
## 1928  1.506030702  2.291557018  1.009100877  1.294846491 -1.340899123
## 1929 -3.339802632 -6.283442982  0.288267544 -1.142653509  1.892434211
## 1930  1.456030702 -2.437609649 -1.228399123  0.328179825 -1.657565789
## 1931 -2.223135965 -0.241776316 -2.920065789  1.303179825  2.167434211
## 1932  3.435197368 -0.287609649 -1.665899123 -1.688486842 -1.549232456
## 1933 -4.460635965 -1.000109649  1.029934211  0.761513158 -0.007565789
## 1934 -1.010635965 -1.504276316 -2.032565789  0.274013158  0.492434211
## 1935 -1.381469298  1.708223684 -0.395065789 -0.775986842 -4.036732456
## 1936 -2.010635965 -3.437609649  2.675767544 -1.709320175  0.946600877
## 1937  0.747697368  1.420723684 -4.086732456  0.744846491  1.188267544
## 1938  1.722697368  1.516557018  4.692434211 -0.217653509 -0.886732456
## 1939 -0.939802632  1.016557018 -0.545065789  0.782346491 -0.611732456
##               Jun          Jul          Aug          Sep          Oct
## 1920           NA -4.308881579 -4.209100877 -2.237609649  0.666118421
## 1921 -0.894846491  2.878618421 -1.688267544 -0.250109649  4.132785088
## 1922  1.492653509 -3.621381579 -4.850767544 -0.991776316 -1.742214912
## 1923 -4.274013158  3.520285088  0.640899123 -0.200109649  1.549451754
## 1924  0.238486842 -0.921381579 -2.167434211 -0.137609649 -0.079714912
## 1925  1.650986842  2.107785088  1.028399123 -3.141776316  0.336951754
## 1926 -1.407346491  0.157785088  1.361732456  1.045723684 -3.013048246
## 1927 -2.544846491 -0.992214912  0.449232456 -1.295942982  1.145285088
## 1928 -1.490679825  0.491118421  0.957565789  0.399890351  2.161951754
## 1929 -0.028179825  1.116118421 -0.100767544  3.208223684 -0.779714912
## 1930  2.134320175 -1.829714912  1.311732456  0.833223684  1.611951754
## 1931  1.355153509 -0.721381579 -1.834100877 -2.254276316 -2.708881579
## 1932 -0.953179825  0.374451754  3.503399123  0.149890351 -2.450548246
## 1933  1.371820175  2.207785088  3.028399123  2.504057018 -0.404714912
## 1934  0.713486842  3.191118421 -1.609100877  0.937390351 -0.454714912
## 1935  1.263486842  1.886951754  3.224232456  0.379057018 -0.963048246
## 1936  1.217653509 -1.713048246  0.499232456  1.541557018 -0.125548246
## 1937  0.338486842 -0.721381579  1.124232456 -0.695942982  0.311951754
## 1938 -0.174013158 -3.529714912 -1.096600877 -0.220942982  0.378618421
## 1939 -0.436513158           NA           NA           NA           NA
##               Nov          Dec
## 1920  0.325986842 -0.039802632
## 1921 -2.957346491  2.856030702
## 1922  0.346820175  3.472697368
## 1923 -4.232346491 -0.564802632
## 1924  1.784320175  3.631030702
## 1925 -4.315679825 -3.131469298
## 1926 -0.811513158  0.160197368
## 1927  0.442653509 -3.939802632
## 1928  2.284320175 -0.785635965
## 1929  0.146820175  1.822697368
## 1930  0.905153509 -0.564802632
## 1931  3.650986842  1.660197368
## 1932  0.813486842  1.460197368
## 1933 -1.124013158 -4.598135965
## 1934 -1.449013158  4.397697368
## 1935  1.930153509 -3.160635965
## 1936 -1.057346491  1.326864035
## 1937 -1.811513158 -3.314802632
## 1938  4.700986842 -1.114802632
## 1939           NA           NA
plot(nott.ts.desc$random)

plot(aggregate(nottem, FUN = mean))

  1. Realizar un modelo ARIMA y predecir las temperaturas para los dos años posteriores a los informados en el dataset.
AP<- ts(nottem,frequency = 12, start = c(1920,1), end = c(1937,12))
modelo1 <- auto.arima(AP, stationary =TRUE, seasonal = TRUE)
summary(modelo1)
## Series: AP 
## ARIMA(5,0,1)(0,0,2)[12] with non-zero mean 
## 
## Coefficients:
##          ar1      ar2      ar3      ar4      ar5      ma1    sma1    sma2
##       1.0681  -0.2380  -0.1542  -0.0965  -0.1357  -0.5139  0.0147  0.3561
## s.e.  0.1104   0.1146   0.1014   0.0994   0.0822   0.0912  0.0697  0.0678
##          mean
##       48.9638
## s.e.   0.2118
## 
## sigma^2 estimated as 7.273:  log likelihood=-520.06
## AIC=1060.11   AICc=1061.18   BIC=1093.86
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.04088834 2.640028 2.128319 -0.4285207 4.518315 0.7648003
##                     ACF1
## Training set -0.01243412

Pronóstico

ajuste<- forecast(modelo1, h =24)
ajuste
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 1938       35.99945 32.54335 39.45555 30.71381 41.28510
## Feb 1938       36.59234 32.64091 40.54377 30.54915 42.63553
## Mar 1938       42.93357 38.79710 47.07003 36.60740 49.25974
## Apr 1938       47.17520 43.02656 51.32384 40.83041 53.52000
## May 1938       54.38048 50.19139 58.56956 47.97382 60.78713
## Jun 1938       59.04147 54.57883 63.50411 52.21645 65.86649
## Jul 1938       60.02198 55.15910 64.88487 52.58485 67.45912
## Aug 1938       59.97351 54.78663 65.16040 52.04086 67.90616
## Sep 1938       55.57310 50.24529 60.90092 47.42491 63.72130
## Oct 1938       48.99338 43.65437 54.33239 40.82806 57.15869
## Nov 1938       42.91286 37.54089 48.28483 34.69714 51.12858
## Dec 1938       39.49091 33.95411 45.02770 31.02311 47.95871
## Jan 1939       38.80519 32.99623 44.61416 29.92114 47.68924
## Feb 1939       39.78179 33.74614 45.81744 30.55106 49.01252
## Mar 1939       41.85010 35.71238 47.98782 32.46326 51.23693
## Apr 1939       48.50312 42.35709 54.64915 39.10357 57.90267
## May 1939       54.37981 48.21111 60.54850 44.94561 63.81401
## Jun 1939       57.51244 51.22643 63.79845 47.89882 67.12606
## Jul 1939       59.11143 52.63844 65.58442 49.21185 69.01102
## Aug 1939       58.32157 51.68537 64.95777 48.17238 68.47076
## Sep 1939       54.61394 47.90272 61.32516 44.35001 64.87787
## Oct 1939       49.99228 43.27462 56.70994 39.71850 60.26606
## Nov 1939       43.50353 36.76996 50.23711 33.20542 53.80165
## Dec 1939       40.41647 33.59852 47.23442 29.98932 50.84362

valores ajustados

ajuste$mean 
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1938 35.99945 36.59234 42.93357 47.17520 54.38048 59.04147 60.02198 59.97351
## 1939 38.80519 39.78179 41.85010 48.50312 54.37981 57.51244 59.11143 58.32157
##           Sep      Oct      Nov      Dec
## 1938 55.57310 48.99338 42.91286 39.49091
## 1939 54.61394 49.99228 43.50353 40.41647
plot(ajuste, main = 'Pronóstico 24 meses con ARIMA', xlab = 'Años', ylab = 'Temperatura')

Error cuadrático medio

2.640028^2
## [1] 6.969748

Revisar el error en el pronóstico

SF<- window (nottem, start = c(1938,1), end = c(1939,12) )
SFajuste = cbind (SF, ajuste$mean)
SFajuste
##            SF ajuste$mean
## Jan 1938 42.1    35.99945
## Feb 1938 41.2    36.59234
## Mar 1938 47.3    42.93357
## Apr 1938 46.6    47.17520
## May 1938 52.4    54.38048
## Jun 1938 59.0    59.04147
## Jul 1938 59.6    60.02198
## Aug 1938 60.4    59.97351
## Sep 1938 57.0    55.57310
## Oct 1938 50.7    48.99338
## Nov 1938 47.8    42.91286
## Dec 1938 39.2    39.49091
## Jan 1939 39.4    38.80519
## Feb 1939 40.9    39.78179
## Mar 1939 42.4    41.85010
## Apr 1939 47.8    48.50312
## May 1939 52.4    54.37981
## Jun 1939 58.0    57.51244
## Jul 1939 60.7    59.11143
## Aug 1939 61.8    58.32157
## Sep 1939 58.2    54.61394
## Oct 1939 46.7    49.99228
## Nov 1939 46.6    43.50353
## Dec 1939 37.8    40.41647
(1/12)*sum ((ajuste$mean - SF)^2)
## [1] 14.36329