knitr::opts_chunk$set(warning = FALSE)

1. Ejercicio 1

1.1

Crear un nuevo dataframe que sea un subconjunto del dataframe original de dfFires. El subconjunto debe contener todos los incendios del Estado de Idaho y las columnas deben ser limitadas para que sólo estén presentes las columnas YEAR_, CAUSE y TOTALACRES. Cambie el nombre de las columnas. Agrupe los datos por CAUSE y YEAR_ y luego resuma por el total de acres quemados. Trazar los resultados.

library(readr)
dfFires <- read_csv("C:/BK/Julian Acevedo/WFM_2021-11-08/WFM nov.2021/Analitica/U.NORTE/Vizualizacion datos R y Python/RDataSets/StudyArea.csv", col_types = list(UNIT = col_character()), col_names = TRUE)
knitr::kable(head(dfFires, 5))

FID	ORGANIZATI	UNIT	SUBUNIT	SUBUNIT2	FIRENAME	CAUSE	YEAR_	STARTDATED	CONTRDATED	OUTDATED	STATE	STATE_FIPS	TOTALACRES
0	FWS	81682	USCADBR	San Diego Bay National Wildlife Refuge	PUMP HOUSE	Human	2001	1/1/01 0:00	1/1/01 0:00	NA	California	6	0.1
1	FWS	81682	USCADBR	San Diego Bay National Wildlife Refuge	I5	Human	2002	5/3/02 0:00	5/3/02 0:00	NA	California	6	3.0
2	FWS	81682	USCADBR	San Diego Bay National Wildlife Refuge	SOUTHBAY	Human	2002	6/1/02 0:00	6/1/02 0:00	NA	California	6	0.5
3	FWS	81682	USCADBR	San Diego Bay National Wildlife Refuge	MARINA	Human	2001	7/12/01 0:00	7/12/01 0:00	NA	California	6	0.1
4	FWS	81682	USCADBR	San Diego Bay National Wildlife Refuge	HILL	Human	1994	9/13/94 0:00	9/13/94 0:00	NA	California	6	1.0

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

dfFires_subset<-dfFires %>% 
  filter(STATE == 'Idaho') %>% 
  select("YR" = "YEAR_", "CS"  = "CAUSE","ACRES" = "TOTALACRES")
knitr::kable(head(dfFires_subset, 5))

YR	CS	ACRES
1987	Human	5
1991	Natural	150
1991	Human	800
1990	Natural	2
1985	Human	38

dfFires_summary<-dfFires_subset %>% 
  group_by(CS,YR) %>% 
  summarise(Total_Acres_Quemados=sum(ACRES))

## `summarise()` has grouped output by 'CS'. You can override using the `.groups`
## argument.

knitr::kable(head(dfFires_summary, 5))

CS	YR	Total_Acres_Quemados
Human	1980	71974.7
Human	1981	219362.4
Human	1982	34016.2
Human	1983	48242.5
Human	1984	36837.8

# Plot the results

library(ggplot2)

ggplot(dfFires_summary, aes(x = YR, y = Total_Acres_Quemados, color = CS)) +
  geom_line() +
  theme_minimal() +
  labs(title = 'Total acres quemados por causa y año en Idaho',
       x = 'Año',
       y = 'Total Acres Quemados') +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

1.2.

Resuelva la Tarea 1.1 de la Sección 1 de Python utilizando R.

Ejercicio para entregar Trabajaremos con el conjunto de datos de 120 años de historia olímpica adquirido por Randi Griffin en Randi-Griffin y puesto a disposición en athlete_events. - Su tarea consiste en identificar los cinco deportes más importantes según el mayor número de medallas otorgadas en el año 2016, y luego realizar el siguiente análisis:

athlete_events_url<-'https://raw.githubusercontent.com/lihkirun/AppliedStatisticMS/main/DataVisualizationRPython/Lectures/Python/PythonDataSets/athlete_events.csv'
library(readr)
df_athlete_events <- read_csv(athlete_events_url, col_types = list(UNIT = col_character()), col_names = TRUE)
knitr::kable(head(df_athlete_events, 5))

ID	Name	Sex	Age	Height	Weight	Team	NOC	Games	Year	Season	City	Sport	Event	Medal
1	A Dijiang	M	24	180	80	China	CHN	1992 Summer	1992	Summer	Barcelona	Basketball	Basketball Men’s Basketball	NA
2	A Lamusi	M	23	170	60	China	CHN	2012 Summer	2012	Summer	London	Judo	Judo Men’s Extra-Lightweight	NA
3	Gunnar Nielsen Aaby	M	24	NA	NA	Denmark	DEN	1920 Summer	1920	Summer	Antwerpen	Football	Football Men’s Football	NA
4	Edgar Lindenau Aabye	M	34	NA	NA	Denmark/Sweden	DEN	1900 Summer	1900	Summer	Paris	Tug-Of-War	Tug-Of-War Men’s Tug-Of-War	Gold
5	Christine Jacoba Aaftink	F	21	185	82	Netherlands	NED	1988 Winter	1988	Winter	Calgary	Speed Skating	Speed Skating Women’s 500 metres	NA

1.2.1.

Genere un gráfico que indique el número de medallas concedidas en cada uno de los cinco principales deportes en 2016.

medallas_2016 <- df_athlete_events %>%
  filter(Year == 2016 & !is.na(Medal)) %>%
  group_by(Sport) %>%
  summarise(Medals = n()) %>%
  arrange(desc(Medals)) %>%
  head(5)

ggplot(medallas_2016, aes(x = reorder(Sport, Medals), y = Medals, fill = Sport)) +
  geom_bar(stat = "identity") +
  labs(title = "Número de medallas concedidas en cada uno de los cinco principales deportes en 2016",
       x = "Deporte",
       y = "Numero de Medallas") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

1.2.2.

Trace un gráfico que represente la distribución de la edad de los ganadores de medallas en los cinco principales deportes en 2016.

medallas_2016 <- df_athlete_events %>%
  filter(Year == 2016, !is.na(Medal)) %>%
  group_by(Sport) %>%
  summarise(TotalMedals = n()) %>%
  top_n(5, TotalMedals) %>%
  pull(Sport)

datos_top_sports <- df_athlete_events %>%
  filter(Year == 2016, Sport %in% medallas_2016, !is.na(Medal), !is.na(Age))

ggplot(datos_top_sports, aes(x = Age, fill = Sport)) +
  geom_histogram(binwidth = 1, position = "dodge") +
  facet_wrap(~Sport) +
  labs(title = "Distribución de la edad de los ganadores de medallas en los cinco principales deportes en 2016",
       x = "Edad",
       y = "Conteo ganadores") +
  theme_minimal()

1.2.3.

Descubre qué equipos nacionales ganaron el mayor número de medallas en los cinco principales deportes en 2016.

medallas_2016 <- df_athlete_events %>%
  filter(Year == 2016, !is.na(Medal))

top_sports <- medallas_2016 %>%
  count(Sport) %>%
  top_n(5, n) %>%
  pull(Sport)

top_sports_medals <- medallas_2016 %>%
  filter(Sport %in% top_sports)

top_teams_medals <- top_sports_medals %>%
  group_by(NOC, Sport) %>%
  summarise(Medals = n(), .groups = 'drop') %>%
  arrange(desc(Medals))

top_teams_medals

## # A tibble: 94 × 3
##    NOC   Sport     Medals
##    <chr> <chr>      <int>
##  1 USA   Swimming      71
##  2 USA   Athletics     46
##  3 GER   Football      35
##  4 GER   Hockey        33
##  5 AUS   Swimming      32
##  6 JAM   Athletics     30
##  7 GBR   Rowing        26
##  8 ARG   Hockey        18
##  9 CAN   Football      18
## 10 NGR   Football      18
## # ℹ 84 more rows

1.2.4.

Observe la tendencia del peso medio de los atletas masculinos y femeninos ganadores en los cinco principales deportes en 2016

medallas_2016 <- df_athlete_events %>%
  filter(Year == 2016, !is.na(Medal))

top_sports <- medallas_2016 %>%
  count(Sport) %>%
  top_n(5, n) %>%
  pull(Sport)

datos_top_sports <- medallas_2016 %>%
  filter(Sport %in% top_sports)

weight_trends <- datos_top_sports %>%
  group_by(Sport, Sex) %>%
  summarise(MeanWeight = mean(Weight, na.rm = TRUE), .groups = 'drop')

ggplot(weight_trends, aes(x = Sport, y = MeanWeight, fill = Sex)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  labs(title = "peso medio de los atletas masculinos y femeninos ganadores en los cinco principales deportes en 2016",
       x = "Deporte",
       y = "Peso Medio(kg)") +
  scale_fill_manual(values = c("#CDC8B1", "#8B8878")) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

2. Ejercicio 2 para entregar

Considere el conjunto de datos us_state_population.tsv utilizado en la sección de Python, para la creación del mapa coroplético de Estados Unidos. Repita el procedimiento planteado en cada ítem de esta sección para obtener el nuevo dataframe con las nuevas columnas Year y Population. Realice unión y separación utilizando las columnas State y Code.

us_state_population_url='https://raw.githubusercontent.com/lihkir/Uninorte/main/AppliedStatisticMS/DataVisualizationRPython/Lectures/Python/PythonDataSets/us_state_population.tsv'

df_us_state_population<-read_tsv(us_state_population_url )

## Rows: 51 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): State, Code
## dbl (9): 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

knitr::kable(head(df_us_state_population, 5))

State	Code	2010	2011	2012	2013	2014	2015	2016	2017	2018
Alabama	AL	4785448	4798834	4815564	4830460	4842481	4853160	4864745	4875120	4887871
Alaska	AK	713906	722038	730399	737045	736307	737547	741504	739786	737438
Arizona	AZ	6407774	6473497	6556629	6634999	6733840	6833596	6945452	7048876	7171646
Arkansas	AR	2921978	2940407	2952109	2959549	2967726	2978407	2990410	3002997	3013825
California	CA	37320903	37641823	37960782	38280824	38625139	38953142	39209127	39399349	39557045

Recopilación

library(readr)
library(tidyr)
df_us_state_population_2 = gather(df_us_state_population, 
                "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018",
                key = "YEAR", 
                value = "POPULATION")
knitr::kable(head(df_us_state_population_2, 20))

State	Code	YEAR	POPULATION
Alabama	AL	2010	4785448
Alaska	AK	2010	713906
Arizona	AZ	2010	6407774
Arkansas	AR	2010	2921978
California	CA	2010	37320903
Colorado	CO	2010	5048281
Connecticut	CT	2010	3579125
Delaware	DE	2010	899595
District of Columbia	DC	2010	605085
Florida	FL	2010	18845785
Georgia	GA	2010	9711810
Hawaii	HI	2010	1363963
Idaho	ID	2010	1570773
Illinois	IL	2010	12840762
Indiana	IN	2010	6490436
Iowa	IA	2010	3050767
Kansas	KS	2010	2858213
Kentucky	KY	2010	4348200
Louisiana	LA	2010	4544532
Maine	ME	2010	1327632

Distribución

df_us_state_population_2b = spread(df_us_state_population_2, key = YEAR, value = POPULATION)
knitr::kable(head(df_us_state_population_2b))

State	Code	2010	2011	2012	2013	2014	2015	2016	2017	2018
Alabama	AL	4785448	4798834	4815564	4830460	4842481	4853160	4864745	4875120	4887871
Alaska	AK	713906	722038	730399	737045	736307	737547	741504	739786	737438
Arizona	AZ	6407774	6473497	6556629	6634999	6733840	6833596	6945452	7048876	7171646
Arkansas	AR	2921978	2940407	2952109	2959549	2967726	2978407	2990410	3002997	3013825
California	CA	37320903	37641823	37960782	38280824	38625139	38953142	39209127	39399349	39557045
Colorado	CO	5048281	5121771	5193721	5270482	5351218	5452107	5540921	5615902	5695564

Union

df_us_state_population_3 = unite(df_us_state_population, State_Code, State, Code)
knitr::kable(head(df_us_state_population_3, 10))

State_Code	2010	2011	2012	2013	2014	2015	2016	2017	2018
Alabama_AL	4785448	4798834	4815564	4830460	4842481	4853160	4864745	4875120	4887871
Alaska_AK	713906	722038	730399	737045	736307	737547	741504	739786	737438
Arizona_AZ	6407774	6473497	6556629	6634999	6733840	6833596	6945452	7048876	7171646
Arkansas_AR	2921978	2940407	2952109	2959549	2967726	2978407	2990410	3002997	3013825
California_CA	37320903	37641823	37960782	38280824	38625139	38953142	39209127	39399349	39557045
Colorado_CO	5048281	5121771	5193721	5270482	5351218	5452107	5540921	5615902	5695564
Connecticut_CT	3579125	3588023	3594395	3594915	3594783	3587509	3578674	3573880	3572665
Delaware_DE	899595	907316	915188	923638	932596	941413	949216	957078	967171
District of Columbia_DC	605085	619602	634725	650431	662513	675254	686575	695691	702455
Florida_FL	18845785	19093352	19326230	19563166	19860330	20224249	20629982	20976812	21299325

Separación

df_us_state_population_3b = separate(df_us_state_population_3, "State_Code",into = c("State", "Code"))
knitr::kable(head(df_us_state_population_3b, 10))

State	Code	2010	2011	2012	2013	2014	2015	2016	2017	2018
Alabama	AL	4785448	4798834	4815564	4830460	4842481	4853160	4864745	4875120	4887871
Alaska	AK	713906	722038	730399	737045	736307	737547	741504	739786	737438
Arizona	AZ	6407774	6473497	6556629	6634999	6733840	6833596	6945452	7048876	7171646
Arkansas	AR	2921978	2940407	2952109	2959549	2967726	2978407	2990410	3002997	3013825
California	CA	37320903	37641823	37960782	38280824	38625139	38953142	39209127	39399349	39557045
Colorado	CO	5048281	5121771	5193721	5270482	5351218	5452107	5540921	5615902	5695564
Connecticut	CT	3579125	3588023	3594395	3594915	3594783	3587509	3578674	3573880	3572665
Delaware	DE	899595	907316	915188	923638	932596	941413	949216	957078	967171
District	of	605085	619602	634725	650431	662513	675254	686575	695691	702455
Florida	FL	18845785	19093352	19326230	19563166	19860330	20224249	20629982	20976812	21299325

3. Series de tiempo

Utilice lo aprendido en esta sección para realizar una predicción a 7 días, del valor de las acciones de Tecnoglass. Esto es. Debe realizar:

url<-'https://raw.githubusercontent.com/lihkir/Data/main/TGLS.csv'
df_TGLS<-read.csv(url)
df_TGLS$Date <- as.Date(df_TGLS$Date, format="%Y-%m-%d")
#df_TGLS$Date<-as.Date(df_TGLS$Dat,format="%Y-%m-%d")
#df_TGLS$Date <- as.POSIXct(df_TGLS$Date, format="%Y-%m-%d")
#df_TGLS<-df_TGLS[order(df_TGLS$Date),]
knitr::kable(head(df_TGLS, 5))

Date	Open	High	Low	Close	Adj.Close	Volume
2012-05-10	9.97	10.00	9.50	9.80	7.945550	6900
2012-05-11	9.70	9.70	9.70	9.70	7.864471	300
2012-05-14	9.80	9.80	9.80	9.80	7.945550	100
2012-05-15	9.75	9.75	9.75	9.75	7.905012	300
2012-05-16	9.75	9.75	9.75	9.75	7.905012	0

str(df_TGLS)

## 'data.frame':    2200 obs. of  7 variables:
##  $ Date     : Date, format: "2012-05-10" "2012-05-11" ...
##  $ Open     : num  9.97 9.7 9.8 9.75 9.75 9.6 9.6 9.6 9.69 9.77 ...
##  $ High     : num  10 9.7 9.8 9.75 9.75 9.6 9.6 9.6 9.69 9.77 ...
##  $ Low      : num  9.5 9.7 9.8 9.75 9.75 9.6 9.6 9.6 9.69 9.6 ...
##  $ Close    : num  9.8 9.7 9.8 9.75 9.75 9.6 9.6 9.6 9.69 9.6 ...
##  $ Adj.Close: num  7.95 7.86 7.95 7.91 7.91 ...
##  $ Volume   : int  6900 300 100 300 0 800 0 0 1500 153600 ...

3.1.

Análisis Exploratorio de Datos: ACF, PAC, Descomposición, Dicky Fuller, Boxplots, Medidas de tendencia Central y Variavilidad, etc,…

3.1.1. EDA

library(TSstudio)
library(xts)

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################

## 
## Attaching package: 'xts'

## The following objects are masked from 'package:dplyr':
## 
##     first, last

#ts_TGLS <- ts(df_TGLS$Close, frequency=1, start=c(2012, 5))
ts_TGLS <- ts(df_TGLS$Close, frequency=1)

ts_plot(ts_TGLS, 
        title='Acciones de Tecnoglass',
        Ytitle ='Acciones',
        Xtitle='Año')

Como se logra identificar en el gráfico, las acciones de Tecnoglas tienen una tendencia aparentemente negativa, sin embargo los ultimos registros tienen una tendencia positiva. Es evidente que el comportamiento no es estacinario.

Estadisticas descriptivas

summary(ts_TGLS)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.290   7.960   9.840   9.477  10.910  15.060

Con este resumen podemos identificar que la distribucion tiene un ligero sesgo hacia la izquierda.

Histograma

hist(ts_TGLS,main = "Histograma Acciones Tecnoglass", xlab = "Acciones", breaks = "Sturges", probability = TRUE)
lines(density(ts_TGLS))

## Verificar datos faltantes

sum(is.na(ts_TGLS))

## [1] 0

Boxplot

boxplot(ts_TGLS~cycle(ts_TGLS),xlab="Date", ylab = "y" ,main ="Boxplot")

# Descomposición

#ts_TGLS_2 <- ts(df_TGLS$Close, frequency=365, start=c(2012, 5))
ts_TGLS_2 <- ts(df_TGLS$Close,frequency=365)
library(TSstudio)
ts_decompose(ts_TGLS_2)

ACF y PACF

acf(ts_TGLS)

El gráfico ACF nos muestra que la correlación de la serie con sus rezagos decae en el tiempo de forma lineal, por lo tanto la eliminación de la tendencia de la serie y la correlación entre la serie y sus rezagos puede hacerse mediante diferenciación.

ts_TGLS_d1<-diff(ts_TGLS)
par(mfrow=c(1,2))
acf(ts_TGLS_d1)
pacf(ts_TGLS_d1)

asado en los graficos de ACF y PACF es apropiado aplicar la primera diferenciación AR(1)

Dicky-Fuller

library(tseries)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

adf.test(ts_TGLS)

## 
##  Augmented Dickey-Fuller Test
## 
## data:  ts_TGLS
## Dickey-Fuller = -2.6642, Lag order = 13, p-value = 0.2971
## alternative hypothesis: stationary

El resultado de la prueba Dicky-FUller nos indica que las acciones de Tecnoglass no son estacionarias, el \(p-value=0.2971\) es superior al valor de significancia del \(0.05\), por lo tanto no se rechaza la \(H_{0}\) que sugiere estacionalidad.

Transformacion de serie no estacionaria en serie estacionaria

Diferenciacion de series temporales

ts_plot(diff(ts_TGLS, lag=1),
        title='Acciones de Tecnoglass - 1° diferenciación',
        Ytitle ='Acciones',
        Xtitle='Año')

Aplicando la primera diferenciación se logra identificar como se eliminna la tendencia de la serie y la media en general es constante.

3.2. Modelo AUTO.ARIMA

Dibujar predicción dinámicas, realizar verificación de supuestos y cálcular métricas de precisión usando auto.arima y además, seleccionar el mejor modelo ARIMA de forma manual usando loop for.

ts_TGLS_split <- ts_split(ts_TGLS, sample.out = 7)
train<-ts_TGLS_split$train
test<-ts_TGLS_split$test

library(forecast)
TGLS_auto_md<-auto.arima(train)
TGLS_auto_md

## Series: train 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.0659
## s.e.   0.0209
## 
## sigma^2 = 0.04214:  log likelihood = 360.94
## AIC=-717.87   AICc=-717.87   BIC=-706.49

El primer moelo auto.arima ejecutado seleccion el modelo (0,1,1) con un AIC = 648.56. No obstante se ajustara algunos argumentos con el objetivo de establecer una busqueda mas robusta y exhaustiva del modelo.

TGLS_auto_md2<-auto.arima(train,
                          max.order = 5,
                          D = 1,
                          d = 1,
                          stepwise = FALSE,
                          approximation = FALSE)
TGLS_auto_md2

## Series: train 
## ARIMA(1,1,0) 
## 
## Coefficients:
##           ar1
##       -0.0689
## s.e.   0.0213
## 
## sigma^2 = 0.04213:  log likelihood = 361.17
## AIC=-718.34   AICc=-718.33   BIC=-706.95

Aplicando valores maximos en 5 para p+q+P+Q, buscando las conbinaciones posibles (stepwise = FALSE) y obtener cálculos mas precisos en los criterios de información (approximation = FALSE); encontramos que el mejor modelo seria el (1,1,0) con un AIC = 718.34.

Pronostico

TGLS_test_fc <- forecast(TGLS_auto_md2, h = 7)
accuracy(TGLS_test_fc, test)

##                        ME      RMSE       MAE         MPE     MAPE      MASE
## Training set -0.001345070 0.2051663 0.1210993 -0.05572084 1.437541 0.9969568
## Test set     -0.005341618 0.1133806 0.0845230 -0.10243307 1.211630 0.6958402
##                     ACF1 Theil's U
## Training set 0.001493314        NA
## Test set     0.369024896 0.9383515

test_forecast(ts_TGLS_2,
              forecast.obj = TGLS_test_fc,
              test = test)

USgas_split <- ts_split(USgas, sample.out = 12)

train <- USgas_split$train
test <- USgas_split$test

library(forecast)

USgas_auto_md <- auto.arima(train)
USgas_auto_md

## Series: train 
## ARIMA(2,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ar2      ma1    sar1     sar2     sma1
##       0.4301  -0.0372  -0.9098  0.0117  -0.2673  -0.7431
## s.e.  0.0794   0.0741   0.0452  0.0887   0.0830   0.0751
## 
## sigma^2 = 10446:  log likelihood = -1292.83
## AIC=2599.67   AICc=2600.22   BIC=2623.2

USgas_auto_md2 <- auto.arima(train,
                             max.order = 5,
                             D = 1,
                             d = 1,
                             stepwise = FALSE,
                             approximation = FALSE)
USgas_auto_md2

## Series: train 
## ARIMA(1,1,1)(2,1,1)[12] 
## 
## Coefficients:
##          ar1      ma1    sar1     sar2     sma1
##       0.4247  -0.9180  0.0132  -0.2639  -0.7449
## s.e.  0.0770   0.0376  0.0894   0.0834   0.0753
## 
## sigma^2 = 10405:  log likelihood = -1292.96
## AIC=2597.91   AICc=2598.32   BIC=2618.08

USgas_test_fc <- forecast(USgas_auto_md2, h = 12)
accuracy(USgas_test_fc, test)

##                     ME      RMSE      MAE       MPE     MAPE      MASE
## Training set  6.081099  97.85701 73.36854 0.1298714 3.517097 0.6371821
## Test set     42.211253 104.79281 83.09943 1.4913412 3.314280 0.7216918
##                      ACF1 Theil's U
## Training set  0.004565602        NA
## Test set     -0.049999868 0.3469228

test_forecast(USgas,
              forecast.obj = USgas_test_fc,
              test = test)

Tarea 3 Visualización

Edward Morales - Julian Acevedo

2023-11-08