library(reticulate)

1 Importando los paquetes.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

2 Importando la data.

raw_csv_data = pd.read_csv("E:/Index2018.csv") 
df_comp=raw_csv_data.copy()
df_comp.describe()
##                spx           dax         ftse        nikkei
## count  6269.000000   6269.000000  6269.000000   6269.000000
## mean   1288.127542   6080.063363  5422.713545  14597.055700
## std     487.586473   2754.361032  1145.572428   4043.122953
## min     438.920000   1911.700000  2876.600000   7054.980000
## 25%     990.671905   4069.350000  4486.100000  10709.290000
## 50%    1233.420000   5773.340000  5662.430000  15028.170000
## 75%    1459.987747   7443.070000  6304.250000  17860.470000
## max    2872.867839  13559.600000  7778.637689  24124.150000

df_comp.head()
##          date     spx      dax     ftse    nikkei
## 0  07/01/1994  469.90  2224.95  3445.98  18124.01
## 1  10/01/1994  475.27  2225.00  3440.58  18443.44
## 2  11/01/1994  474.13  2228.10  3413.77  18485.25
## 3  12/01/1994  474.17  2182.06  3372.02  18793.88
## 4  13/01/1994  472.47  2142.37  3360.01  18577.26
df_comp.date.describe()
## count           6269
## unique          6269
## top       03/01/2000
## freq               1
## Name: date, dtype: object

3 Convertir el texto a fechas o serie de tiempo.

df_comp.date = pd.to_datetime(df_comp.date, dayfirst = True)
df_comp.head()
##         date     spx      dax     ftse    nikkei
## 0 1994-01-07  469.90  2224.95  3445.98  18124.01
## 1 1994-01-10  475.27  2225.00  3440.58  18443.44
## 2 1994-01-11  474.13  2228.10  3413.77  18485.25
## 3 1994-01-12  474.17  2182.06  3372.02  18793.88
## 4 1994-01-13  472.47  2142.37  3360.01  18577.26
df_comp.date.describe()
## count                    6269
## unique                   6269
## top       1999-11-12 00:00:00
## freq                        1
## first     1994-01-07 00:00:00
## last      2018-01-29 00:00:00
## Name: date, dtype: object
## 
## <string>:1: FutureWarning: Treating datetime data as categorical rather than numeric in `.describe` is deprecated and will be removed in a future version of pandas. Specify `datetime_is_numeric=True` to silence this warning and adopt the future behavior now.

4 Poniendo la fecha como índice.

df_comp.set_index("date", inplace=True)

df_comp.head()
##                spx      dax     ftse    nikkei
## date                                          
## 1994-01-07  469.90  2224.95  3445.98  18124.01
## 1994-01-10  475.27  2225.00  3440.58  18443.44
## 1994-01-11  474.13  2228.10  3413.77  18485.25
## 1994-01-12  474.17  2182.06  3372.02  18793.88
## 1994-01-13  472.47  2142.37  3360.01  18577.26

5 Ubicando la frecuencia deseada.

df_comp=df_comp.asfreq('d')
df_comp.head(10)
##                spx      dax     ftse    nikkei
## date                                          
## 1994-01-07  469.90  2224.95  3445.98  18124.01
## 1994-01-08     NaN      NaN      NaN       NaN
## 1994-01-09     NaN      NaN      NaN       NaN
## 1994-01-10  475.27  2225.00  3440.58  18443.44
## 1994-01-11  474.13  2228.10  3413.77  18485.25
## 1994-01-12  474.17  2182.06  3372.02  18793.88
## 1994-01-13  472.47  2142.37  3360.01  18577.26
## 1994-01-14  474.91  2151.05  3400.56  18973.70
## 1994-01-15     NaN      NaN      NaN       NaN
## 1994-01-16     NaN      NaN      NaN       NaN

df_comp=df_comp.asfreq('b')
df_comp.head(10)
##                spx      dax     ftse    nikkei
## date                                          
## 1994-01-07  469.90  2224.95  3445.98  18124.01
## 1994-01-10  475.27  2225.00  3440.58  18443.44
## 1994-01-11  474.13  2228.10  3413.77  18485.25
## 1994-01-12  474.17  2182.06  3372.02  18793.88
## 1994-01-13  472.47  2142.37  3360.01  18577.26
## 1994-01-14  474.91  2151.05  3400.56  18973.70
## 1994-01-17  473.30  2115.56  3407.83  18725.37
## 1994-01-18  474.25  2130.35  3437.01  18514.55
## 1994-01-19  474.30  2132.52  3475.15  19039.40
## 1994-01-20  474.98  2098.36  3469.99  19183.92

6 Manejo de valores perdidos.

df_comp.isna().sum()
## spx       8
## dax       8
## ftse      8
## nikkei    8
## dtype: int64
df_comp
##                     spx       dax         ftse    nikkei
## date                                                    
## 1994-01-07   469.900000   2224.95  3445.980000  18124.01
## 1994-01-10   475.270000   2225.00  3440.580000  18443.44
## 1994-01-11   474.130000   2228.10  3413.770000  18485.25
## 1994-01-12   474.170000   2182.06  3372.020000  18793.88
## 1994-01-13   472.470000   2142.37  3360.010000  18577.26
## ...                 ...       ...          ...       ...
## 2018-01-23  2839.130362  13559.60  7731.827774  24124.15
## 2018-01-24  2837.544008  13414.74  7643.428966  23940.78
## 2018-01-25  2839.253031  13298.36  7615.839954  23669.49
## 2018-01-26  2872.867839  13340.17  7665.541292  23631.88
## 2018-01-29  2853.528411  13324.48  7671.533300  23629.34
## 
## [6277 rows x 4 columns]
df_comp.spx=df_comp.spx.fillna(method='ffill')
df_comp.isna().sum()
## spx       0
## dax       8
## ftse      8
## nikkei    8
## dtype: int64
df_comp.ftse=df_comp.ftse.fillna(method='bfill')
df_comp.dax=df_comp.dax.fillna(value = df_comp.dax.mean())
df_comp.nikkei=df_comp.dax.fillna(method='bfill')
df_comp.isna().sum()
## spx       0
## dax       0
## ftse      0
## nikkei    0
## dtype: int64

7 Simplificando el dataset.

df_comp['market_value']=df_comp.spx
df_comp.describe()
##                spx           dax         ftse        nikkei  market_value
## count  6277.000000   6277.000000  6277.000000   6277.000000   6277.000000
## mean   1288.642547   6080.063363  5423.690398   6080.063363   1288.642547
## std     487.868210   2752.604984  1145.568370   2752.604984    487.868210
## min     438.920000   1911.700000  2876.600000   1911.700000    438.920000
## 25%     992.715221   4070.460000  4487.880000   4070.460000    992.715221
## 50%    1233.761241   5774.380000  5663.300000   5774.380000   1233.761241
## 75%    1460.250000   7442.660000  6304.630175   7442.660000   1460.250000
## max    2872.867839  13559.600000  7778.637689  13559.600000   2872.867839
del df_comp['spx']
df_comp.head()
##                 dax     ftse   nikkei  market_value
## date                                               
## 1994-01-07  2224.95  3445.98  2224.95        469.90
## 1994-01-10  2225.00  3440.58  2225.00        475.27
## 1994-01-11  2228.10  3413.77  2228.10        474.13
## 1994-01-12  2182.06  3372.02  2182.06        474.17
## 1994-01-13  2142.37  3360.01  2142.37        472.47
del df_comp['dax']
del df_comp['ftse']
del df_comp['nikkei']
df_comp.describe()
##        market_value
## count   6277.000000
## mean    1288.642547
## std      487.868210
## min      438.920000
## 25%      992.715221
## 50%     1233.761241
## 75%     1460.250000
## max     2872.867839

8 Dividiendo la data en conjunto de entrenamiento y de prueba.

size = int(len(df_comp)*0.8)
df = df_comp.iloc[:size]
df_test = df_comp.iloc[size:]
df.tail()
##             market_value
## date                    
## 2013-04-01   1562.173837
## 2013-04-02   1570.252238
## 2013-04-03   1553.686978
## 2013-04-04   1559.979316
## 2013-04-05   1553.278930
df_test.head()
##             market_value
## date                    
## 2013-04-08   1563.071269
## 2013-04-09   1568.607909
## 2013-04-10   1587.731827
## 2013-04-11   1593.369863
## 2013-04-12   1588.854623