Importamos las librerias necesarias

library(tidyverse)
library(readxl)
library(VIM)
library(GGally)
library(lubridate)
library(stringr)

Hacemos tres listas de archivos, cada una con un tipo de archivo: logs, metmast y signals.

# list.files() treats `pattern` as a regular expression, not a shell glob:
# in "wind-farm-1-logs*" the "*" only meant "zero or more of the final s".
# Each pattern is now anchored at the start of the file name and requires the
# .xlsx extension, so only workbooks that read_xlsx() can open are listed
# (stray files such as Excel "~$..." lock files are skipped).
# The result is a vector of bare file names, without the directory part.
logs_files <- list.files(
  path = "Training",
  pattern = "^wind-farm-1-logs.*\\.xlsx$"
)
metmast_files <- list.files(
  path = "Training",
  pattern = "^wind-farm-1-metmast.*\\.xlsx$"
)
signals_files <- list.files(
  path = "Training",
  pattern = "^wind-farm-1-signals.*\\.xlsx$"
)

Importamos los datasets de training

# Read every workbook named in `files` (bare file names located inside `dir`)
# with read_xlsx() and stack the resulting tables row-wise into one tibble.
#
# files: character vector of file names, as returned by list.files().
# dir:   directory that contains the files.
# Stops with an explicit message when `files` is empty, instead of failing
# later inside the row-binding step.
read_training_files <- function(files, dir = "./Training") {
  if (length(files) == 0) {
    stop("No input files to read in '", dir, "'.", call. = FALSE)
  }
  files %>%
    map(function(file_name) read_xlsx(file.path(dir, file_name))) %>%
    # bind_rows() combines the whole list in one step and matches columns by
    # name; the former reduce(rbind) re-copied the accumulated rows for every
    # additional file.
    bind_rows()
}

signals_data <- read_training_files(signals_files)

metmast_data <- read_training_files(metmast_files)

logs_data <- read_training_files(logs_files)

Echamos un vistazo a la tabla logs_data

# Print a per-column summary (type and basic statistics) of the raw logs table.
summary(logs_data)
##  TimeDetected        TimeReset          UnitTitle        
##  Length:122090      Length:122090      Length:122090     
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##     Remark          UnitTitleDestination
##  Length:122090      Mode:logical        
##  Class :character   NA's:122090         
##  Mode  :character
# Show the first rows of the raw logs table.
head(logs_data)
## # A tibble: 6 x 5
##   TimeDetected       TimeReset UnitTitle Remark           UnitTitleDestina~
##   <chr>              <chr>     <chr>     <chr>            <lgl>            
## 1 2016-04-01T00:01:~ <NA>      T01       Yaw Speed Exc: ~ NA               
## 2 2016-04-01T00:12:~ <NA>      T01       External power ~ NA               
## 3 2016-04-01T00:21:~ <NA>      T11       External power ~ NA               
## 4 2016-04-01T00:22:~ <NA>      T06       External power ~ NA               
## 5 2016-04-01T00:23:~ <NA>      T07       External power ~ NA               
## 6 2016-04-01T00:33:~ <NA>      T11       Gen. int. vent.~ NA

En principio de esta tabla solo vamos a necesitar la fecha y la identificacion de la turbina

# Reduce the logs to the two columns used later on: the detection time,
# parsed into a date-time, and the unit identifier, stored as a factor.
logs_data_1 <- transmute(
  logs_data,
  TimeDetected = ymd_hms(TimeDetected),
  UnitTitle = as.factor(UnitTitle)
)

Solo hay logs de 4 unidades (T01, T06, T07 y T11). En las fechas hay 4 valores NA.

# Summarise the reduced logs table: date range of TimeDetected (including its
# NA count) and the number of log rows per unit.
summary(logs_data_1)
##   TimeDetected                 UnitTitle  
##  Min.   :2016-01-01 00:02:18   T01:36891  
##  1st Qu.:2016-04-24 07:21:40   T06:24721  
##  Median :2016-07-23 11:17:18   T07:30939  
##  Mean   :2016-07-09 03:21:27   T11:29539  
##  3rd Qu.:2016-09-18 12:21:16              
##  Max.   :2016-12-31 23:57:33              
##  NA's   :4

Echamos un vistazo a la tabla metmast_data. Hay varios campos que parecen no tener informacion valida.

# Print a per-column summary of the raw metmast table.
summary(metmast_data)
##   Timestamp         Min_Windspeed1  Max_Windspeed1   Avg_Windspeed1 
##  Length:52697       Min.   : 0.00   Min.   : 0.000   Min.   : 0.00  
##  Class :character   1st Qu.: 2.30   1st Qu.: 4.600   1st Qu.: 3.50  
##  Mode  :character   Median : 3.80   Median : 7.400   Median : 5.60  
##                     Mean   : 4.35   Mean   : 8.259   Mean   : 6.31  
##                     3rd Qu.: 6.00   3rd Qu.:11.000   3rd Qu.: 8.50  
##                     Max.   :16.40   Max.   :26.300   Max.   :21.70  
##                                                                     
##  Var_Windspeed1    Min_Windspeed2   Max_Windspeed2   Avg_Windspeed2  
##  Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.1400   1st Qu.: 2.200   1st Qu.: 4.600   1st Qu.: 3.400  
##  Median : 0.4400   Median : 3.800   Median : 7.400   Median : 5.600  
##  Mean   : 0.7948   Mean   : 4.339   Mean   : 8.252   Mean   : 6.301  
##  3rd Qu.: 1.0400   3rd Qu.: 6.000   3rd Qu.:11.000   3rd Qu.: 8.500  
##  Max.   :35.1000   Max.   :16.300   Max.   :26.700   Max.   :21.800  
##                                                                      
##  Var_Windspeed2    Min_Winddirection2 Max_Winddirection2
##  Min.   : 0.0000   Min.   :  0        Min.   :236       
##  1st Qu.: 0.1400   1st Qu.:236        1st Qu.:236       
##  Median : 0.4400   Median :236        Median :236       
##  Mean   : 0.7984   Mean   :236        Mean   :236       
##  3rd Qu.: 1.0400   3rd Qu.:236        3rd Qu.:236       
##  Max.   :35.9300   Max.   :236        Max.   :236       
##                                                         
##  Avg_Winddirection2 Var_Winddirection2 Min_AmbientTemp  Max_AmbientTemp
##  Min.   :213        Min.   :   0.000   Min.   :-40.00   Min.   : 6.00  
##  1st Qu.:236        1st Qu.:   0.000   1st Qu.: 14.00   1st Qu.:15.00  
##  Median :236        Median :   0.000   Median : 17.00   Median :18.00  
##  Mean   :236        Mean   :   0.137   Mean   : 18.32   Mean   :19.16  
##  3rd Qu.:236        3rd Qu.:   0.000   3rd Qu.: 22.00   3rd Qu.:23.00  
##  Max.   :236        Max.   :4683.000   Max.   : 39.00   Max.   :40.00  
##                                                                        
##  Avg_AmbientTemp  Min_Pressure   Max_Pressure   Avg_Pressure 
##  Min.   : 5.00   Min.   : 991   Min.   : 992   Min.   : 992  
##  1st Qu.:15.00   1st Qu.:1007   1st Qu.:1007   1st Qu.:1007  
##  Median :18.00   Median :1009   Median :1009   Median :1009  
##  Mean   :18.71   Mean   :1010   Mean   :1010   Mean   :1010  
##  3rd Qu.:23.00   3rd Qu.:1013   3rd Qu.:1013   3rd Qu.:1013  
##  Max.   :40.00   Max.   :1028   Max.   :1028   Max.   :1028  
##                                                              
##   Min_Humidity     Max_Humidity     Avg_Humidity   Min_Precipitation 
##  Min.   : 13.00   Min.   : 14.00   Min.   : 13.0   Min.   : 0.00000  
##  1st Qu.: 57.00   1st Qu.: 60.00   1st Qu.: 58.0   1st Qu.: 0.00000  
##  Median : 70.00   Median : 72.00   Median : 71.0   Median : 0.00000  
##  Mean   : 68.21   Mean   : 70.68   Mean   : 69.4   Mean   : 0.04547  
##  3rd Qu.: 80.00   3rd Qu.: 83.00   3rd Qu.: 81.0   3rd Qu.: 0.00000  
##  Max.   :100.00   Max.   :100.00   Max.   :100.0   Max.   :55.00000  
##  NA's   :1                         NA's   :1                         
##  Max_Precipitation Avg_Precipitation Min_Raindetection Max_Raindetection
##  Min.   : 0.0000   Min.   : 0.0000   Min.   :0         Min.   :0.0e+00  
##  1st Qu.: 0.0000   1st Qu.: 0.0000   1st Qu.:0         1st Qu.:0.0e+00  
##  Median : 0.0000   Median : 0.0000   Median :0         Median :0.0e+00  
##  Mean   : 0.1291   Mean   : 0.0864   Mean   :0         Mean   :1.9e-05  
##  3rd Qu.: 0.0000   3rd Qu.: 0.0000   3rd Qu.:0         3rd Qu.:0.0e+00  
##  Max.   :79.0000   Max.   :74.0000   Max.   :0         Max.   :1.0e+00  
##                                                                         
##  Avg_Raindetection Anemometer1_Freq Anemometer1_Offset
##  Min.   :0         Min.   :0.0499   Min.   :0.24      
##  1st Qu.:0         1st Qu.:0.0499   1st Qu.:0.24      
##  Median :0         Median :0.0499   Median :0.24      
##  Mean   :0         Mean   :0.0499   Mean   :0.24      
##  3rd Qu.:0         3rd Qu.:0.0499   3rd Qu.:0.24      
##  Max.   :0         Max.   :0.0499   Max.   :0.24      
##                                                       
##  Anemometer1_CorrGain Anemometer1_CorrOffset Anemometer2_Freq
##  Min.   :1            Min.   :0              Min.   :0.0499  
##  1st Qu.:1            1st Qu.:0              1st Qu.:0.0499  
##  Median :1            Median :0              Median :0.0499  
##  Mean   :1            Mean   :0              Mean   :0.0499  
##  3rd Qu.:1            3rd Qu.:0              3rd Qu.:0.0499  
##  Max.   :1            Max.   :0              Max.   :0.0499  
##                                                              
##  Anemometer2_Offset Anemometer2_CorrGain Anemometer2_CorrOffset
##  Min.   :0.24       Min.   :1            Min.   :0             
##  1st Qu.:0.24       1st Qu.:1            1st Qu.:0             
##  Median :0.24       Median :1            Median :0             
##  Mean   :0.24       Mean   :1            Mean   :0             
##  3rd Qu.:0.24       3rd Qu.:1            3rd Qu.:0             
##  Max.   :0.24       Max.   :1            Max.   :0             
##                                                                
##  DistanceAirPress AirRessureSensorZeroOffset Anemometer1_Avg_Freq
##  Min.   :0        Min.   :600                Min.   :  0         
##  1st Qu.:0        1st Qu.:600                1st Qu.: 66         
##  Median :0        Median :600                Median :108         
##  Mean   :0        Mean   :600                Mean   :123         
##  3rd Qu.:0        3rd Qu.:600                3rd Qu.:166         
##  Max.   :0        Max.   :600                Max.   :431         
##                                                                  
##  Anemometer2_Avg_Freq Pressure_Avg_Freq
##  Min.   :  0.0        Min.   :392.0    
##  1st Qu.: 65.0        1st Qu.:407.0    
##  Median :108.0        Median :409.0    
##  Mean   :122.9        Mean   :410.1    
##  3rd Qu.:166.0        3rd Qu.:413.0    
##  Max.   :434.0        Max.   :428.0    
## 

Nos quedamos solo con los campos que parecen tener informacion valida

# Build the cleaned metmast table: remove the columns that were judged to
# carry no usable information, then parse Timestamp into a date-time.
metmast_data_1 <- metmast_data %>%
  select(-c(
    Anemometer1_CorrGain, Anemometer1_CorrOffset,
    Anemometer2_Freq, Anemometer2_Offset,
    Anemometer2_CorrGain, Anemometer2_CorrOffset,
    DistanceAirPress, AirRessureSensorZeroOffset,
    Min_Precipitation, Max_Precipitation, Avg_Precipitation,
    Min_Raindetection, Max_Raindetection, Avg_Raindetection,
    Min_Winddirection2, Max_Winddirection2,
    Avg_Winddirection2, Var_Winddirection2
  )) %>%
  mutate(Timestamp = ymd_hms(Timestamp))

# Check the remaining columns.
summary(metmast_data_1)
##    Timestamp                   Min_Windspeed1  Max_Windspeed1  
##  Min.   :2016-01-01 00:00:00   Min.   : 0.00   Min.   : 0.000  
##  1st Qu.:2016-04-01 12:40:00   1st Qu.: 2.30   1st Qu.: 4.600  
##  Median :2016-07-02 00:20:00   Median : 3.80   Median : 7.400  
##  Mean   :2016-07-02 00:07:10   Mean   : 4.35   Mean   : 8.259  
##  3rd Qu.:2016-10-01 12:00:00   3rd Qu.: 6.00   3rd Qu.:11.000  
##  Max.   :2016-12-31 23:50:00   Max.   :16.40   Max.   :26.300  
##                                                                
##  Avg_Windspeed1  Var_Windspeed1    Min_Windspeed2   Max_Windspeed2  
##  Min.   : 0.00   Min.   : 0.0000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 3.50   1st Qu.: 0.1400   1st Qu.: 2.200   1st Qu.: 4.600  
##  Median : 5.60   Median : 0.4400   Median : 3.800   Median : 7.400  
##  Mean   : 6.31   Mean   : 0.7948   Mean   : 4.339   Mean   : 8.252  
##  3rd Qu.: 8.50   3rd Qu.: 1.0400   3rd Qu.: 6.000   3rd Qu.:11.000  
##  Max.   :21.70   Max.   :35.1000   Max.   :16.300   Max.   :26.700  
##                                                                     
##  Avg_Windspeed2   Var_Windspeed2    Min_AmbientTemp  Max_AmbientTemp
##  Min.   : 0.000   Min.   : 0.0000   Min.   :-40.00   Min.   : 6.00  
##  1st Qu.: 3.400   1st Qu.: 0.1400   1st Qu.: 14.00   1st Qu.:15.00  
##  Median : 5.600   Median : 0.4400   Median : 17.00   Median :18.00  
##  Mean   : 6.301   Mean   : 0.7984   Mean   : 18.32   Mean   :19.16  
##  3rd Qu.: 8.500   3rd Qu.: 1.0400   3rd Qu.: 22.00   3rd Qu.:23.00  
##  Max.   :21.800   Max.   :35.9300   Max.   : 39.00   Max.   :40.00  
##                                                                     
##  Avg_AmbientTemp  Min_Pressure   Max_Pressure   Avg_Pressure 
##  Min.   : 5.00   Min.   : 991   Min.   : 992   Min.   : 992  
##  1st Qu.:15.00   1st Qu.:1007   1st Qu.:1007   1st Qu.:1007  
##  Median :18.00   Median :1009   Median :1009   Median :1009  
##  Mean   :18.71   Mean   :1010   Mean   :1010   Mean   :1010  
##  3rd Qu.:23.00   3rd Qu.:1013   3rd Qu.:1013   3rd Qu.:1013  
##  Max.   :40.00   Max.   :1028   Max.   :1028   Max.   :1028  
##                                                              
##   Min_Humidity     Max_Humidity     Avg_Humidity   Anemometer1_Freq
##  Min.   : 13.00   Min.   : 14.00   Min.   : 13.0   Min.   :0.0499  
##  1st Qu.: 57.00   1st Qu.: 60.00   1st Qu.: 58.0   1st Qu.:0.0499  
##  Median : 70.00   Median : 72.00   Median : 71.0   Median :0.0499  
##  Mean   : 68.21   Mean   : 70.68   Mean   : 69.4   Mean   :0.0499  
##  3rd Qu.: 80.00   3rd Qu.: 83.00   3rd Qu.: 81.0   3rd Qu.:0.0499  
##  Max.   :100.00   Max.   :100.00   Max.   :100.0   Max.   :0.0499  
##  NA's   :1                         NA's   :1                       
##  Anemometer1_Offset Anemometer1_Avg_Freq Anemometer2_Avg_Freq
##  Min.   :0.24       Min.   :  0          Min.   :  0.0       
##  1st Qu.:0.24       1st Qu.: 66          1st Qu.: 65.0       
##  Median :0.24       Median :108          Median :108.0       
##  Mean   :0.24       Mean   :123          Mean   :122.9       
##  3rd Qu.:0.24       3rd Qu.:166          3rd Qu.:166.0       
##  Max.   :0.24       Max.   :431          Max.   :434.0       
##                                                              
##  Pressure_Avg_Freq
##  Min.   :392.0    
##  1st Qu.:407.0    
##  Median :409.0    
##  Mean   :410.1    
##  3rd Qu.:413.0    
##  Max.   :428.0    
## 

Echamos un vistazo a la tabla signals_data. Hay varios campos que parecen no tener informacion valida.

# Print a per-column summary of the raw signals table.
summary(signals_data)
##   Turbine_ID         Timestamp          Gen_RPM_Max      Gen_RPM_Min    
##  Length:207905      Length:207905      Min.   :   0.0   Min.   :   0.0  
##  Class :character   Class :character   1st Qu.: 296.8   1st Qu.: 183.2  
##  Mode  :character   Mode  :character   Median :1405.2   Median :1231.1  
##                                        Mean   :1166.5   Mean   : 915.3  
##                                        3rd Qu.:1676.6   3rd Qu.:1342.2  
##                                        Max.   :2040.9   Max.   :1666.1  
##   Gen_RPM_Avg      Gen_RPM_Std     Gen_Bear_Temp_Avg Gen_Phase1_Temp_Avg
##  Min.   :   0.0   Min.   :  0.00   Min.   : 18.00    Min.   : 21.00     
##  1st Qu.: 255.4   1st Qu.: 13.70   1st Qu.: 34.00    1st Qu.: 41.00     
##  Median :1271.9   Median : 28.10   Median : 43.00    Median : 59.00     
##  Mean   :1043.7   Mean   : 67.17   Mean   : 45.39    Mean   : 62.71     
##  3rd Qu.:1571.0   3rd Qu.: 69.80   3rd Qu.: 54.00    3rd Qu.: 74.00     
##  Max.   :1683.7   Max.   :823.10   Max.   :205.00    Max.   :205.00     
##  Gen_Phase2_Temp_Avg Gen_Phase3_Temp_Avg Hyd_Oil_Temp_Avg
##  Min.   : 21.00      Min.   : 21.00      Min.   :22.00   
##  1st Qu.: 41.00      1st Qu.: 41.00      1st Qu.:29.00   
##  Median : 60.00      Median : 59.00      Median :33.00   
##  Mean   : 62.95      Mean   : 62.07      Mean   :35.19   
##  3rd Qu.: 75.00      3rd Qu.: 74.00      3rd Qu.:41.00   
##  Max.   :205.00      Max.   :205.00      Max.   :55.00   
##  Gear_Oil_Temp_Avg Gear_Bear_Temp_Avg  Nac_Temp_Avg    Rtr_RPM_Max   
##  Min.   :25.00     Min.   :23.00      Min.   :17.00   Min.   : 0.00  
##  1st Qu.:43.00     1st Qu.:44.00      1st Qu.:25.00   1st Qu.: 2.60  
##  Median :48.00     Median :53.00      Median :29.00   Median :12.40  
##  Mean   :47.58     Mean   :51.37      Mean   :29.43   Mean   :10.25  
##  3rd Qu.:53.00     3rd Qu.:60.00      3rd Qu.:33.00   3rd Qu.:14.80  
##  Max.   :61.00     Max.   :71.00      Max.   :50.00   Max.   :16.80  
##   Rtr_RPM_Min      Rtr_RPM_Avg     Amb_WindSpeed_Max Amb_WindSpeed_Min
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.40     Min.   : 0.400   
##  1st Qu.: 1.700   1st Qu.: 2.300   1st Qu.: 6.80     1st Qu.: 0.700   
##  Median :10.900   Median :11.300   Median :10.80     Median : 1.500   
##  Mean   : 8.014   Mean   : 9.166   Mean   :12.61     Mean   : 1.627   
##  3rd Qu.:11.900   3rd Qu.:13.900   3rd Qu.:17.60     3rd Qu.: 2.200   
##  Max.   :14.800   Max.   :14.900   Max.   :70.00     Max.   :14.600   
##  Amb_WindSpeed_Avg Amb_WindSpeed_Std Amb_WindDir_Relative_Avg
##  Min.   : 0.400    Min.   : 0.000    Min.   :-180.0000       
##  1st Qu.: 3.100    1st Qu.: 0.600    1st Qu.: -10.5000       
##  Median : 5.200    Median : 1.000    Median :   0.2000       
##  Mean   : 5.941    Mean   : 1.166    Mean   :   0.3694       
##  3rd Qu.: 8.100    3rd Qu.: 1.500    3rd Qu.:  10.0000       
##  Max.   :22.500    Max.   :55.100    Max.   : 180.0000       
##  Amb_WindDir_Abs_Avg  Amb_Temp_Avg   Prod_LatestAvg_ActPwrGen0
##  Min.   :  0.0       Min.   : 6.00   Min.   :-5027.0          
##  1st Qu.:100.8       1st Qu.:15.00   1st Qu.: -731.0          
##  Median :163.5       Median :19.00   Median :    0.0          
##  Mean   :183.2       Mean   :19.62   Mean   : -397.2          
##  3rd Qu.:272.5       3rd Qu.:24.00   3rd Qu.:    0.0          
##  Max.   :359.0       Max.   :41.00   Max.   :  796.0          
##  Prod_LatestAvg_ActPwrGen1 Prod_LatestAvg_ActPwrGen2
##  Min.   :  -736            Min.   :0                
##  1st Qu.:     0            1st Qu.:0                
##  Median : 34073            Median :0                
##  Mean   : 86771            Mean   :0                
##  3rd Qu.:141527            3rd Qu.:0                
##  Max.   :334398            Max.   :0                
##  Prod_LatestAvg_TotActPwr Prod_LatestAvg_ReactPwrGen0
##  Min.   : -5027           Min.   :-5046.0            
##  1st Qu.:  -675           1st Qu.: -246.0            
##  Median : 34052           Median :    0.0            
##  Mean   : 86374           Mean   : -289.7            
##  3rd Qu.:141526           3rd Qu.:    0.0            
##  Max.   :334398           Max.   :  500.0            
##  Prod_LatestAvg_ReactPwrGen1 Prod_LatestAvg_ReactPwrGen2
##  Min.   :-77957              Min.   :0                  
##  1st Qu.:-26894              1st Qu.:0                  
##  Median :-15572              Median :0                  
##  Mean   :-15112              Mean   :0                  
##  3rd Qu.:     0              3rd Qu.:0                  
##  Max.   :166806              Max.   :0                  
##  Prod_LatestAvg_TotReactPwr HVTrafo_Phase1_Temp_Avg
##  Min.   :-77957             Min.   : 28.00         
##  1st Qu.:-26896             1st Qu.: 50.00         
##  Median :-15582             Median : 62.00         
##  Mean   :-15402             Mean   : 61.65         
##  3rd Qu.: -1229             3rd Qu.: 71.00         
##  Max.   :166806             Max.   :104.00         
##  HVTrafo_Phase2_Temp_Avg HVTrafo_Phase3_Temp_Avg
##  Min.   : 28.00          Min.   : 27.00         
##  1st Qu.: 54.00          1st Qu.: 50.00         
##  Median : 69.00          Median : 67.00         
##  Mean   : 68.65          Mean   : 67.27         
##  3rd Qu.: 79.00          3rd Qu.: 80.00         
##  Max.   :119.00          Max.   :137.00         
##  Grd_InverterPhase1_Temp_Avg Cont_Top_Temp_Avg Cont_Hub_Temp_Avg
##  Min.   :29.00               Min.   :23.00     Min.   :14.0     
##  1st Qu.:35.00               1st Qu.:35.00     1st Qu.:25.0     
##  Median :39.00               Median :39.00     Median :29.0     
##  Mean   :38.11               Mean   :39.52     Mean   :29.4     
##  3rd Qu.:40.00               3rd Qu.:43.00     3rd Qu.:33.0     
##  Max.   :58.00               Max.   :58.00     Max.   :49.0     
##  Cont_VCP_Temp_Avg Gen_SlipRing_Temp_Avg Spin_Temp_Avg  
##  Min.   :26.00     Min.   : 12.00        Min.   : 8.00  
##  1st Qu.:36.00     1st Qu.: 23.00        1st Qu.:18.00  
##  Median :41.00     Median : 28.00        Median :22.00  
##  Mean   :40.05     Mean   : 29.52        Mean   :22.47  
##  3rd Qu.:44.00     3rd Qu.: 35.00        3rd Qu.:26.00  
##  Max.   :66.00     Max.   :205.00        Max.   :44.00  
##  Blds_PitchAngle_Min Blds_PitchAngle_Max Blds_PitchAngle_Avg
##  Min.   :-4.200      Min.   :-2.20       Min.   :-2.300     
##  1st Qu.:-2.400      1st Qu.: 0.20       1st Qu.:-1.700     
##  Median :-2.200      Median : 4.50       Median :-0.100     
##  Mean   : 6.391      Mean   :13.62       Mean   : 9.009     
##  3rd Qu.:20.800      3rd Qu.:24.00       3rd Qu.:24.000     
##  Max.   :90.000      Max.   :90.00       Max.   :90.000     
##  Blds_PitchAngle_Std Cont_VCP_ChokcoilTemp_Avg Grd_RtrInvPhase1_Temp_Avg
##  Min.   : 0.000      Min.   : 20.00            Min.   :29.0             
##  1st Qu.: 0.100      1st Qu.: 53.00            1st Qu.:35.0             
##  Median : 0.400      Median : 90.00            Median :39.0             
##  Mean   : 1.697      Mean   : 79.67            Mean   :38.4             
##  3rd Qu.: 1.000      3rd Qu.:101.00            3rd Qu.:41.0             
##  Max.   :45.000      Max.   :149.00            Max.   :59.0             
##  Grd_RtrInvPhase2_Temp_Avg Grd_RtrInvPhase3_Temp_Avg Cont_VCP_WtrTemp_Avg
##  Min.   :29.00             Min.   :29.00             Min.   :22.00       
##  1st Qu.:35.00             1st Qu.:35.00             1st Qu.:33.00       
##  Median :39.00             Median :38.00             Median :38.00       
##  Mean   :38.53             Mean   :38.46             Mean   :36.71       
##  3rd Qu.:41.00             3rd Qu.:41.00             3rd Qu.:39.00       
##  Max.   :60.00             Max.   :59.00             Max.   :54.00       
##  Grd_Prod_Pwr_Avg Grd_Prod_CosPhi_Avg Grd_Prod_Freq_Avg
##  Min.   : -30.1   Min.   :0.2000      Min.   : 0.0     
##  1st Qu.:  -4.1   1st Qu.:0.8000      1st Qu.:50.0     
##  Median : 204.3   Median :0.9000      Median :50.0     
##  Mean   : 518.2   Mean   :0.8718      Mean   :50.0     
##  3rd Qu.: 849.1   3rd Qu.:1.0000      3rd Qu.:50.0     
##  Max.   :2000.5   Max.   :1.0000      Max.   :50.1     
##  Grd_Prod_VoltPhse1_Avg Grd_Prod_VoltPhse2_Avg Grd_Prod_VoltPhse3_Avg
##  Min.   :  0.0          Min.   :  0.0          Min.   :  0.0         
##  1st Qu.:398.2          1st Qu.:396.4          1st Qu.:395.7         
##  Median :399.3          Median :397.7          Median :397.2         
##  Mean   :399.3          Mean   :397.7          Mean   :397.3         
##  3rd Qu.:400.4          3rd Qu.:398.9          3rd Qu.:398.8         
##  Max.   :419.7          Max.   :418.1          Max.   :419.7         
##  Grd_Prod_CurPhse1_Avg Grd_Prod_CurPhse2_Avg Grd_Prod_CurPhse3_Avg
##  Min.   :   1.0        Min.   :   1.0        Min.   :   1.0       
##  1st Qu.:  12.6        1st Qu.:  12.0        1st Qu.:  13.7       
##  Median : 195.8        Median : 219.7        Median : 201.0       
##  Mean   : 447.8        Mean   : 463.5        Mean   : 448.3       
##  3rd Qu.: 712.2        3rd Qu.: 739.2        3rd Qu.: 714.2       
##  Max.   :1699.8        Max.   :1712.8        Max.   :1688.4       
##  Grd_Prod_Pwr_Max Grd_Prod_Pwr_Min Grd_Busbar_Temp_Avg  Rtr_RPM_Std    
##  Min.   : -27.7   Min.   : -90.4   Min.   :20.00       Min.   :0.0000  
##  1st Qu.:  -2.5   1st Qu.: -24.8   1st Qu.:33.00       1st Qu.:0.1000  
##  Median : 402.7   Median :  50.5   Median :38.00       Median :0.3000  
##  Mean   : 736.4   Mean   : 294.7   Mean   :37.43       Mean   :0.6057  
##  3rd Qu.:1390.4   3rd Qu.: 392.2   3rd Qu.:42.00       3rd Qu.:0.7000  
##  Max.   :2120.6   Max.   :1996.1   Max.   :77.00       Max.   :7.4000  
##  Amb_WindSpeed_Est_Avg Grd_Prod_Pwr_Std  Grd_Prod_ReactPwr_Avg
##  Min.   : 0.000        Min.   :   0.00   Min.   :-467.90      
##  1st Qu.: 3.100        1st Qu.:   3.50   1st Qu.:-161.40      
##  Median : 5.300        Median :  56.50   Median : -93.50      
##  Mean   : 5.932        Mean   :  94.46   Mean   : -92.41      
##  3rd Qu.: 8.100        3rd Qu.: 149.00   3rd Qu.:  -7.40      
##  Max.   :22.100        Max.   :1009.60   Max.   :1000.00      
##  Grd_Prod_ReactPwr_Max Grd_Prod_ReactPwr_Min Grd_Prod_ReactPwr_Std
##  Min.   :-449.5        Min.   :-1001.3       Min.   :  0.00       
##  1st Qu.:-147.2        1st Qu.: -202.0       1st Qu.:  2.50       
##  Median : -74.6        Median : -129.1       Median :  4.10       
##  Mean   : -27.0        Mean   : -145.9       Mean   : 21.41       
##  3rd Qu.:  -0.4        3rd Qu.:  -26.6       3rd Qu.:  7.90       
##  Max.   :1003.3        Max.   :  997.6       Max.   :575.70       
##  Grd_Prod_PsblePwr_Avg Grd_Prod_PsblePwr_Max Grd_Prod_PsblePwr_Min
##  Min.   :   0.0        Min.   :   0.0        Min.   :   0.0       
##  1st Qu.:  19.6        1st Qu.:  75.7        1st Qu.:   0.0       
##  Median : 230.9        Median : 457.0        Median :  65.3       
##  Mean   : 552.5        Mean   : 771.1        Mean   : 318.3       
##  3rd Qu.: 921.5        3rd Qu.:1520.6        3rd Qu.: 417.6       
##  Max.   :2000.0        Max.   :2000.0        Max.   :2000.0       
##  Grd_Prod_PsblePwr_Std Grd_Prod_PsbleInd_Avg Grd_Prod_PsbleInd_Max
##  Min.   :  0.0         Min.   :-1000.0       Min.   :-1000.0      
##  1st Qu.: 12.7         1st Qu.: -985.8       1st Qu.: -583.3      
##  Median : 59.1         Median : -627.0       Median : -239.9      
##  Mean   : 96.9         Mean   : -534.6       Mean   : -368.7      
##  3rd Qu.:153.2         3rd Qu.:    0.0       3rd Qu.:    0.0      
##  Max.   :999.5         Max.   :    0.0       Max.   :  200.0      
##  Grd_Prod_PsbleInd_Min Grd_Prod_PsbleInd_Std Grd_Prod_PsbleCap_Avg
##  Min.   :-1000.0       Min.   :  0.00        Min.   :   0.0       
##  1st Qu.:-1000.0       1st Qu.:  0.00        1st Qu.:   0.0       
##  Median :-1000.0       Median : 16.20        Median : 508.7       
##  Mean   : -669.4       Mean   : 74.53        Mean   : 504.6       
##  3rd Qu.:    0.0       3rd Qu.:140.40        3rd Qu.: 965.1       
##  Max.   :    0.0       Max.   :498.90        Max.   :1000.0       
##  Grd_Prod_PsbleCap_Max Grd_Prod_PsbleCap_Min Grd_Prod_PsbleCap_Std
##  Min.   :   0.0        Min.   :-200.0        Min.   :  0.00       
##  1st Qu.:   0.0        1st Qu.:   0.0        1st Qu.:  0.00       
##  Median :1000.0        Median : 239.9        Median : 29.60       
##  Mean   : 657.8        Mean   : 322.7        Mean   : 81.33       
##  3rd Qu.:1000.0        3rd Qu.: 517.5        3rd Qu.:152.50       
##  Max.   :1000.0        Max.   :1000.0        Max.   :498.90       
##  Gen_Bear2_Temp_Avg Nac_Direction_Avg
##  Min.   : 20.00     Min.   :  0.0    
##  1st Qu.: 32.00     1st Qu.:102.8    
##  Median : 40.00     Median :184.8    
##  Mean   : 42.37     Mean   :189.9    
##  3rd Qu.: 51.00     3rd Qu.:278.8    
##  Max.   :205.00     Max.   :359.0
# Parse Timestamp into a date-time and store the turbine identifier as a
# factor, then check the resulting time range.
signals_data <- signals_data %>%
  mutate(
    Timestamp = ymd_hms(Timestamp),
    Turbine_ID = as.factor(Turbine_ID)
  )
summary(signals_data[["Timestamp"]])
##                  Min.               1st Qu.                Median 
## "2016-01-01 00:00:00" "2016-03-31 07:20:00" "2016-06-29 17:30:00" 
##                  Mean               3rd Qu.                  Max. 
## "2016-07-01 11:35:06" "2016-10-01 23:50:00" "2016-12-31 23:50:00"
# Count the signals rows per turbine (summary() of a factor gives the number
# of rows at each level).
summary(signals_data$Turbine_ID) 
##   T01   T06   T07   T11 
## 52439 50575 52445 52446

Ahora tenemos que juntar las tres tablas. El cruce de las tablas metmast y signals no presenta problemas: lo hacemos por Timestamp.

# Add the metmast columns to every signals row that shares the same
# Timestamp; signals rows without a match are kept.
metmast_signals_data <- left_join(
  x = signals_data,
  y = metmast_data_1,
  by = "Timestamp"
)

Exportamos los datasets a CSV para su procesado en BigML

# Write the joined signals/metmast table and the reduced logs table as CSV
# files; the relative paths resolve against the current working directory.
write_csv(metmast_signals_data, "metmast_signals_data.csv")
write_csv(logs_data_1, "logs_data_1.csv")

Ahora vamos a unir el dataset metmast_signals_data con el dataset de logs

# Show the first rows of the joined signals/metmast table.
head(metmast_signals_data)
## # A tibble: 6 x 105
##   Turbine_ID Timestamp           Gen_RPM_Max Gen_RPM_Min Gen_RPM_Avg
##   <fct>      <dttm>                    <dbl>       <dbl>       <dbl>
## 1 T06        2016-04-01 00:00:00       1375.        289.       1044.
## 2 T11        2016-04-01 00:00:00       1359.       1235        1264.
## 3 T01        2016-04-01 00:00:00       1265.       1240.       1252.
## 4 T07        2016-04-01 00:00:00       1447        1247.       1300.
## 5 T06        2016-04-01 00:10:00       1361.       1224.       1271.
## 6 T01        2016-04-01 00:10:00       1277.       1230.       1250.
## # ... with 100 more variables: Gen_RPM_Std <dbl>, Gen_Bear_Temp_Avg <dbl>,
## #   Gen_Phase1_Temp_Avg <dbl>, Gen_Phase2_Temp_Avg <dbl>,
## #   Gen_Phase3_Temp_Avg <dbl>, Hyd_Oil_Temp_Avg <dbl>,
## #   Gear_Oil_Temp_Avg <dbl>, Gear_Bear_Temp_Avg <dbl>, Nac_Temp_Avg <dbl>,
## #   Rtr_RPM_Max <dbl>, Rtr_RPM_Min <dbl>, Rtr_RPM_Avg <dbl>,
## #   Amb_WindSpeed_Max <dbl>, Amb_WindSpeed_Min <dbl>,
## #   Amb_WindSpeed_Avg <dbl>, Amb_WindSpeed_Std <dbl>,
## #   Amb_WindDir_Relative_Avg <dbl>, Amb_WindDir_Abs_Avg <dbl>,
## #   Amb_Temp_Avg <dbl>, Prod_LatestAvg_ActPwrGen0 <dbl>,
## #   Prod_LatestAvg_ActPwrGen1 <dbl>, Prod_LatestAvg_ActPwrGen2 <dbl>,
## #   Prod_LatestAvg_TotActPwr <dbl>, Prod_LatestAvg_ReactPwrGen0 <dbl>,
## #   Prod_LatestAvg_ReactPwrGen1 <dbl>, Prod_LatestAvg_ReactPwrGen2 <dbl>,
## #   Prod_LatestAvg_TotReactPwr <dbl>, HVTrafo_Phase1_Temp_Avg <dbl>,
## #   HVTrafo_Phase2_Temp_Avg <dbl>, HVTrafo_Phase3_Temp_Avg <dbl>,
## #   Grd_InverterPhase1_Temp_Avg <dbl>, Cont_Top_Temp_Avg <dbl>,
## #   Cont_Hub_Temp_Avg <dbl>, Cont_VCP_Temp_Avg <dbl>,
## #   Gen_SlipRing_Temp_Avg <dbl>, Spin_Temp_Avg <dbl>,
## #   Blds_PitchAngle_Min <dbl>, Blds_PitchAngle_Max <dbl>,
## #   Blds_PitchAngle_Avg <dbl>, Blds_PitchAngle_Std <dbl>,
## #   Cont_VCP_ChokcoilTemp_Avg <dbl>, Grd_RtrInvPhase1_Temp_Avg <dbl>,
## #   Grd_RtrInvPhase2_Temp_Avg <dbl>, Grd_RtrInvPhase3_Temp_Avg <dbl>,
## #   Cont_VCP_WtrTemp_Avg <dbl>, Grd_Prod_Pwr_Avg <dbl>,
## #   Grd_Prod_CosPhi_Avg <dbl>, Grd_Prod_Freq_Avg <dbl>,
## #   Grd_Prod_VoltPhse1_Avg <dbl>, Grd_Prod_VoltPhse2_Avg <dbl>,
## #   Grd_Prod_VoltPhse3_Avg <dbl>, Grd_Prod_CurPhse1_Avg <dbl>,
## #   Grd_Prod_CurPhse2_Avg <dbl>, Grd_Prod_CurPhse3_Avg <dbl>,
## #   Grd_Prod_Pwr_Max <dbl>, Grd_Prod_Pwr_Min <dbl>,
## #   Grd_Busbar_Temp_Avg <dbl>, Rtr_RPM_Std <dbl>,
## #   Amb_WindSpeed_Est_Avg <dbl>, Grd_Prod_Pwr_Std <dbl>,
## #   Grd_Prod_ReactPwr_Avg <dbl>, Grd_Prod_ReactPwr_Max <dbl>,
## #   Grd_Prod_ReactPwr_Min <dbl>, Grd_Prod_ReactPwr_Std <dbl>,
## #   Grd_Prod_PsblePwr_Avg <dbl>, Grd_Prod_PsblePwr_Max <dbl>,
## #   Grd_Prod_PsblePwr_Min <dbl>, Grd_Prod_PsblePwr_Std <dbl>,
## #   Grd_Prod_PsbleInd_Avg <dbl>, Grd_Prod_PsbleInd_Max <dbl>,
## #   Grd_Prod_PsbleInd_Min <dbl>, Grd_Prod_PsbleInd_Std <dbl>,
## #   Grd_Prod_PsbleCap_Avg <dbl>, Grd_Prod_PsbleCap_Max <dbl>,
## #   Grd_Prod_PsbleCap_Min <dbl>, Grd_Prod_PsbleCap_Std <dbl>,
## #   Gen_Bear2_Temp_Avg <dbl>, Nac_Direction_Avg <dbl>,
## #   Min_Windspeed1 <dbl>, Max_Windspeed1 <dbl>, Avg_Windspeed1 <dbl>,
## #   Var_Windspeed1 <dbl>, Min_Windspeed2 <dbl>, Max_Windspeed2 <dbl>,
## #   Avg_Windspeed2 <dbl>, Var_Windspeed2 <dbl>, Min_AmbientTemp <dbl>,
## #   Max_AmbientTemp <dbl>, Avg_AmbientTemp <dbl>, Min_Pressure <dbl>,
## #   Max_Pressure <dbl>, Avg_Pressure <dbl>, Min_Humidity <dbl>,
## #   Max_Humidity <dbl>, Avg_Humidity <dbl>, Anemometer1_Freq <dbl>,
## #   Anemometer1_Offset <dbl>, Anemometer1_Avg_Freq <dbl>,
## #   Anemometer2_Avg_Freq <dbl>, Pressure_Avg_Freq <dbl>
# Show the first rows of the reduced logs table for comparison.
head(logs_data_1)
## # A tibble: 6 x 2
##   TimeDetected        UnitTitle
##   <dttm>              <fct>    
## 1 2016-04-01 00:01:12 T01      
## 2 2016-04-01 00:12:27 T01      
## 3 2016-04-01 00:21:37 T11      
## 4 2016-04-01 00:22:54 T06      
## 5 2016-04-01 00:23:09 T07      
## 6 2016-04-01 00:33:30 T11
# Assign every log entry to the start of the 10-minute window it falls in,
# so it can later be matched against the Timestamp values of the signals
# table, and rename UnitTitle to Turbine_ID to line up the join keys.
#
# floor_date() rounds the date-time arithmetically. The earlier approach cut
# the last four characters off as.character(TimeDetected) and pasted "0:00",
# which depended on how date-times happen to be printed and turned missing
# times into the text "NA0:00". The scratch tables logs_data_2 and
# logs_data_3 that it needed are therefore no longer created.
#
# Timestamp is deliberately kept as "YYYY-MM-DD HH:MM:SS" text here (missing
# times stay NA) because the following step parses it with ymd_hms().
logs_data_4 <- logs_data_1 %>%
  mutate(
    Timestamp = format(
      floor_date(TimeDetected, unit = "10 minutes"),
      "%Y-%m-%d %H:%M:%S"
    )
  ) %>%
  rename(Turbine_ID = UnitTitle)

El Timestamp creado hay que convertirlo en formato Date-Time

# Parse the text Timestamp into a date-time and flag every log row with the
# character marker "1" in a new `error` column, then inspect the result.
logs_data_4 <- logs_data_4 %>%
  mutate(
    Timestamp = ymd_hms(Timestamp),
    error = "1"
  )
head(logs_data_4)
## # A tibble: 6 x 4
##   TimeDetected        Turbine_ID Timestamp           error
##   <dttm>              <fct>      <dttm>              <chr>
## 1 2016-04-01 00:01:12 T01        2016-04-01 00:00:00 1    
## 2 2016-04-01 00:12:27 T01        2016-04-01 00:10:00 1    
## 3 2016-04-01 00:21:37 T11        2016-04-01 00:20:00 1    
## 4 2016-04-01 00:22:54 T06        2016-04-01 00:20:00 1    
## 5 2016-04-01 00:23:09 T07        2016-04-01 00:20:00 1    
## 6 2016-04-01 00:33:30 T11        2016-04-01 00:30:00 1

Ahora solo queda unirla con el dataset anterior

# Attach the log rows to the signals/metmast rows of the same turbine and
# 10-minute Timestamp; rows without any log keep NA in the log columns.
metmast_signals_logs_data <- left_join(
  metmast_signals_data,
  logs_data_4,
  by = c("Timestamp", "Turbine_ID")
)

Vamos a hacer una aproximacion: eliminar los errores repetidos. Si en una turbina en particular hubo 2 o mas errores dentro del mismo intervalo de 10 minutos, solo quedara un registro.

# metmast_signals_logs_data has already been built by the identical
# left_join() of metmast_signals_data and logs_data_4 earlier in the script,
# so that join is not recomputed here.
metmast_signals_logs_data_1 <- metmast_signals_logs_data

# Keep only the first row for each (Timestamp, Turbine_ID) pair: several logs
# of one turbine inside the same 10-minute window collapse into one record.
metmast_signals_logs_data_2 <- metmast_signals_logs_data_1 %>%
  distinct(Timestamp, Turbine_ID, .keep_all = TRUE)

Archivos de test

# Prepare the test dataset the same way as the training data: drop the same
# metmast columns, parse Timestamp into a date-time on both tables, join them
# by Timestamp and export the result.
#
# Parsing Timestamp (previously left as raw text in the test tables) makes the
# join key a real date-time, as in the training tables, and makes write_csv()
# emit the timestamps in the same format as the training export.
# NOTE(review): assumes the test workbooks store Timestamp as text in the same
# layout as the training workbooks - confirm.
test_metmast <- read_xlsx(path = "./wind-farm-1-metmast-testing.xlsx")

test_metmast_data_1 <- test_metmast %>%
  select(
    -Anemometer1_CorrOffset,
    -Anemometer2_Freq,
    -Anemometer2_Offset,
    -Anemometer2_CorrGain,
    -Anemometer2_CorrOffset,
    -DistanceAirPress,
    -AirRessureSensorZeroOffset,
    -Min_Precipitation,
    -Max_Precipitation,
    -Avg_Precipitation,
    -Min_Raindetection,
    -Avg_Raindetection,
    -Max_Raindetection,
    -Min_Winddirection2,
    -Max_Winddirection2,
    -Avg_Winddirection2,
    -Var_Winddirection2,
    -Anemometer1_CorrGain
  ) %>%
  mutate(Timestamp = ymd_hms(Timestamp))


test_signals <- read_xlsx(path = "./wind-farm-1-signals-testing.xlsx") %>%
  mutate(Timestamp = ymd_hms(Timestamp))

test_metmast_signals_data <- test_signals %>%
  left_join(test_metmast_data_1, by = "Timestamp")

# NOTE(review): the joined signals/metmast table is written under the name
# "test_metmast_data_1.csv", which suggests the metmast-only table; the file
# name is kept unchanged in case later steps expect it.
write_csv(test_metmast_signals_data, "test_metmast_data_1.csv")