knitr::opts_chunk$set(echo = TRUE,cache = TRUE,autodep = TRUE)
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Loading the data sets into a statistical programming language. In this case we are using R programming language.
R version 3.4.1 (2017-06-30) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows >= 8 x64 (build 9200)
We are using R Studio as an IDE for R. All the packages attached are listed with the code
setwd("D:/UIC/MyResearch")
library(readxl)
library(magrittr)
library(dplyr)
Attaching f in prefix because loading the name in dataset
path="D:/UIC/MyResearch/Pumps/KIRLOSKER PUMP BREAKDOWN DETAILS (1).xlsx"
for(i in 1:length(excel_sheets(path))){
assign(x =paste0("f",excel_sheets(path)[i]), value =read_excel(path = path,sheet = excel_sheets(path)[i]))
}
# drop a column
fP301A<-within(fP301A,rm("System status"))
Kirlosker<-rbind(fAGA951,fAGA952A,fAGA955,fCGA201A,fCGA302A,f11P19A,fP301A)
Let’s identify the equipments notifn
knitr::kable(table(Kirlosker %>% group_by(`Notifictn type` ) %>% select(`Notifictn type`,Equipment) ,dnn = c("Notification Type","Freq equipment")),caption="Matrix for Pumps and Notification")
11P19A | AGA951 | AGA952A | AGA955 | CGA201A | P301A | UGA302A | |
---|---|---|---|---|---|---|---|
M1 | 12 | 5 | 12 | 11 | 1 | 3 | 0 |
Z1 | 0 | 0 | 0 | 0 | 7 | 8 | 8 |
Z1 category is preventice maintenance. Thus we see that if a pump is maintained preventively chances are less of suffering a fault.
# To automate the extraction of sheets from excel
path="D:/UIC/MyResearch/Pumps/KSB PUMPS BREAKDOWN RECORD (2).xlsx"
for(i in 1:length(excel_sheets(path))){
assign(x =excel_sheets(path)[i], value =read_excel(path = path,sheet = excel_sheets(path)[i]))
}
# Rbinding difficult due to different names
`11P24A`<-within(`11P24A`,rm("System status"))
P701A<-within(P701A,rm("System status"))
UGA1107A<-within(UGA1107A,rm("System status"))
colnames(P901A)<-names(P701A)
# Now again aggregating all the data from different sheets and putting them in a KSB dataset
KSB<-rbind(`11P24A`,P701A,P702A,P2001A,P901A,UGA1107A,UGA2201A)
knitr::kable(table(KSB %>% group_by(`Notifictn type` ) %>% select(`Notifictn type`,Equipment) ,dnn = c("Notification Type","Freq equipment")),caption="Matrix for Pumps and Notification")
21P24A | AGA901A | P2001A | P701A | P702A | UGA1107A | UGA2201A | |
---|---|---|---|---|---|---|---|
M1 | 1 | 4 | 4 | 2 | 3 | 0 | 9 |
Z1 | 0 | 3 | 0 | 0 | 9 | 8 | 7 |
Z3 | 6 | 0 | 0 | 5 | 0 | 0 | 0 |
path="D:/UIC/MyResearch/Pumps/MAINTENANCE HISTORY - compressor and turbile- K1&TK1.xlsx"
for(i in 1:length(excel_sheets(path))){
assign(x =paste0("c",excel_sheets(path)[i]), value =read_excel(path = path,sheet = excel_sheets(path)[i]))
}
# All these pumps have extra atrributes which 1st are not common across all pumps and secondly
# may not be relevant in analyzing the data
# What is important is the notification date because that might help us in knowing the frequency
# The third data set is of compressor and turbine. We will be stripping of unwanted columns
# Let's us change the names of columns so they are in a readable format
# X_3 is useful in 11ko1. We ll rename the column to Execution date
c11K01<-rename(c11K01,Execution.date=X__3)
cP607C<-rename(cP607C,Execution.date=X__1)
# In the equipment the equipment column should have a constant name throughout the dataset
c11K01$Equipment<-rep("11K01",nrow(c11K01))
cK431$Equipment<-rep("K431",nrow(cK431))
Compar<-data.frame(Ko1=colnames(c11K01))
Compar[14,]<-rep(x = NA,1)
Compar1<-data.frame(TKo1=colnames(c11TK01))
Compar1[11:14,]<-rep(x = NA,4)
Compar2<-data.frame(P607C=colnames(cP607C))
Compar3<-data.frame(K431=colnames(cK431))
Compar3[13:14,]<-rep(x = NA,2)
NetNames<-cbind(Compar,Compar1,Compar2,Compar3)
knitr::kable(NetNames,caption = "column names")
Ko1 | TKo1 | P607C | K431 |
---|---|---|---|
Notifictn type | Notifictn type | Notifictn type | Notifictn type |
Notification | Notification | Notification | Notification |
Notif.date | Notif.date | Notif.date | Notif.date |
Order | Order | Order | Order |
Equipment | Equipment | Equipment | Equipment |
Description | Description | Description | Description |
Functional loc. | Functional loc. | Functional loc. | Functional loc. |
System status | System status | System status | System status |
X__1 | User status | Execution.date | User status |
X__2 | Main WorkCtr | X__2 | Execution Date |
Execution.date | NA | User status | Job Details |
X__4 | NA | PartnerResp. | Main WorkCtr |
PartnerResp. | NA | DescEmpl.Resp. | NA |
NA | NA | Main WorkCtr | NA |
rm(Compar,Compar1,Compar2,Compar3)
# We will delete the unwanted variables from the dataset
# From the netnames we see that first 8 variables are common along the dataset. We will delete the rest
for(i in 1:length(excel_sheets(path))){
assign(x=paste0("c",excel_sheets(path)[i]),value=get(paste0("c",excel_sheets(path)[i]))[,1:8])
}
# They have different date formats so we need to convert to a common date format
#The below code is not working. The dates format are not proper
# ?c11K01$Notif.date<-as.Date(c11K01$Notif.date,format = "%d.%m.%y")
# ?c11TK01$Notif.date<-as.Date(c11TK01$Notif.date,format = "%d.%m.%y")
c11K01$Notif.date<-lubridate::dmy(c11K01$Notif.date)
c11TK01$Notif.date<-lubridate::dmy(c11TK01$Notif.date)
cP607C$Notif.date<-as.Date(cP607C$Notif.date,format = "%d.%m.%y")
cK431$Notif.date<-as.Date(cK431$Notif.date,format = "%d.%m.%y")
# Comnbining for analysis
CompTurbine<-rbind(get(paste0("c",excel_sheets(path)[1])),get(paste0("c",excel_sheets(path)[2])),get(paste0("c",excel_sheets(path)[3])),get(paste0("c",excel_sheets(path)[4])))
knitr::kable(table(CompTurbine %>% group_by(`Notifictn type` ) %>% select(`Notifictn type`,Equipment) ,dnn = c("Notification Type","Freq equipment")),caption="Matrix for Pumps and Notification")
11K01 | 11TK01 | K431 | P607C | |
---|---|---|---|---|
M1 | 9 | 50 | 16 | 10 |
Z1 | 0 | 0 | 0 | 8 |
Z3 | 6 | 30 | 0 | 0 |
CompTurbine %>% group_by(`Notifictn type`,Equipment ) %>% select(`Notifictn type`,Equipment) %>% tally()
## # A tibble: 7 x 3
## # Groups: Notifictn type [?]
## `Notifictn type` Equipment n
## <chr> <chr> <int>
## 1 M1 11K01 9
## 2 M1 11TK01 50
## 3 M1 K431 16
## 4 M1 P607C 10
## 5 Z1 P607C 8
## 6 Z3 11K01 6
## 7 Z3 11TK01 30
It is assumed that installation date of every pump is to be considered the previous year’s date in April
4 dates were formatted in excel itself before loading. Their format was changed from %m/%d/%y and %m.%d.y
We will arrange the dates in descending order ( in the order in which they are listed ) and then group them according to the notification type
Kirlosker$Notif.date<-lubridate::dmy(Kirlosker$Notif.date)
KSB$Notif.date<-lubridate::dmy(KSB$Notif.date)
After converting all the dates in a common format, we will
Analyzing the Kirlosker dataset first
dates_from_subtd<-Kirlosker %>% group_by(Equipment) %>% summarise(Date=min(Notif.date))
#For every pump we got the min date and hence subtracting one year from the date we can get our date for analysis
date_installed<-as.data.frame(paste((lubridate::year(dates_from_subtd$Date)-1),"04-01",sep="-"))
colnames(date_installed)<-"Date Installed"
dates_from_subtd<-cbind(dates_from_subtd,date_installed)
rm(date_installed)
dates_from_subtd<-dates_from_subtd[,c(1,3)]
Kirlosker<-left_join(Kirlosker,dates_from_subtd)
## Joining, by = "Equipment"
rm(dates_from_subtd)
dates_from_subtd<-Kirlosker %>% select(Equipment,Notif.date,`Date Installed`)
dates_from_subtd$`Date Installed`<-as.Date(dates_from_subtd$`Date Installed`)
dates_from_subtd<-dates_from_subtd %>% mutate(diff_date=(Notif.date - `Date Installed`))
dates_from_subtd %>% group_by(Equipment) %>% summarise(Date=min(diff_date))
## # A tibble: 7 x 2
## Equipment Date
## <chr> <time>
## 1 11P19A 330 days
## 2 AGA951 322 days
## 3 AGA952A 414 days
## 4 AGA955 365 days
## 5 CGA201A 556 days
## 6 P301A 308 days
## 7 UGA302A 576 days
The above table gives us the the days between which the pump had fault for the first time
Can use %in% operator to selectively filter some of the values
Kirlosker<-Kirlosker %>% select(`Notifictn type`,Equipment,Notif.date,`Date Installed`)
Kirlosker<-Kirlosker %>% group_by(Equipment,Notif.date) %>% arrange(Equipment,desc(Notif.date))
Kirlosker<-as.data.frame(Kirlosker)
Kirlosker<-Kirlosker %>% mutate(diff_days=Notif.date -lead(Notif.date))
Kirlosker$diff_days<-as.numeric(Kirlosker$diff_days)
for(i in 1:(nrow(Kirlosker)-1)){
if(Kirlosker$diff_days[i]<0)
{
Kirlosker$diff_days[i]<--1
}
else{
Kirlosker$diff_days[i]<-Kirlosker$diff_days[i]
}
}
We need to manually put the dates which are set to zero because it has to be subtracted from the data installed
Although we can easily replace it with few rows I want to write code which is generic For this we have to know how many rows each category has and will then replace them will the last one because they are the ones which are zero or NA
# We get the count of each equipment and we know that last we have to calculate manually
Kirlosker[is.na(Kirlosker[,5]),5]<--1
# Now wherever there are zeroes we can replace them with the date installed subtracted from the first fault occurence
Kirlosker$`Date Installed`<-as.Date(Kirlosker$`Date Installed`)
for(i in 1:nrow(Kirlosker)){
if(Kirlosker[i,5]==-1){
Kirlosker[i,5]<-Kirlosker[i,3]-Kirlosker[i,4]
}
}
dates_from_subtd<-KSB%>% group_by(Equipment) %>% summarise(Date=min(Notif.date))
#For every pump we got the min date and hence subtracting one year from the date we can get our date for analysis
date_installed<-as.data.frame(paste((lubridate::year(dates_from_subtd$Date)-1),"04-01",sep="-"))
colnames(date_installed)<-"Date Installed"
dates_from_subtd<-cbind(dates_from_subtd,date_installed)
rm(date_installed)
dates_from_subtd<-dates_from_subtd[,c(1,3)]
KSB<-left_join(KSB,dates_from_subtd)
## Joining, by = "Equipment"
rm(dates_from_subtd)
dates_from_subtd<-KSB %>% select(Equipment,Notif.date,`Date Installed`)
dates_from_subtd$`Date Installed`<-as.Date(dates_from_subtd$`Date Installed`)
dates_from_subtd<-dates_from_subtd %>% mutate(diff_date=(Notif.date - `Date Installed`))
dates_from_subtd %>% group_by(Equipment) %>% summarise(Date=min(diff_date))
## # A tibble: 7 x 2
## Equipment Date
## <chr> <time>
## 1 21P24A 332 days
## 2 AGA901A 380 days
## 3 P2001A 377 days
## 4 P701A 330 days
## 5 P702A 287 days
## 6 UGA1107A 448 days
## 7 UGA2201A 321 days
The above table gives us the the days between which the pump had fault for the first time
Can use %in% operator to selectively filter some of the values
KSB<-KSB %>% select(`Notifictn type`,Equipment,Notif.date,`Date Installed`)
KSB<-KSB %>% group_by(Equipment,Notif.date) %>% arrange(Equipment,desc(Notif.date))
KSB<-as.data.frame(KSB)
# to calculate difference between successive defaults
KSB<-KSB %>% mutate(diff_days=Notif.date -lead(Notif.date))
KSB$diff_days<-as.numeric(KSB$diff_days)
# We are running the loop max-1 because there is no lead for the last item which generates NA's and we know NA's cause lot of troubles in R
for(i in 1:(nrow(KSB)-1)){
if(KSB$diff_days[i] < 0)
{
KSB$diff_days[i]<--1
}
else{
KSB$diff_days[i]<-KSB$diff_days[i]
}
}
We need to manually put the dates which are set to zero because it has to be subtracted from the data installed
Although we can easily replace it with few rows I want to write code which is generic For this we have to know how many rows each category has and will then replace them will the last one because they are the ones which are zero or NA
# setting the last NA to zero
KSB[is.na(KSB[,5]),5]<--1
# Now wherever there are zeroes we can replace them with the date installed subtracted from the first fault occurence
KSB$`Date Installed`<-as.Date(KSB$`Date Installed`)
for(i in 1:nrow(KSB)){
if(KSB[i,5]==-1){
KSB[i,5]<-KSB[i,3]-KSB[i,4]
}
}
NOTE: It might be a wrong practice to carry the dates installed as it is carrying the same data.
We see that CompTurbine the dates are not arranged in a strictly decreasing order within a category.
dates_from_subtd<-CompTurbine%>% group_by(Equipment) %>% summarise(Date=min(Notif.date))
#For every pump we got the min date and hence subtracting one year from the date we can get our date for analysis
date_installed<-as.data.frame(paste((lubridate::year(dates_from_subtd$Date)-1),"04-01",sep="-"))
colnames(date_installed)<-"Date Installed"
dates_from_subtd<-cbind(dates_from_subtd,date_installed)
rm(date_installed)
dates_from_subtd<-dates_from_subtd[,c(1,3)]
CompTurbine<-left_join(CompTurbine,dates_from_subtd)
## Joining, by = "Equipment"
rm(dates_from_subtd)
dates_from_subtd<-CompTurbine %>% select(Equipment,Notif.date,`Date Installed`)
dates_from_subtd$`Date Installed`<-as.Date(dates_from_subtd$`Date Installed`)
dates_from_subtd<-dates_from_subtd %>% mutate(diff_date=(Notif.date - `Date Installed`))
dates_from_subtd %>% group_by(Equipment) %>% summarise(Date=min(diff_date))
## # A tibble: 4 x 2
## Equipment Date
## <chr> <time>
## 1 11K01 362
## 2 11TK01 294
## 3 K431 365
## 4 P607C 440
The above table gives us the the days between which the pump had fault for the first time
Can use %in% operator to selectively filter some of the values
CompTurbine<-CompTurbine %>% select(`Notifictn type`,Equipment,Notif.date,`Date Installed`)
CompTurbine<-CompTurbine %>% group_by(Equipment,Notif.date) %>% arrange(Equipment,desc(Notif.date))
CompTurbine<-as.data.frame(CompTurbine)
# to calculate difference between successive defaults
CompTurbine<-CompTurbine %>% mutate(diff_days=Notif.date -lead(Notif.date))
CompTurbine$diff_days<-as.numeric(CompTurbine$diff_days)
# We are running the loop max-1 because there is no lead for the last item which generates NA's and we know NA's cause lot of troubles in R
for(i in 1:(nrow(CompTurbine)-1)){
if(CompTurbine$diff_days[i] < 0)
{
CompTurbine$diff_days[i]<--1
}
else{
CompTurbine$diff_days[i]<-CompTurbine$diff_days[i]
}
}
We need to manually put the dates which are set to zero because it has to be subtracted from the data installed
Although we can easily replace it with few rows I want to write code which is generic For this we have to know how many rows each category has and will then replace them will the last one because they are the ones which are zero or NA
# setting the last NA to zero
CompTurbine[is.na(CompTurbine[,5]),5]<--1
# Now wherever there are zeroes we can replace them with the date installed subtracted from the first fault occurence
CompTurbine$`Date Installed`<-as.Date(CompTurbine$`Date Installed`)
for(i in 1:nrow(CompTurbine)){
if(CompTurbine[i,5]==-1){
CompTurbine[i,5]<-CompTurbine[i,3]-CompTurbine[i,4]
}
}
NOTE: It might be a wrong practice to carry the dates installed as it is carrying the same data.
After calculating the fault frequency in terms of days for all the three equipments let us put the output in a nice format
knitr::kable(Kirlosker,caption = 'Kirlosker')
Notifictn type | Equipment | Notif.date | Date Installed | diff_days |
---|---|---|---|---|
M1 | 11P19A | 2017-05-18 | 2009-04-01 | 1074 |
M1 | 11P19A | 2014-06-09 | 2009-04-01 | 154 |
M1 | 11P19A | 2014-01-06 | 2009-04-01 | 113 |
M1 | 11P19A | 2013-09-15 | 2009-04-01 | 52 |
M1 | 11P19A | 2013-07-25 | 2009-04-01 | 172 |
M1 | 11P19A | 2013-02-03 | 2009-04-01 | 85 |
M1 | 11P19A | 2012-11-10 | 2009-04-01 | 101 |
M1 | 11P19A | 2012-08-01 | 2009-04-01 | 14 |
M1 | 11P19A | 2012-07-18 | 2009-04-01 | 65 |
M1 | 11P19A | 2012-05-14 | 2009-04-01 | 389 |
M1 | 11P19A | 2011-04-21 | 2009-04-01 | 420 |
M1 | 11P19A | 2010-02-25 | 2009-04-01 | 330 |
M1 | AGA951 | 2017-01-11 | 2010-04-01 | 391 |
M1 | AGA951 | 2015-12-17 | 2010-04-01 | 576 |
M1 | AGA951 | 2014-05-20 | 2010-04-01 | 664 |
M1 | AGA951 | 2012-07-25 | 2010-04-01 | 524 |
M1 | AGA951 | 2011-02-17 | 2010-04-01 | 322 |
M1 | AGA952A | 2016-10-28 | 2008-04-01 | 27 |
M1 | AGA952A | 2016-10-01 | 2008-04-01 | 90 |
M1 | AGA952A | 2016-07-03 | 2008-04-01 | 122 |
M1 | AGA952A | 2016-03-03 | 2008-04-01 | 629 |
M1 | AGA952A | 2014-06-13 | 2008-04-01 | 314 |
M1 | AGA952A | 2013-08-03 | 2008-04-01 | 9 |
M1 | AGA952A | 2013-07-25 | 2008-04-01 | 562 |
M1 | AGA952A | 2012-01-10 | 2008-04-01 | 113 |
M1 | AGA952A | 2011-09-19 | 2008-04-01 | 767 |
M1 | AGA952A | 2009-08-13 | 2008-04-01 | 73 |
M1 | AGA952A | 2009-06-01 | 2008-04-01 | 12 |
M1 | AGA952A | 2009-05-20 | 2008-04-01 | 414 |
M1 | AGA955 | 2017-06-07 | 2008-04-01 | 300 |
M1 | AGA955 | 2016-08-11 | 2008-04-01 | 246 |
M1 | AGA955 | 2015-12-09 | 2008-04-01 | 195 |
M1 | AGA955 | 2015-05-28 | 2008-04-01 | 114 |
M1 | AGA955 | 2015-02-03 | 2008-04-01 | 159 |
M1 | AGA955 | 2014-08-28 | 2008-04-01 | 1179 |
M1 | AGA955 | 2011-06-06 | 2008-04-01 | 298 |
M1 | AGA955 | 2010-08-12 | 2008-04-01 | 285 |
M1 | AGA955 | 2009-10-31 | 2008-04-01 | 211 |
M1 | AGA955 | 2009-04-03 | 2008-04-01 | 2 |
M1 | AGA955 | 2009-04-01 | 2008-04-01 | 365 |
Z1 | CGA201A | 2017-01-02 | 2008-04-01 | 621 |
M1 | CGA201A | 2015-04-22 | 2008-04-01 | 194 |
Z1 | CGA201A | 2014-10-10 | 2008-04-01 | 240 |
Z1 | CGA201A | 2014-02-12 | 2008-04-01 | 544 |
Z1 | CGA201A | 2012-08-17 | 2008-04-01 | 302 |
Z1 | CGA201A | 2011-10-20 | 2008-04-01 | 367 |
Z1 | CGA201A | 2010-10-18 | 2008-04-01 | 374 |
Z1 | CGA201A | 2009-10-09 | 2008-04-01 | 556 |
M1 | P301A | 2017-02-16 | 2009-04-01 | 49 |
Z1 | P301A | 2016-12-29 | 2009-04-01 | 324 |
Z1 | P301A | 2016-02-09 | 2009-04-01 | 288 |
Z1 | P301A | 2015-04-27 | 2009-04-01 | 0 |
M1 | P301A | 2015-04-27 | 2009-04-01 | 441 |
Z1 | P301A | 2014-02-10 | 2009-04-01 | 368 |
Z1 | P301A | 2013-02-07 | 2009-04-01 | 372 |
M1 | P301A | 2012-02-01 | 2009-04-01 | 16 |
Z1 | P301A | 2012-01-16 | 2009-04-01 | 349 |
Z1 | P301A | 2011-02-01 | 2009-04-01 | 363 |
Z1 | P301A | 2010-02-03 | 2009-04-01 | 308 |
Z1 | UGA302A | 2016-05-24 | 2008-04-01 | 320 |
Z1 | UGA302A | 2015-07-09 | 2008-04-01 | 351 |
Z1 | UGA302A | 2014-07-23 | 2008-04-01 | 392 |
Z1 | UGA302A | 2013-06-26 | 2008-04-01 | 347 |
Z1 | UGA302A | 2012-07-14 | 2008-04-01 | 369 |
Z1 | UGA302A | 2011-07-11 | 2008-04-01 | 373 |
Z1 | UGA302A | 2010-07-03 | 2008-04-01 | 247 |
Z1 | UGA302A | 2009-10-29 | 2008-04-01 | 576 |
knitr::kable(KSB,caption = 'KSB')
Notifictn type | Equipment | Notif.date | Date Installed | diff_days |
---|---|---|---|---|
Z3 | 21P24A | 2016-10-25 | 2008-04-01 | 976 |
Z3 | 21P24A | 2014-02-22 | 2008-04-01 | 676 |
Z3 | 21P24A | 2012-04-17 | 2008-04-01 | 353 |
Z3 | 21P24A | 2011-04-30 | 2008-04-01 | 387 |
Z3 | 21P24A | 2010-04-08 | 2008-04-01 | 382 |
M1 | 21P24A | 2009-03-22 | 2008-04-01 | 23 |
Z3 | 21P24A | 2009-02-27 | 2008-04-01 | 332 |
Z1 | AGA901A | 2017-04-07 | 2008-04-01 | 367 |
Z1 | AGA901A | 2016-04-05 | 2008-04-01 | 780 |
M1 | AGA901A | 2014-02-15 | 2008-04-01 | 304 |
M1 | AGA901A | 2013-04-17 | 2008-04-01 | 489 |
M1 | AGA901A | 2011-12-15 | 2008-04-01 | 358 |
M1 | AGA901A | 2010-12-22 | 2008-04-01 | 615 |
Z1 | AGA901A | 2009-04-16 | 2008-04-01 | 380 |
M1 | P2001A | 2017-05-29 | 2010-04-01 | 24 |
M1 | P2001A | 2017-05-05 | 2010-04-01 | 710 |
M1 | P2001A | 2015-05-26 | 2010-04-01 | 1504 |
M1 | P2001A | 2011-04-13 | 2010-04-01 | 377 |
Z3 | P701A | 2016-10-18 | 2008-04-01 | 711 |
M1 | P701A | 2014-11-07 | 2008-04-01 | 242 |
M1 | P701A | 2014-03-10 | 2008-04-01 | 703 |
Z3 | P701A | 2012-04-06 | 2008-04-01 | 343 |
Z3 | P701A | 2011-04-29 | 2008-04-01 | 384 |
Z3 | P701A | 2010-04-10 | 2008-04-01 | 409 |
Z3 | P701A | 2009-02-25 | 2008-04-01 | 330 |
Z1 | P702A | 2017-02-24 | 2008-04-01 | 232 |
M1 | P702A | 2016-07-07 | 2008-04-01 | 26 |
M1 | P702A | 2016-06-11 | 2008-04-01 | 5 |
M1 | P702A | 2016-06-06 | 2008-04-01 | 125 |
Z1 | P702A | 2016-02-02 | 2008-04-01 | 376 |
Z1 | P702A | 2015-01-22 | 2008-04-01 | 365 |
Z1 | P702A | 2014-01-22 | 2008-04-01 | 330 |
Z1 | P702A | 2013-02-26 | 2008-04-01 | 473 |
Z1 | P702A | 2011-11-11 | 2008-04-01 | 330 |
Z1 | P702A | 2010-12-16 | 2008-04-01 | 315 |
Z1 | P702A | 2010-02-04 | 2008-04-01 | 387 |
Z1 | P702A | 2009-01-13 | 2008-04-01 | 287 |
Z1 | UGA1107A | 2017-04-12 | 2008-04-01 | 190 |
Z1 | UGA1107A | 2016-10-04 | 2008-04-01 | 599 |
Z1 | UGA1107A | 2015-02-13 | 2008-04-01 | 663 |
Z1 | UGA1107A | 2013-04-21 | 2008-04-01 | 408 |
Z1 | UGA1107A | 2012-03-09 | 2008-04-01 | 283 |
Z1 | UGA1107A | 2011-05-31 | 2008-04-01 | 354 |
Z1 | UGA1107A | 2010-06-11 | 2008-04-01 | 353 |
Z1 | UGA1107A | 2009-06-23 | 2008-04-01 | 448 |
M1 | UGA2201A | 2017-06-01 | 2008-04-01 | 415 |
Z1 | UGA2201A | 2016-04-12 | 2008-04-01 | 357 |
Z1 | UGA2201A | 2015-04-21 | 2008-04-01 | 313 |
M1 | UGA2201A | 2014-06-12 | 2008-04-01 | 23 |
M1 | UGA2201A | 2014-05-20 | 2008-04-01 | 13 |
Z1 | UGA2201A | 2014-05-07 | 2008-04-01 | 332 |
M1 | UGA2201A | 2013-06-09 | 2008-04-01 | 16 |
Z1 | UGA2201A | 2013-05-24 | 2008-04-01 | 252 |
M1 | UGA2201A | 2012-09-14 | 2008-04-01 | 136 |
M1 | UGA2201A | 2012-05-01 | 2008-04-01 | 705 |
Z1 | UGA2201A | 2010-05-27 | 2008-04-01 | 5 |
Z1 | UGA2201A | 2010-05-22 | 2008-04-01 | 167 |
M1 | UGA2201A | 2009-12-06 | 2008-04-01 | 74 |
M1 | UGA2201A | 2009-09-23 | 2008-04-01 | 127 |
Z1 | UGA2201A | 2009-05-19 | 2008-04-01 | 92 |
M1 | UGA2201A | 2009-02-16 | 2008-04-01 | 321 |
knitr::kable(CompTurbine,caption = 'CompTurbine')
Notifictn type | Equipment | Notif.date | Date Installed | diff_days |
---|---|---|---|---|
M1 | 11K01 | 2014-09-29 | 2009-04-01 | 183 |
M1 | 11K01 | 2014-03-30 | 2009-04-01 | 34 |
M1 | 11K01 | 2014-02-24 | 2009-04-01 | 74 |
M1 | 11K01 | 2013-12-12 | 2009-04-01 | 170 |
Z3 | 11K01 | 2013-06-25 | 2009-04-01 | 46 |
Z3 | 11K01 | 2013-05-10 | 2009-04-01 | 523 |
M1 | 11K01 | 2011-12-04 | 2009-04-01 | 244 |
Z3 | 11K01 | 2011-04-04 | 2009-04-01 | 0 |
Z3 | 11K01 | 2011-04-04 | 2009-04-01 | 217 |
M1 | 11K01 | 2010-08-30 | 2009-04-01 | 96 |
M1 | 11K01 | 2010-05-26 | 2009-04-01 | 22 |
M1 | 11K01 | 2010-05-04 | 2009-04-01 | 3 |
M1 | 11K01 | 2010-05-01 | 2009-04-01 | 33 |
Z3 | 11K01 | 2010-03-29 | 2009-04-01 | 0 |
Z3 | 11K01 | 2010-03-29 | 2009-04-01 | 362 |
Z3 | 11TK01 | 2017-02-02 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2017-02-02 | 2008-04-01 | 69 |
M1 | 11TK01 | 2016-11-25 | 2008-04-01 | 9 |
M1 | 11TK01 | 2016-11-16 | 2008-04-01 | 4 |
M1 | 11TK01 | 2016-11-12 | 2008-04-01 | 115 |
M1 | 11TK01 | 2016-07-20 | 2008-04-01 | 37 |
Z3 | 11TK01 | 2016-06-13 | 2008-04-01 | 2 |
M1 | 11TK01 | 2016-06-11 | 2008-04-01 | 80 |
Z3 | 11TK01 | 2016-03-23 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2016-03-23 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2016-03-23 | 2008-04-01 | 5 |
Z3 | 11TK01 | 2016-03-18 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2016-03-18 | 2008-04-01 | 12 |
Z3 | 11TK01 | 2016-03-06 | 2008-04-01 | 239 |
M1 | 11TK01 | 2015-07-11 | 2008-04-01 | 145 |
M1 | 11TK01 | 2015-02-16 | 2008-04-01 | 30 |
Z3 | 11TK01 | 2015-01-17 | 2008-04-01 | 39 |
Z3 | 11TK01 | 2014-12-09 | 2008-04-01 | 74 |
M1 | 11TK01 | 2014-09-26 | 2008-04-01 | 4 |
M1 | 11TK01 | 2014-09-22 | 2008-04-01 | 24 |
M1 | 11TK01 | 2014-08-29 | 2008-04-01 | 64 |
M1 | 11TK01 | 2014-06-26 | 2008-04-01 | 9 |
M1 | 11TK01 | 2014-06-17 | 2008-04-01 | 1 |
M1 | 11TK01 | 2014-06-16 | 2008-04-01 | 9 |
M1 | 11TK01 | 2014-06-07 | 2008-04-01 | 10 |
M1 | 11TK01 | 2014-05-28 | 2008-04-01 | 3 |
M1 | 11TK01 | 2014-05-25 | 2008-04-01 | 0 |
M1 | 11TK01 | 2014-05-25 | 2008-04-01 | 19 |
Z3 | 11TK01 | 2014-05-06 | 2008-04-01 | 1 |
Z3 | 11TK01 | 2014-05-05 | 2008-04-01 | 38 |
M1 | 11TK01 | 2014-03-28 | 2008-04-01 | 3 |
M1 | 11TK01 | 2014-03-25 | 2008-04-01 | 72 |
M1 | 11TK01 | 2014-01-12 | 2008-04-01 | 42 |
Z3 | 11TK01 | 2013-12-01 | 2008-04-01 | 33 |
Z3 | 11TK01 | 2013-10-29 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2013-10-29 | 2008-04-01 | 46 |
M1 | 11TK01 | 2013-09-13 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2013-09-13 | 2008-04-01 | 32 |
M1 | 11TK01 | 2013-08-12 | 2008-04-01 | 18 |
Z3 | 11TK01 | 2013-07-25 | 2008-04-01 | 26 |
Z3 | 11TK01 | 2013-06-29 | 2008-04-01 | 4 |
Z3 | 11TK01 | 2013-06-25 | 2008-04-01 | 19 |
Z3 | 11TK01 | 2013-06-06 | 2008-04-01 | 54 |
M1 | 11TK01 | 2013-04-13 | 2008-04-01 | 95 |
Z3 | 11TK01 | 2013-01-08 | 2008-04-01 | 0 |
Z3 | 11TK01 | 2013-01-08 | 2008-04-01 | 69 |
M1 | 11TK01 | 2012-10-31 | 2008-04-01 | 38 |
M1 | 11TK01 | 2012-09-23 | 2008-04-01 | 68 |
M1 | 11TK01 | 2012-07-17 | 2008-04-01 | 33 |
M1 | 11TK01 | 2012-06-14 | 2008-04-01 | 43 |
M1 | 11TK01 | 2012-05-02 | 2008-04-01 | 10 |
M1 | 11TK01 | 2012-04-22 | 2008-04-01 | 72 |
Z3 | 11TK01 | 2012-02-10 | 2008-04-01 | 9 |
M1 | 11TK01 | 2012-02-01 | 2008-04-01 | 23 |
Z3 | 11TK01 | 2012-01-09 | 2008-04-01 | 35 |
M1 | 11TK01 | 2011-12-05 | 2008-04-01 | 92 |
Z3 | 11TK01 | 2011-09-04 | 2008-04-01 | 55 |
M1 | 11TK01 | 2011-07-11 | 2008-04-01 | 0 |
M1 | 11TK01 | 2011-07-11 | 2008-04-01 | 20 |
Z3 | 11TK01 | 2011-06-21 | 2008-04-01 | 1 |
Z3 | 11TK01 | 2011-06-20 | 2008-04-01 | 25 |
M1 | 11TK01 | 2011-05-26 | 2008-04-01 | 7 |
M1 | 11TK01 | 2011-05-19 | 2008-04-01 | 10 |
M1 | 11TK01 | 2011-05-09 | 2008-04-01 | 39 |
Z3 | 11TK01 | 2011-03-31 | 2008-04-01 | 33 |
M1 | 11TK01 | 2011-02-26 | 2008-04-01 | 151 |
M1 | 11TK01 | 2010-09-28 | 2008-04-01 | 4 |
M1 | 11TK01 | 2010-09-24 | 2008-04-01 | 121 |
M1 | 11TK01 | 2010-05-26 | 2008-04-01 | 25 |
M1 | 11TK01 | 2010-05-01 | 2008-04-01 | 0 |
M1 | 11TK01 | 2010-05-01 | 2008-04-01 | 4 |
M1 | 11TK01 | 2010-04-27 | 2008-04-01 | 24 |
M1 | 11TK01 | 2010-04-03 | 2008-04-01 | 58 |
M1 | 11TK01 | 2010-02-04 | 2008-04-01 | 54 |
M1 | 11TK01 | 2009-12-12 | 2008-04-01 | 30 |
Z3 | 11TK01 | 2009-11-12 | 2008-04-01 | 111 |
M1 | 11TK01 | 2009-07-24 | 2008-04-01 | 15 |
M1 | 11TK01 | 2009-07-09 | 2008-04-01 | 76 |
M1 | 11TK01 | 2009-04-24 | 2008-04-01 | 94 |
M1 | 11TK01 | 2009-01-20 | 2008-04-01 | 294 |
M1 | K431 | 2015-11-05 | 2009-04-01 | 622 |
M1 | K431 | 2014-02-21 | 2009-04-01 | 155 |
M1 | K431 | 2013-09-19 | 2009-04-01 | 24 |
M1 | K431 | 2013-08-26 | 2009-04-01 | 65 |
M1 | K431 | 2013-06-22 | 2009-04-01 | 78 |
M1 | K431 | 2013-04-05 | 2009-04-01 | 228 |
M1 | K431 | 2012-08-20 | 2009-04-01 | 31 |
M1 | K431 | 2012-07-20 | 2009-04-01 | 0 |
M1 | K431 | 2012-07-20 | 2009-04-01 | 86 |
M1 | K431 | 2012-04-25 | 2009-04-01 | 44 |
M1 | K431 | 2012-03-12 | 2009-04-01 | 265 |
M1 | K431 | 2011-06-21 | 2009-04-01 | 205 |
M1 | K431 | 2010-11-28 | 2009-04-01 | 0 |
M1 | K431 | 2010-11-28 | 2009-04-01 | 146 |
M1 | K431 | 2010-07-05 | 2009-04-01 | 95 |
M1 | K431 | 2010-04-01 | 2009-04-01 | 365 |
M1 | P607C | 2016-10-13 | 2008-04-01 | 104 |
M1 | P607C | 2016-07-01 | 2008-04-01 | 88 |
Z1 | P607C | 2016-04-04 | 2008-04-01 | 365 |
Z1 | P607C | 2015-04-05 | 2008-04-01 | 43 |
Z1 | P607C | 2015-02-21 | 2008-04-01 | 189 |
M1 | P607C | 2014-08-16 | 2008-04-01 | 184 |
M1 | P607C | 2014-02-13 | 2008-04-01 | 188 |
M1 | P607C | 2013-08-09 | 2008-04-01 | 119 |
M1 | P607C | 2013-04-12 | 2008-04-01 | 8 |
Z1 | P607C | 2013-04-04 | 2008-04-01 | 0 |
Z1 | P607C | 2013-04-04 | 2008-04-01 | 336 |
M1 | P607C | 2012-05-03 | 2008-04-01 | 65 |
Z1 | P607C | 2012-02-28 | 2008-04-01 | 97 |
M1 | P607C | 2011-11-23 | 2008-04-01 | 156 |
M1 | P607C | 2011-06-20 | 2008-04-01 | 90 |
M1 | P607C | 2011-03-22 | 2008-04-01 | 45 |
Z1 | P607C | 2011-02-05 | 2008-04-01 | 600 |
Z1 | P607C | 2009-06-15 | 2008-04-01 | 440 |
We will first observe the five number summary for Kirlosker, KSB and Compressor and Turbine
For some readers who do not know about Kirlosker and KSB following are the links https://kirloskar.com and https://www.ksb.com/ksb-en/
summary(Kirlosker$diff_days)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 118.0 314.0 313.2 390.0 1179.0
summary(KSB$diff_days)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.0 232.0 353.0 361.6 409.0 1504.0
summary(CompTurbine$diff_days)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 9.00 39.00 83.43 95.00 622.00
Using the Notification Type to analyze
Kirlosker %>% group_by(`Notifictn type`) %>% summarise(mean(diff_days),median(diff_days))
## # A tibble: 2 x 3
## `Notifictn type` `mean(diff_days)` `median(diff_days)`
## <chr> <dbl> <dbl>
## 1 M1 287 203
## 2 Z1 363 363
KSB %>% group_by(`Notifictn type`) %>% summarise(mean(diff_days),median(diff_days))
## # A tibble: 3 x 3
## `Notifictn type` `mean(diff_days)` `median(diff_days)`
## <chr> <dbl> <dbl>
## 1 M1 319 242
## 2 Z1 350 353
## 3 Z3 480 384
CompTurbine %>% group_by(`Notifictn type`) %>% summarise(mean(diff_days),median(diff_days))
## # A tibble: 3 x 3
## `Notifictn type` `mean(diff_days)` `median(diff_days)`
## <chr> <dbl> <dbl>
## 1 M1 76.6 43.0
## 2 Z1 259 262
## 3 Z3 60.5 29.0
As we can see that Z is preventive maintainance and M is natural fault that occurs We see that Z category has a mean difference greater than M
Converting into factor variables
Kirlosker$`Notifictn type`<-as.factor(Kirlosker$`Notifictn type`)
KSB$`Notifictn type`<-as.factor(KSB$`Notifictn type`)
CompTurbine$`Notifictn type`<-as.factor(CompTurbine$`Notifictn type`)
Now putting linear regression
Y(dependent variable) = Future date of failure , calculated based on the historical analysis . However dates which are quite close to each other having diff_days=0 have to be left out since MTBF will be wrong if we include those
X (independent variable) = Can be a factor variable which in this case can be notification type.
kker<-Kirlosker %>% filter(diff_days>0)
kker_model<-lm(formula=kker$diff_days~kker$`Notifictn type`,data=kker)
summary(kker_model)
##
## Call:
## lm(formula = kker$diff_days ~ kker$`Notifictn type`, data = kker)
##
## Residuals:
## Min 1Q Median 3Q Max
## -285.09 -158.72 -29.59 95.91 891.91
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 287.09 34.70 8.273 1.07e-11 ***
## kker$`Notifictn type`Z1 92.50 60.11 1.539 0.129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 230.2 on 64 degrees of freedom
## Multiple R-squared: 0.03568, Adjusted R-squared: 0.02062
## F-statistic: 2.368 on 1 and 64 DF, p-value: 0.1288
ksber<-KSB %>% filter(diff_days>0)
ksber_model<-lm(formula=ksber$diff_days~ksber$`Notifictn type`,data=ksber)
summary(ksber_model)
##
## Call:
## lm(formula = ksber$diff_days ~ ksber$`Notifictn type`, data = ksber)
##
## Residuals:
## Min 1Q Median 3Q Max
## -344.56 -150.27 -19.56 58.44 1185.09
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 318.91 54.05 5.900 1.99e-07 ***
## ksber$`Notifictn type`Z1 30.64 73.55 0.417 0.6785
## ksber$`Notifictn type`Z3 161.36 95.02 1.698 0.0948 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 259.2 on 58 degrees of freedom
## Multiple R-squared: 0.04899, Adjusted R-squared: 0.0162
## F-statistic: 1.494 on 2 and 58 DF, p-value: 0.233
CompTur<-CompTurbine %>% filter(diff_days>0)
CompTur_model<-lm(formula=CompTur$diff_days~CompTur$`Notifictn type`,data=CompTur)
summary(CompTur_model)
##
## Call:
## lm(formula = CompTur$diff_days ~ CompTur$`Notifictn type`, data = CompTur)
##
## Residuals:
## Min 1Q Median 3Q Max
## -252.71 -63.19 -37.94 19.56 539.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 82.443 12.457 6.618 1.34e-09 ***
## CompTur$`Notifictn type`Z1 213.271 43.662 4.885 3.51e-06 ***
## CompTur$`Notifictn type`Z3 -4.622 24.351 -0.190 0.85
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 110.7 on 111 degrees of freedom
## Multiple R-squared: 0.182, Adjusted R-squared: 0.1672
## F-statistic: 12.35 on 2 and 111 DF, p-value: 1.441e-05
library(ggplot2)
g1<-ggplot(aes(diff_days,`Notifictn type`),data = kker)+
geom_col(aes(`Notifictn type`,diff_days))
plot(g1)
g3<-ggplot(aes(diff_days,`Notifictn type`),data = kker)+
geom_point(aes(diff_days,`Notifictn type`))
plot(g3)
library(ggplot2)
g2<-ggplot(aes(diff_days,`Notifictn type`),data = ksber)+
geom_count(aes(`Notifictn type`,diff_days))
plot(g2)
g4<-ggplot(aes(diff_days,`Notifictn type`),data = ksber)+
geom_point(aes(diff_days,`Notifictn type`))
plot(g4)
g5<-ggplot(aes(diff_days,`Notifictn type`),data = CompTur)+
geom_point(aes(diff_days,`Notifictn type`))
plot(g5)
g6<-ggplot(aes(diff_days,`Notifictn type`),data = CompTur)+
geom_col(aes(`Notifictn type`,diff_days))
plot(g6)
Let us bind all the datasets and see if we are able to predcit
Total<-rbind(Kirlosker %>% select(Equipment,diff_days,`Notifictn type`),KSB %>% select(Equipment,diff_days,`Notifictn type`),CompTurbine %>% select(Equipment,diff_days,`Notifictn type`))
g7<-ggplot(aes(diff_days,`Notifictn type`),data = Total)+
geom_point(aes(`Notifictn type`,diff_days))
plot(g7)
If we try to group them based on the categories of the notification then we see that there is no feature in the dataset which can disect this dataset into three categories
Total<-Total %>% filter(diff_days >0)
results<-kmeans(Total$diff_days,3)
table(Total$`Notifictn type`,results$cluster)
##
## 1 2 3
## M1 105 13 28
## Z1 7 8 41
## Z3 24 3 12
How the overall data is behaving irrespective of any pumps.An attempt to figure out something from the plot
plot(Total$diff_days,type='o')
Are there any outliers
boxplot(Total$diff_days)
From the boxplot we see that observations that are below the median don’t have quite a variance but above the median the observations are quite far apart as illustrated from the box plot.
What is causing such behavior I don’t know. We may have to slice the data from different angles so that we can get a better perspective
We are not able to get good clusters as well.
After applying different techniques, I am thinking that a pump’s fault would be based on what kind/when repairing has been done.If pump has been repaired yesterday chances are less that it will need a repair within a month
Let us apply time series model
library(forecast)
mydata.arima101<-forecast::Arima(y = Total$diff_days,order = c(1,0,1))
mydata.pred1 <- predict(mydata.arima101, n.ahead=100)
plot (Total$diff_days)
lines(mydata.pred1$pred, col="blue")
lines(mydata.pred1$pred+2*mydata.pred1$se, col="red")
lines(mydata.pred1$pred-2*mydata.pred1$se, col="red")
Using the Arima 1,0,1 model and using the standard error , we are calculating a confidence interval for our prediction
mydata.pred1$pred+2*mydata.pred1$se
## Time Series:
## Start = 242
## End = 341
## Frequency = 1
## [1] 608.9806 611.1307 613.2139 615.2327 617.1894 619.0862 620.9254
## [8] 622.7090 624.4389 626.1170 627.7452 629.3250 630.8583 632.3465
## [15] 633.7912 635.1939 636.5558 637.8785 639.1631 640.4109 641.6231
## [22] 642.8008 643.9452 645.0573 646.1381 647.1886 648.2098 649.2025
## [29] 650.1677 651.1062 652.0189 652.9064 653.7697 654.6093 655.4261
## [36] 656.2208 656.9939 657.7461 658.4782 659.1905 659.8838 660.5586
## [43] 661.2155 661.8549 662.4774 663.0834 663.6735 664.2482 664.8077
## [50] 665.3527 665.8834 666.4004 666.9039 667.3945 667.8724 668.3380
## [57] 668.7916 669.2336 669.6644 670.0842 670.4933 670.8921 671.2807
## [64] 671.6596 672.0290 672.3890 672.7401 673.0824 673.4162 673.7417
## [71] 674.0591 674.3686 674.6706 674.9651 675.2523 675.5326 675.8060
## [78] 676.0727 676.3329 676.5869 676.8347 677.0765 677.3125 677.5428
## [85] 677.7676 677.9870 678.2012 678.4103 678.6145 678.8138 679.0084
## [92] 679.1984 679.3840 679.5652 679.7422 679.9151 680.0839 680.2488
## [99] 680.4099 680.5673
mydata.pred1$pred-2*mydata.pred1$se
## Time Series:
## Start = 242
## End = 341
## Frequency = 1
## [1] -179.4064 -180.9676 -182.4707 -183.9178 -185.3113 -186.6533 -187.9457
## [8] -189.1906 -190.3898 -191.5450 -192.6580 -193.7303 -194.7634 -195.7589
## [15] -196.7181 -197.6424 -198.5331 -199.3914 -200.2185 -201.0156 -201.7837
## [22] -202.5238 -203.2371 -203.9244 -204.5867 -205.2249 -205.8398 -206.4323
## [29] -207.0031 -207.5531 -208.0829 -208.5933 -209.0849 -209.5584 -210.0145
## [36] -210.4537 -210.8766 -211.2838 -211.6759 -212.0533 -212.4166 -212.7663
## [43] -213.1028 -213.4266 -213.7381 -214.0377 -214.3260 -214.6032 -214.8697
## [50] -215.1259 -215.3722 -215.6089 -215.8363 -216.0547 -216.2646 -216.4661
## [57] -216.6595 -216.8451 -217.0233 -217.1942 -217.3581 -217.5153 -217.6659
## [64] -217.8102 -217.9485 -218.0809 -218.2077 -218.3290 -218.4450 -218.5559
## [71] -218.6620 -218.7633 -218.8600 -218.9523 -219.0404 -219.1244 -219.2044
## [78] -219.2806 -219.3532 -219.4221 -219.4877 -219.5500 -219.6091 -219.6652
## [85] -219.7183 -219.7685 -219.8160 -219.8609 -219.9033 -219.9432 -219.9807
## [92] -220.0160 -220.0491 -220.0800 -220.1090 -220.1360 -220.1612 -220.1845
## [99] -220.2062 -220.2261
As we can see the red part is in negative days so we will have to adjust our prediction
In time series based approach we don’t have to calculate the differences between successive observations. Times series methods inherently take the differences and the differences can be of first order or second order or more depending on the stationariness of the data
There are various tests to check the stationarity of the data
I know of Dickey-Fuller test so I will be using it
Total<-Total %>% filter(diff_days>0)
tseries::adf.test(Total$diff_days,alternative = "stationary",k = 0)
## Warning in tseries::adf.test(Total$diff_days, alternative = "stationary", :
## p-value smaller than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: Total$diff_days
## Dickey-Fuller = -11.718, Lag order = 0, p-value = 0.01
## alternative hypothesis: stationary
acf(Total$diff_days)
pacf(Total$diff_days)
ggplot(data = Total,aes(diff_days,Equipment))+
geom_line(aes(diff_days,color=Equipment))+
geom_point(aes(diff_days,color=Equipment))+
geom_vline(xintercept = 320)
The point of drawing such a plot is to see the distribution of the differences in the successive two faults. Drawing a line at h=320 because this line intersects with every other pump.It means that for every pumps there will probably be one observation
In conclusion, probabilty of occurence is maximum
We can take a reference point for dates. For every pump there is a date installed
ggplot(Kirlosker,aes(diff_days,Equipment))+
geom_point(aes(diff_days,Equipment),data = Kirlosker)
We will be plotting the dates as a function of index
ggplot(Kirlosker,aes(diff_days))+
geom_line(aes(y=diff_days,x=seq(1:67)),data = Kirlosker)
ggplot(Kirlosker,aes(diff_days))+
geom_line(aes(y=diff_days,x=seq(1:61)),data = KSB)
ggplot(Kirlosker,aes(diff_days))+
geom_line(aes(y=diff_days,x=seq(1:129)),data = CompTurbine)