About this file: a comprehensive CSV file containing all measurement sessions and features. Each row represents one snapshot of sensor data at a certain time step. The sample rate is 2 Hz (one row per 0.5 seconds). Distinct sessions are identified by “profile_id”. A short overview of the columns:
ambient Ambient temperature as measured by a thermal sensor located closely to the stator.
coolant Coolant temperature. The motor is water cooled. Measurement is taken at outflow.
u_d Voltage d-component
u_q Voltage q-component
motor_speed Motor speed
torque Torque induced by current.
i_d Current d-component
i_q Current q-component
pm Permanent Magnet surface temperature representing the rotor temperature. This was measured with an infrared thermography unit.
stator_yoke Stator yoke temperature measured with a thermal sensor.
stator_tooth Stator tooth temperature measured with a thermal sensor.
stator_winding Stator winding temperature measured with a thermal sensor.
profile_id Each measurement session has a unique ID. Make sure not to try to estimate from one session onto the other as they are strongly independent

Loading Dataset

# Read the measurement CSV into a data frame and print its structure.
dataset <- read.csv("pmsm_temperature_data.csv")
str(dataset)
## 'data.frame':    998070 obs. of  13 variables:
##  $ ambient       : num  -0.752 -0.771 -0.783 -0.781 -0.774 ...
##  $ coolant       : num  -1.12 -1.12 -1.12 -1.12 -1.12 ...
##  $ u_d           : num  0.328 0.33 0.333 0.334 0.335 ...
##  $ u_q           : num  -1.3 -1.3 -1.3 -1.3 -1.3 ...
##  $ motor_speed   : num  -1.22 -1.22 -1.22 -1.22 -1.22 ...
##  $ torque        : num  -0.25 -0.249 -0.249 -0.249 -0.249 ...
##  $ i_d           : num  1.03 1.03 1.03 1.03 1.03 ...
##  $ i_q           : num  -0.246 -0.246 -0.246 -0.247 -0.247 ...
##  $ pm            : num  -2.52 -2.52 -2.52 -2.52 -2.52 ...
##  $ stator_yoke   : num  -1.83 -1.83 -1.83 -1.83 -1.83 ...
##  $ stator_tooth  : num  -2.07 -2.06 -2.06 -2.06 -2.06 ...
##  $ stator_winding: num  -2.02 -2.02 -2.02 -2.02 -2.02 ...
##  $ profile_id    : int  4 4 4 4 4 4 4 4 4 4 ...
# View profile_id as a factor and list its distinct levels (session ids).
f <- as.factor(dataset[["profile_id"]])
levels(f)
##  [1] "4"  "6"  "10" "11" "20" "27" "29" "30" "31" "32" "36" "41" "42" "43"
## [15] "44" "45" "46" "47" "48" "49" "50" "51" "52" "53" "54" "55" "56" "57"
## [29] "58" "59" "60" "61" "62" "63" "64" "65" "66" "67" "68" "69" "70" "71"
## [43] "72" "73" "74" "75" "76" "77" "78" "79" "80" "81"
# Store profile_id as a factor so later steps treat it as categorical.
dataset[["profile_id"]] <- factor(dataset[["profile_id"]])

The data therefore contain 52 distinct profile ids.

Loading Required libraries

library(ggplot2)
library(tidyr)
library(funModeling)
library(dplyr)

Exploratory Data Analysis

Histogram of all columns

# Plot the columns of the dataset with funModeling's plot_num
# (one histogram per column, per the section heading).
plot_num(dataset)

Minimum, median and maximum motor_speed of each profile id

# Per-profile maximum, median and minimum of motor_speed.
form1 <- dataset %>%
  group_by(profile_id) %>%
  summarise(
    max1 = max(motor_speed),
    med1 = median(motor_speed),
    min1 = min(motor_speed)
  )
str(form1)
## Classes 'tbl_df', 'tbl' and 'data.frame':    52 obs. of  4 variables:
##  $ profile_id: Factor w/ 52 levels "4","6","10","11",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ max1      : num  2.02 1.48 2.02 1.99 1.89 ...
##  $ med1      : num  -0.14025 1.48302 -0.64616 0.00289 0.26557 ...
##  $ min1      : num  -1.22 -1.22 -1.24 -1.22 -1.22 ...

Having a boxplot over all the profile id

# Horizontal boxplots of motor_speed, one per profile, coloured by profile.
ggplot(
  data = dataset,
  aes(x = profile_id, y = motor_speed, color = profile_id)
) +
  geom_boxplot() +
  coord_flip()

Removing outliers from the boxplot

# Same boxplots as before, with the outlier markers suppressed
# (outlier.shape = NA).
ggplot(
  data = dataset,
  aes(x = profile_id, y = motor_speed, color = profile_id)
) +
  geom_boxplot(outlier.shape = NA) +
  coord_flip()

Maximum motor speed of each profile

# Maximum motor_speed per profile: red points joined by a violet line.
# group = 1 lets geom_line connect the points across the discrete x axis.
ggplot(data = form1, aes(x = profile_id, y = max1, group = 1)) +
  geom_point(color = "red") +
  geom_line(color = "violet") +
  # Title fixed: the x axis is profile_id (one value per measurement
  # session), so the plot is per profile, not per motor.
  ggtitle("Maximum motor speed of each profile")

Observation

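According to Ohm's law, a resistance can be derived when only the voltage and the current are available:
V = I·R, where V = voltage, I = current and R = resistance.
Here the voltage is taken as u_d + u_q and the current as i_d + i_q. This is only a rough indicator: the ratio becomes extremely large wherever i_d + i_q is close to zero, as the minimum and maximum in the summary below show.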
# Row-wise ratio of the summed voltage components to the summed current
# components, followed by its five-number summary.
form2 <- dataset %>%
  mutate(resistance = (u_d + u_q) / (i_d + i_q))
summary(form2[["resistance"]])
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -360409.9      -1.2      -0.6      -0.6       0.5  299505.3

Group based on clockwise and anticlockwise motorspeed

# Flag each row as "clock" when motor_speed > 0, otherwise "anticlock",
# then show the first ten rows.
dataset2 <- dataset %>%
  mutate(
    clock_anticlock = ifelse(motor_speed > 0, "clock", "anticlock")
  )
dataset2[1:10, ]
##       ambient   coolant       u_d       u_q motor_speed     torque
## 1  -0.7521430 -1.118446 0.3279352 -1.297858   -1.222428 -0.2501821
## 2  -0.7712632 -1.117021 0.3296648 -1.297686   -1.222429 -0.2491333
## 3  -0.7828916 -1.116681 0.3327715 -1.301822   -1.222428 -0.2494311
## 4  -0.7809354 -1.116764 0.3336999 -1.301852   -1.222430 -0.2486364
## 5  -0.7740426 -1.116775 0.3352061 -1.303118   -1.222429 -0.2487008
## 6  -0.7629362 -1.116955 0.3349012 -1.303017   -1.222429 -0.2481970
## 7  -0.7492281 -1.116170 0.3350135 -1.302082   -1.222430 -0.2479142
## 8  -0.7384499 -1.113986 0.3362563 -1.305155   -1.222432 -0.2483210
## 9  -0.7309097 -1.111828 0.3349053 -1.303790   -1.222431 -0.2477847
## 10 -0.7271296 -1.109486 0.3359881 -1.305633   -1.222431 -0.2482944
##         i_d        i_q        pm stator_yoke stator_tooth stator_winding
## 1  1.029572 -0.2458600 -2.522071   -1.831422    -2.066143      -2.018033
## 2  1.029509 -0.2458323 -2.522418   -1.830969    -2.064859      -2.017631
## 3  1.029448 -0.2458179 -2.522673   -1.830400    -2.064073      -2.017343
## 4  1.032845 -0.2469548 -2.521639   -1.830333    -2.063137      -2.017632
## 5  1.031807 -0.2466097 -2.521900   -1.830498    -2.062795      -2.018145
## 6  1.031031 -0.2463406 -2.522203   -1.831931    -2.062549      -2.017884
## 7  1.030493 -0.2461615 -2.522538   -1.833012    -2.062115      -2.017243
## 8  1.030107 -0.2460349 -2.522844   -1.832182    -2.061953      -2.017213
## 9  1.029851 -0.2459809 -2.522808   -1.831576    -2.062443      -2.017739
## 10 1.029636 -0.2458877 -2.522677   -1.831438    -2.062317      -2.018180
##    profile_id clock_anticlock
## 1           4       anticlock
## 2           4       anticlock
## 3           4       anticlock
## 4           4       anticlock
## 5           4       anticlock
## 6           4       anticlock
## 7           4       anticlock
## 8           4       anticlock
## 9           4       anticlock
## 10          4       anticlock

Composition of clock_anticlock for each profile

# Side-by-side bars of row counts per profile, split by clock_anticlock.
ggplot(data = dataset2, aes(x = profile_id, fill = clock_anticlock)) +
  geom_bar(position = "dodge")

Composition of records of different profile ids

library(plotrix)
# Number of rows recorded for each profile.
composition <- dataset %>%
  group_by(profile_id) %>%
  summarise(no_rows = length(profile_id))
# 3D pie chart of the per-profile row counts, labelled with the profile id.
pie3D(
  composition$no_rows,
  labels = composition$profile_id,
  explode = 0.001,
  main = "Composition of different profile ids "
)

To obtain a better prediction model, the data is shuffled

# Shuffle the rows of the dataset. Later steps take the first N rows of the
# shuffled frame as their working sample, so the seed is fixed here to make
# those samples (and everything derived from them) reproducible across runs.
set.seed(5)
dataset <- dataset[sample(nrow(dataset)), ]
# Show the first ten shuffled rows.
dataset[1:10, ]
##            ambient    coolant        u_d        u_q  motor_speed
## 928676  0.58421123  2.0008519  1.5930232  0.9764124  0.845988500
## 161751 -0.68321884 -1.0508887 -1.0667872  0.8891351 -0.004971894
## 657152  0.68860870  1.1272036  0.7437856  1.5852509  1.563562200
## 514906  0.00814226  0.6006848  0.6743952 -1.1148270 -0.986741000
## 178968 -2.75069950 -1.0366476 -1.6007240 -0.6084604  1.023104800
## 798049 -1.00165570 -0.4198218 -1.4079956  0.1159275  0.035594445
## 279864 -2.64922290 -1.0642931 -1.6452277 -0.9898827 -0.248465820
## 848260  1.46169460  0.5542523  2.0766978  0.1425142 -0.031537727
## 530913  0.73645216  0.2046274 -0.1284589  0.1908144 -0.474326400
## 779503  0.52630120  0.1352808  0.3564143 -1.2782458 -1.222428300
##            torque        i_d        i_q         pm stator_yoke
## 928676 -1.0370494 -0.3413019 -1.0135430  1.1426723  2.04552910
## 161751  0.9455174  0.4366075  1.0688396 -0.6762463 -1.02593820
## 657152 -0.4841578 -0.4671991 -0.4683290  1.1440512  1.07366380
## 514906 -2.4675412 -0.3403582 -2.5398347 -1.2031977  0.81458205
## 178968  0.8717596 -1.9739234  0.7211785 -0.1647044 -0.76907045
## 798049  1.4761018 -0.7909316  1.4346538 -1.4187038 -0.73472506
## 279864  2.9214988 -2.6742950  2.5686433 -1.5766789 -1.15441630
## 848260 -2.1413672 -0.1850266 -2.2089918  0.5538067  0.70014054
## 530913  0.2616976  0.8753130  0.3649761  0.2931785  1.11301540
## 779503 -0.2556397  1.0291519 -0.2457062  0.3337078 -0.05771145
##        stator_tooth stator_winding profile_id
## 928676    1.8391030     1.52068850         79
## 161751   -0.9096749    -0.84218490         27
## 657152    1.0007882     0.88959260         65
## 514906    0.7703874     0.91623130         57
## 178968   -0.3279659     0.01770049         29
## 798049   -0.7700177    -0.72994780         71
## 279864   -0.7664260     0.17022896         36
## 848260    0.7702012     0.64092374         74
## 530913    0.7702012     0.29948136         58
## 779503   -0.2788869    -0.43881804         70

Because the dataset is very large, only a small subset of the rows is used for the time-consuming steps

Visualising motor_speed vs ambient, with torque as point size and profile id as colour

# Scatter of the first 200 rows: motor_speed vs ambient, point size by
# torque and colour by profile_id.
ggplot(
  data = dataset[1:200, ],
  aes(x = motor_speed, y = ambient, size = torque, col = profile_id)
) +
  geom_point()

Normalising the records

# Work on a copy of the per-profile row counts.
com <- composition
# Keep the profile id in a separate column used as the plot label.
com$`profile name` <- com$profile_id
# z-score of each profile's row count, rounded to two decimals.
com$motors_z <- with(
  com,
  round((no_rows - mean(no_rows)) / sd(no_rows), 2)
)
# Flag profiles whose z-score is negative as "below", all others as "above".
com$motors_type <- ifelse(com$motors_z < 0, "below", "above")
# Order the rows by ascending z-score ...
com <- com[order(com$motors_z), ]
# ... and rebuild the factor so its levels follow that sorted order in plots.
com$`profile name` <- factor(
  com$`profile name`,
  levels = com$`profile name`
)

Normalised usage of motors

# Diverging bar chart of the z-scored row count of each profile.
# Bars are filled by the above/below flag in motors_type and the axes are
# flipped so that profiles are listed vertically.
ggplot(com, aes(x = `profile name`, y = motors_z, label = motors_z)) +
  geom_bar(stat = "identity", aes(fill = motors_type), width = .5) +
  # Legend title fixed: the bars encode the number of records per profile,
  # not "Mileage".
  scale_fill_manual(
    name = "Number of records",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  labs(
    subtitle = "Normalised usage of motors",
    title = "Diverging Bars"
  ) +
  coord_flip()

Density plot using profile id and motor speed

# Density of motor_speed, one semi-transparent filled curve per profile.
dataset2 <- dataset
g <- ggplot(dataset2, aes(motor_speed))
g +
  geom_density(aes(fill = profile_id), alpha = 0.5) +
  labs(
    title = "Density plot using profile id and motor speed",
    x = "Motor Speed",
    fill = "# Profile id"
  )

Violin plot torque vs clock

# Violin plot of torque for the "clock" and "anticlock" groups.
# The flag is rebuilt from the sign of motor_speed.
dataset2 <- dataset %>%
  mutate(
    clock_anticlock = ifelse(motor_speed > 0, "clock", "anticlock")
  )
g <- ggplot(
  dataset2,
  aes(clock_anticlock, torque, fill = clock_anticlock)
)
g +
  geom_violin() +
  labs(
    title = "Violin plot torque vs clock",
    x = "Clock of Motor",
    y = "Torque of Motor"
  )

Counts Plot profile id vs torque

# Counts plot of torque against profile_id, coloured by profile and drawn
# without a legend.
g <- ggplot(dataset2, aes(profile_id, torque, col = profile_id))
g +
  geom_count(show.legend = FALSE) +
  labs(
    y = "torque",
    x = "profile_id",
    title = "Counts Plot profile id vs torque"
  )

Subgraphs of min,mid and max of profiles

# Per-profile maximum, median and minimum of motor_speed.
com <- dataset %>%
  group_by(profile_id) %>%
  summarise(
    max1 = max(motor_speed),
    med1 = median(motor_speed),
    min1 = min(motor_speed)
  )
# Flag profiles by the sign of their median speed.
com$type <- ifelse(com$med1 > 0, "above", "below")
# Row names (as strings) are used as the facet variable below.
com$names <- rownames(com)
# profile_id is a factor: as.integer() on its own returns the internal level
# codes (1, 2, 3, ...) rather than the real ids (4, 6, 10, ...). Converting
# through as.character() keeps the actual profile ids.
com$profile_id <- as.integer(as.character(com$profile_id))
library(tidyr)
library(gapminder)
library(gganimate)
# Stack columns 2:4 (max1, med1, min1) into the key/value pair rate/growth.
com <- gather(com, rate, growth, 2:4)
com$col <- 1
# One panel per original summary row; colour is passed as a fixed per-row
# vector (outside aes), so it is not controlled by the colour scale.
ggplot(com, aes(as.factor(rate), growth, size = growth)) +
  geom_point(
    alpha = 0.7,
    color = as.factor(com$profile_id),
    show.legend = FALSE
  ) +
  scale_colour_manual(values = country_colors) +
  facet_wrap(~names)

Animating the subgraphs

# Animated version of the summary plot: one frame sequence over profile_id,
# faceted by the above/below flag in `type`.
com$profile_id <- as.integer(com$profile_id)
ggplot(com, aes(as.factor(rate), growth + 3, size = growth)) +
  geom_point(
    alpha = 0.7,
    color = as.factor(com$profile_id),
    show.legend = FALSE
  ) +
  scale_colour_manual(values = country_colors) +
  facet_wrap(~type) +
  # gganimate layers: the title shows the current frame time and the
  # animation steps through profile_id with linear easing.
  labs(
    title = 'Profile id: {frame_time}',
    x = 'summaries',
    y = 'values'
  ) +
  transition_time(profile_id) +
  ease_aes('linear')

Correlogram of PMSM

library(ggcorrplot)
# Correlogram over the first 1000 rows of the dataset.
dataset3 <- dataset[1:1000, ]
# cor() needs numeric columns. profile_id is a factor, so as.numeric() alone
# would return the level codes instead of the ids; convert through
# as.character() to keep the real profile ids.
dataset3$profile_id <- as.numeric(as.character(dataset3$profile_id))
# Pairwise correlations, rounded to one decimal for the in-plot labels.
corr <- round(cor(dataset3), 1)

# Lower-triangle circle plot with hierarchical ordering of the variables.
ggcorrplot(
  corr,
  hc.order = TRUE,
  type = "lower",
  lab = TRUE,
  lab_size = 3,
  method = "circle",
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlogram of PMSM",
  ggtheme = theme_bw
)

Lollipop Chart torque vs profile_id

  # Switch the default ggplot theme to theme_bw, then take the first 500 rows
  # as the working sample and print its structure.
  theme_set(theme_bw())
  dataset3 <- dataset[1:500, ]
  str(dataset3)
## 'data.frame':    500 obs. of  13 variables:
##  $ ambient       : num  0.58421 -0.68322 0.68861 0.00814 -2.7507 ...
##  $ coolant       : num  2.001 -1.051 1.127 0.601 -1.037 ...
##  $ u_d           : num  1.593 -1.067 0.744 0.674 -1.601 ...
##  $ u_q           : num  0.976 0.889 1.585 -1.115 -0.608 ...
##  $ motor_speed   : num  0.84599 -0.00497 1.56356 -0.98674 1.0231 ...
##  $ torque        : num  -1.037 0.946 -0.484 -2.468 0.872 ...
##  $ i_d           : num  -0.341 0.437 -0.467 -0.34 -1.974 ...
##  $ i_q           : num  -1.014 1.069 -0.468 -2.54 0.721 ...
##  $ pm            : num  1.143 -0.676 1.144 -1.203 -0.165 ...
##  $ stator_yoke   : num  2.046 -1.026 1.074 0.815 -0.769 ...
##  $ stator_tooth  : num  1.839 -0.91 1.001 0.77 -0.328 ...
##  $ stator_winding: num  1.5207 -0.8422 0.8896 0.9162 0.0177 ...
##  $ profile_id    : Factor w/ 52 levels "4","6","10","11",..: 50 6 36 28 7 42 11 45 29 41 ...
  # Lollipop chart: a red point at (torque, profile_id) with a segment
  # running from y = 0 up to the point; x tick labels are rotated.
  ggplot(dataset3, aes(x = torque, y = profile_id)) +
    geom_point(size = 1, col = "red") +
    geom_segment(
      aes(x = torque, xend = torque, y = 0, yend = profile_id)
    ) +
    labs(title = "Lollipop Chart torque vs profile_id") +
    theme(axis.text.x = element_text(angle = 65, vjust = 0.6))

Dot Plot coolant and profile id

  # Dot plot of coolant per profile on the first 1000 rows.
  dataset3 <- dataset[1:1000, ]
  library(scales)
  theme_set(theme_classic())

  # Blue points for the observations; a thin dashed red guide line per
  # profile spanning the overall coolant range; axes flipped.
  ggplot(dataset3, aes(x = profile_id, y = coolant)) +
    geom_point(col = "blue", size = 2) +
    geom_segment(
      aes(
        x = profile_id,
        xend = profile_id,
        y = min(coolant),
        yend = max(coolant)
      ),
      linetype = "dashed",
      col = "red",
      size = 0.1
    ) +
    labs(title = "Dot Plot coolant and profile id") +
    coord_flip()

The encircled points denoting the motors which are above average

# Scatter of i_d vs motor_speed on the first 500 rows, with the rows that are
# above the sample mean in motor_speed, torque and i_q + i_d encircled.
dataset3 <- dataset[1:500, ]
options(scipen = 999)
library(ggalt)
# Rows exceeding the sample mean on all three criteria at once.
dataset3_select <- dataset3[
  with(
    dataset3,
    motor_speed > mean(motor_speed) &
      torque > mean(torque) &
      i_q + i_d > mean(i_q + i_d)
  ),
]
ggplot(dataset3, aes(x = i_d, y = motor_speed)) +
  geom_point(aes(col = profile_id)) +
  geom_encircle(
    aes(x = i_d, y = motor_speed),
    data = dataset3_select,
    color = "red",
    size = 2,
    expand = 0.08
  ) +
  labs(
    y = "motor_speed",
    x = "i_d",
    title = "The encircled points denoting the motors which are above average"
  )

Treemap over profile id

  library(treemapify)
  library(ggplotify)
  # Draw a fresh random sample of 1000 rows, with profile_id converted from
  # factor to its real numeric id (via as.character).
  dataset2 <- dataset[sample(nrow(dataset)), ]
  dataset2$profile_id <- as.numeric(as.character(dataset2$profile_id))
  dataset2 <- dataset2[1:1000, ]

  # profile_id is a nominal label, so its numeric value must not drive the
  # tile area. Count the sampled rows per profile (column `n`) and draw one
  # tile per profile sized by that count.
  profile_counts <- dplyr::count(dataset2, profile_id)

  ggplot(
    profile_counts,
    aes(area = n, fill = as.factor(profile_id), label = profile_id)
  ) +
    geom_treemap() +
    geom_treemap_text(
      fontface = "italic",
      colour = "white",
      place = "centre",
      grow = TRUE
    ) +
    labs(title = "Treemap over profile id")

Hierarchical dendrogram over profiles

# Recompute the number of rows per profile.
composition <- dataset %>%
  group_by(profile_id) %>%
  summarise(no_rows = length(profile_id))
com <- composition
# Separate label column holding the profile id.
com$`profile name` <- com$profile_id
# z-score of each profile's row count, rounded to two decimals.
com$motors_z <- with(
  com,
  round((no_rows - mean(no_rows)) / sd(no_rows), 2)
)
# Negative z-scores are flagged "below", everything else "above".
com$motors_type <- ifelse(com$motors_z < 0, "below", "above")
# Sort by z-score and rebuild the label factor so its levels follow that order.
com <- com[order(com$motors_z), ]
com$`profile name` <- factor(
  com$`profile name`,
  levels = com$`profile name`
)
dataset2 <- com
str(dataset2)
## Classes 'tbl_df', 'tbl' and 'data.frame':    52 obs. of  5 variables:
##  $ profile_id  : Factor w/ 52 levels "4","6","10","11",..: 17 18 23 22 35 30 4 14 49 20 ...
##  $ no_rows     : int  2179 2175 3725 6260 6249 7474 7886 8442 8444 10815 ...
##  $ profile name: Factor w/ 52 levels "46","47","52",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ motors_z    : num  -1.65 -1.65 -1.5 -1.25 -1.25 -1.13 -1.09 -1.04 -1.04 -0.81 ...
##  $ motors_type : chr  "below" "below" "below" "below" ...
  # Hierarchical clustering of the profiles on their (scaled) profile id.
  # profile_id is a factor, so as.numeric() alone would return level codes;
  # go through as.character() to recover the real ids.
  profile_ids <- as.character(dataset2$profile_id)
  scaled_ids <- scale(as.numeric(profile_ids))
  # Row names are carried through dist() into hclust() as leaf labels, so the
  # tips show the profile ids instead of anonymous row positions.
  rownames(scaled_ids) <- profile_ids
  dd <- dist(scaled_ids, method = "euclidean")
  hc <- hclust(dd, method = "ward.D2")
  library("ape")
  # Draw the tree as a cladogram with dashed steel-blue edges.
  plot(as.phylo(hc), type = "cladogram", cex = 0.6,
       edge.color = "steelblue", edge.width = 2, edge.lty = 2,
       tip.color = "steelblue")
  title(main="Hierarchical dendogram over profiles")

Analysis & Modeling

Machine Learning Algorithm for Electric Motor Temperature data

# Elbow method input: first 1000 rows, columns 3 to 7 of the dataset.
dataset2 <- dataset[1:1000, ]
dataset1 <- dataset2[3:7]
library(cluster)
set.seed(5)
# Total within-cluster sum of squares for k = 1..20, computed in increasing
# order of k so the random-number sequence matches a plain sequential loop.
wcss <- numeric(20)
for (i in seq_len(20)) {
  wcss[i] <- sum(kmeans(dataset1, centers = i)$withinss)
}
wcss
##  [1] 5109.7678 3238.5025 2106.4017 1495.1583 1182.7845  992.0054  963.7830
##  [8]  755.5049  681.3732  617.5483  560.0434  495.5868  461.2586  446.6056
## [15]  427.3760  387.0579  347.8155  351.9645  315.7313  284.5121
  # Elbow curve: WCSS against the number of clusters, points joined by lines.
  plot(
    seq_len(20), wcss,
    type = "b",
    main = "The Elbow Method",
    xlab = "Clusters",
    ylab = "WCSS"
  )

Unsupervised learning [K-means clustering]

# Fit k-means with 5 centres on the selected columns.
# NOTE(review): the result is stored under the name `kmeans`, which shadows
# the function name for variable lookups; the name is kept so that any later
# reference to it keeps working.
kmeans <- kmeans(x = dataset1, centers = 5)
# Cluster assignment of every row.
y_kmeans <- kmeans$cluster
# Visualise the clusters with cluster::clusplot.
library(cluster)
# Axis labels fixed: clusplot draws the data on its first two principal
# components, so the axes are components, not "Usage" / "Overall Consumption".
clusplot(
  dataset1,
  y_kmeans,
  shade = TRUE,
  lines = 1,
  color = TRUE,
  main = "Cluster of Electric Motor Temperature",
  xlab = "Component 1",
  ylab = "Component 2"
)

Mclust

library(mclust)
# Model-based clustering on columns 3 to 7 of the first 10000 rows.
dataset2 <- dataset[1:10000, ]
dataset1 <- dataset2[3:7]
fit <- Mclust(dataset1)
# Plot the fitted model.
plot(fit)

Summarising the content

# Print the summary of the fitted Mclust model.
summary(fit)
## ---------------------------------------------------- 
## Gaussian finite mixture model fitted by EM algorithm 
## ---------------------------------------------------- 
## 
## Mclust VEV (ellipsoidal, equal shape) model with 9 components: 
## 
##  log-likelihood     n  df      BIC      ICL
##        36284.74 10000 156 71132.66 70842.87
## 
## Clustering table:
##    1    2    3    4    5    6    7    8    9 
## 1641  861 1789  946 1050 1263  794 1281  375