About this file: A comprehensive CSV file containing all measurement sessions and features. Each row represents one snapshot of sensor data at a certain time step. The sample rate is 2 Hz (one row per 0.5 seconds). Distinct sessions are identified by “profile_id”. A short overview of the columns
Exploratory Data Analysis
Histogram of all columns
# Plot the distribution of the columns of `dataset` (per the heading: histograms).
# NOTE(review): plot_num() is not defined in this file - presumably
# funModeling::plot_num(); confirm the package is loaded before this call.
plot_num(dataset)

Minimum, median and maximum motor_speed of each profile id
# Per-profile summary of motor_speed: maximum, median and minimum.
form1 <- dataset %>%
  group_by(profile_id) %>%
  summarise(
    max1 = max(motor_speed),
    med1 = median(motor_speed),
    min1 = min(motor_speed)
  )
# Show the structure of the summary table.
str(form1)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52 obs. of 4 variables:
## $ profile_id: Factor w/ 52 levels "4","6","10","11",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ max1 : num 2.02 1.48 2.02 1.99 1.89 ...
## $ med1 : num -0.14025 1.48302 -0.64616 0.00289 0.26557 ...
## $ min1 : num -1.22 -1.22 -1.24 -1.22 -1.22 ...
Boxplot of motor speed for every profile id
# Horizontal boxplots of motor_speed, one box (and colour) per profile_id.
ggplot(
  data = dataset,
  mapping = aes(x = profile_id, y = motor_speed, color = profile_id)
) +
  geom_boxplot() +
  coord_flip()

Hiding outliers in the boxplot
# Same boxplot as before, but outlier points are not drawn
# (outlier.shape = NA only hides them; the data is unchanged).
ggplot(
  data = dataset,
  mapping = aes(x = profile_id, y = motor_speed, color = profile_id)
) +
  geom_boxplot(outlier.shape = NA) +
  coord_flip()

Maximum motor speed of each profile
# Maximum motor_speed per profile, drawn as red points joined by a violet line.
# group = 1 puts all profiles in a single group so geom_line() connects them
# even though profile_id is discrete.
# The title refers to profiles (the x axis), not to separate motors.
ggplot(data = form1, aes(x = profile_id, y = max1, group = 1)) +
  geom_point(color = "red") +
  geom_line(color = "violet") +
  ggtitle("Maximum motor speed of each profile")

Observation
According to Ohm's law, once voltage and current are known, the resistance can be derived:
$V = IR$, where $V$ = voltage, $I$ = current and $R$ = resistance.
Here the voltage is taken as $u_d + u_q$ and the current as $i_d + i_q$.
# Add a `resistance` column: summed voltage columns divided by summed current
# columns.
# NOTE(review): the summary printed below ranges from about -360410 to 299505,
# i.e. the denominator i_d + i_q gets very close to zero for some rows.
# NOTE(review): u_d/u_q and i_d/i_q look like d/q-axis components and the
# columns look standardised; if so, a plain sum is not a voltage/current
# magnitude and this ratio is not a physical resistance - confirm.
form2 <- dataset %>%
  mutate(resistance = (u_d + u_q) / (i_d + i_q))
summary(form2$resistance)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -360409.9 -1.2 -0.6 -0.6 0.5 299505.3
Group based on clockwise and anticlockwise motorspeed
# Label each row by the sign of motor_speed: strictly positive -> "clock",
# everything else (zero or negative) -> "anticlock".
dataset2 <- dataset %>%
  mutate(
    clock_anticlock = ifelse(motor_speed > 0, "clock", "anticlock")
  )
# Show the first ten rows.
dataset2[1:10, ]
## ambient coolant u_d u_q motor_speed torque
## 1 -0.7521430 -1.118446 0.3279352 -1.297858 -1.222428 -0.2501821
## 2 -0.7712632 -1.117021 0.3296648 -1.297686 -1.222429 -0.2491333
## 3 -0.7828916 -1.116681 0.3327715 -1.301822 -1.222428 -0.2494311
## 4 -0.7809354 -1.116764 0.3336999 -1.301852 -1.222430 -0.2486364
## 5 -0.7740426 -1.116775 0.3352061 -1.303118 -1.222429 -0.2487008
## 6 -0.7629362 -1.116955 0.3349012 -1.303017 -1.222429 -0.2481970
## 7 -0.7492281 -1.116170 0.3350135 -1.302082 -1.222430 -0.2479142
## 8 -0.7384499 -1.113986 0.3362563 -1.305155 -1.222432 -0.2483210
## 9 -0.7309097 -1.111828 0.3349053 -1.303790 -1.222431 -0.2477847
## 10 -0.7271296 -1.109486 0.3359881 -1.305633 -1.222431 -0.2482944
## i_d i_q pm stator_yoke stator_tooth stator_winding
## 1 1.029572 -0.2458600 -2.522071 -1.831422 -2.066143 -2.018033
## 2 1.029509 -0.2458323 -2.522418 -1.830969 -2.064859 -2.017631
## 3 1.029448 -0.2458179 -2.522673 -1.830400 -2.064073 -2.017343
## 4 1.032845 -0.2469548 -2.521639 -1.830333 -2.063137 -2.017632
## 5 1.031807 -0.2466097 -2.521900 -1.830498 -2.062795 -2.018145
## 6 1.031031 -0.2463406 -2.522203 -1.831931 -2.062549 -2.017884
## 7 1.030493 -0.2461615 -2.522538 -1.833012 -2.062115 -2.017243
## 8 1.030107 -0.2460349 -2.522844 -1.832182 -2.061953 -2.017213
## 9 1.029851 -0.2459809 -2.522808 -1.831576 -2.062443 -2.017739
## 10 1.029636 -0.2458877 -2.522677 -1.831438 -2.062317 -2.018180
## profile_id clock_anticlock
## 1 4 anticlock
## 2 4 anticlock
## 3 4 anticlock
## 4 4 anticlock
## 5 4 anticlock
## 6 4 anticlock
## 7 4 anticlock
## 8 4 anticlock
## 9 4 anticlock
## 10 4 anticlock
For each profile composition of clock_anticlock
# Row counts per profile, split into side-by-side bars by rotation label.
ggplot(
  data = dataset2,
  mapping = aes(x = profile_id, fill = clock_anticlock)
) +
  geom_bar(position = "dodge")

Composition of records of different profile ids
library(plotrix)
# Number of rows recorded for each profile.
composition <- dataset %>%
  group_by(profile_id) %>%
  summarise(no_rows = n())
# 3D pie chart of those row counts, one slice per profile.
pie3D(
  composition$no_rows,
  labels = composition$profile_id,
  explode = 0.001,
  main = "Composition of different profile ids "
)

To get a better prediction model, the data is shuffled
# Shuffle the rows of `dataset`.
# The seed is fixed so the shuffle - and every later plot that uses
# "the first N rows" of the shuffled data - is reproducible between runs.
set.seed(42)
dataset <- dataset[sample(nrow(dataset)), ]
# Show the first ten shuffled rows (head() is also safe for < 10 rows).
head(dataset, 10)
## ambient coolant u_d u_q motor_speed
## 928676 0.58421123 2.0008519 1.5930232 0.9764124 0.845988500
## 161751 -0.68321884 -1.0508887 -1.0667872 0.8891351 -0.004971894
## 657152 0.68860870 1.1272036 0.7437856 1.5852509 1.563562200
## 514906 0.00814226 0.6006848 0.6743952 -1.1148270 -0.986741000
## 178968 -2.75069950 -1.0366476 -1.6007240 -0.6084604 1.023104800
## 798049 -1.00165570 -0.4198218 -1.4079956 0.1159275 0.035594445
## 279864 -2.64922290 -1.0642931 -1.6452277 -0.9898827 -0.248465820
## 848260 1.46169460 0.5542523 2.0766978 0.1425142 -0.031537727
## 530913 0.73645216 0.2046274 -0.1284589 0.1908144 -0.474326400
## 779503 0.52630120 0.1352808 0.3564143 -1.2782458 -1.222428300
## torque i_d i_q pm stator_yoke
## 928676 -1.0370494 -0.3413019 -1.0135430 1.1426723 2.04552910
## 161751 0.9455174 0.4366075 1.0688396 -0.6762463 -1.02593820
## 657152 -0.4841578 -0.4671991 -0.4683290 1.1440512 1.07366380
## 514906 -2.4675412 -0.3403582 -2.5398347 -1.2031977 0.81458205
## 178968 0.8717596 -1.9739234 0.7211785 -0.1647044 -0.76907045
## 798049 1.4761018 -0.7909316 1.4346538 -1.4187038 -0.73472506
## 279864 2.9214988 -2.6742950 2.5686433 -1.5766789 -1.15441630
## 848260 -2.1413672 -0.1850266 -2.2089918 0.5538067 0.70014054
## 530913 0.2616976 0.8753130 0.3649761 0.2931785 1.11301540
## 779503 -0.2556397 1.0291519 -0.2457062 0.3337078 -0.05771145
## stator_tooth stator_winding profile_id
## 928676 1.8391030 1.52068850 79
## 161751 -0.9096749 -0.84218490 27
## 657152 1.0007882 0.88959260 65
## 514906 0.7703874 0.91623130 57
## 178968 -0.3279659 0.01770049 29
## 798049 -0.7700177 -0.72994780 71
## 279864 -0.7664260 0.17022896 36
## 848260 0.7702012 0.64092374 74
## 530913 0.7702012 0.29948136 58
## 779503 -0.2788869 -0.43881804 70
The dataset is very large, so only a small subset of rows is used to keep processing time reasonable
Visualising motor speed vs ambient, with torque as point size and profile id as color
# Scatter of the first 200 rows: motor_speed vs ambient,
# point size = torque, point colour = profile_id.
ggplot(
  data = dataset[1:200, ],
  mapping = aes(
    x = motor_speed,
    y = ambient,
    size = torque,
    col = profile_id
  )
) +
  geom_point()

Normalising the records
# Start from the per-profile row counts.
com <- composition
# Extra column carrying the profile label used on the plot axis.
com[["profile name"]] <- com$profile_id
# z-score of each profile's row count, rounded to two decimals.
com$motors_z <- round((com$no_rows - mean(com$no_rows)) / sd(com$no_rows), 2)
# Flag: negative z-score -> "below", otherwise "above".
com$motors_type <- ifelse(com$motors_z < 0, "below", "above")
# Sort ascending by z-score.
com <- com[order(com$motors_z), ]
# Re-level the label factor in the sorted order so the plot keeps that order.
com[["profile name"]] <- factor(
  com[["profile name"]],
  levels = com[["profile name"]]
)
Normalised usage of motors
# Diverging bar chart: one horizontal bar per profile showing the z-score of
# its row count, coloured by whether it is above or below the average.
# The legend title describes the row-count flag shown by the fill
# (it was previously "Mileage", which has no meaning for this data).
ggplot(com, aes(x = `profile name`, y = motors_z, label = motors_z)) +
  geom_bar(stat = "identity", aes(fill = motors_type), width = .5) +
  scale_fill_manual(
    name = "Record count",
    labels = c("Above Average", "Below Average"),
    values = c("above" = "#00ba38", "below" = "#f8766d")
  ) +
  labs(
    subtitle = "Normalised usage of motors",
    title = "Diverging Bars"
  ) +
  coord_flip()

Density plot using profile id and motor speed
# Density of motor_speed, one semi-transparent filled curve per profile_id.
dataset2 <- dataset
g <- ggplot(dataset2, aes(motor_speed))
g +
  geom_density(aes(fill = profile_id), alpha = 0.5) +
  labs(
    title = "Density plot using profile id and motor speed",
    x = "Motor Speed",
    fill = "# Profile id"
  )

Violin plot torque vs clock
# Violin plot of torque for the two rotation labels.
# Rebuild the label column: motor_speed > 0 -> "clock", otherwise "anticlock".
dataset2 <- dataset %>%
  mutate(
    clock_anticlock = ifelse(motor_speed > 0, "clock", "anticlock")
  )
g <- ggplot(
  dataset2,
  aes(clock_anticlock, torque, fill = clock_anticlock)
)
g +
  geom_violin() +
  labs(
    title = "Violin plot torque vs clock",
    x = "Clock of Motor",
    y = "Torque of Motor"
  )

Counts Plot profile id vs torque
# Counts plot: torque per profile_id, point size = number of overlapping
# observations; coloured by profile, legend suppressed.
g <- ggplot(
  dataset2,
  aes(profile_id, torque, col = profile_id)
)
g +
  geom_count(show.legend = FALSE) +
  labs(
    y = "torque",
    x = "profile_id",
    title = "Counts Plot profile id vs torque"
  )

Subgraphs of min,mid and max of profiles
# Per-profile maximum, median and minimum of motor_speed.
com <- dataset %>%
  group_by(profile_id) %>%
  summarise(
    max1 = max(motor_speed),
    med1 = median(motor_speed),
    min1 = min(motor_speed)
  )
# Flag profiles whose median speed is positive.
com$type <- ifelse(com$med1 > 0, "above", "below")
# Row index as text; used below as the facet variable.
com$names <- rownames(com)
# profile_id is a factor: convert via as.character() so we get the real ids
# (4, 6, 10, ...) rather than the internal level codes (1, 2, 3, ...).
com$profile_id <- as.integer(as.character(com$profile_id))
library(tidyr)
library(gapminder)
library(gganimate)
# Long format: columns 2:4 (max1, med1, min1) become rate / growth pairs.
com <- gather(com, rate, growth, 2:4)
com$col <- 1
# One facet per profile row, showing its three summary values.
ggplot(com, aes(as.factor(rate), growth, size = growth)) +
  geom_point(
    alpha = 0.7,
    color = as.factor(com$profile_id),
    show.legend = FALSE
  ) +
  scale_colour_manual(values = country_colors) +
  facet_wrap(~names)

Animating the subgraphs
# Animated version of the summary plot, faceted by the above/below flag and
# stepping through profile_id over time.
com$profile_id <- as.integer(com$profile_id)
ggplot(
  com,
  aes(as.factor(rate), growth + 3, size = growth)
) +
  geom_point(
    alpha = 0.7,
    color = as.factor(com$profile_id),
    show.legend = FALSE
  ) +
  scale_colour_manual(values = country_colors) +
  facet_wrap(~type) +
  # gganimate-specific layers: frame title, time variable and easing.
  labs(
    title = 'Profile id: {frame_time}',
    x = 'summaries',
    y = 'values'
  ) +
  transition_time(profile_id) +
  ease_aes('linear')

Correlogram of PMSM
library(ggcorrplot)
# Correlogram over the first 1000 (shuffled) rows.
dataset3 <- dataset[1:1000, ]
# cor() needs numeric columns. profile_id is a factor, so convert via
# as.character() to obtain the real ids instead of the factor level codes.
dataset3$profile_id <- as.numeric(as.character(dataset3$profile_id))
# Pairwise correlations, rounded to one decimal for labelling.
corr <- round(cor(dataset3), 1)
# Lower-triangle circle plot, variables ordered by hierarchical clustering.
ggcorrplot(
  corr,
  hc.order = TRUE,
  type = "lower",
  lab = TRUE,
  lab_size = 3,
  method = "circle",
  colors = c("tomato2", "white", "springgreen3"),
  title = "Correlogram of PMSM",
  ggtheme = theme_bw
)

Lollipop Chart torque vs profile_id
# Use the black-and-white theme for the following plots.
theme_set(theme_bw())
# Work on the first 500 rows and show their structure.
dataset3 <- dataset[1:500, ]
str(dataset3)
## 'data.frame': 500 obs. of 13 variables:
## $ ambient : num 0.58421 -0.68322 0.68861 0.00814 -2.7507 ...
## $ coolant : num 2.001 -1.051 1.127 0.601 -1.037 ...
## $ u_d : num 1.593 -1.067 0.744 0.674 -1.601 ...
## $ u_q : num 0.976 0.889 1.585 -1.115 -0.608 ...
## $ motor_speed : num 0.84599 -0.00497 1.56356 -0.98674 1.0231 ...
## $ torque : num -1.037 0.946 -0.484 -2.468 0.872 ...
## $ i_d : num -0.341 0.437 -0.467 -0.34 -1.974 ...
## $ i_q : num -1.014 1.069 -0.468 -2.54 0.721 ...
## $ pm : num 1.143 -0.676 1.144 -1.203 -0.165 ...
## $ stator_yoke : num 2.046 -1.026 1.074 0.815 -0.769 ...
## $ stator_tooth : num 1.839 -0.91 1.001 0.77 -0.328 ...
## $ stator_winding: num 1.5207 -0.8422 0.8896 0.9162 0.0177 ...
## $ profile_id : Factor w/ 52 levels "4","6","10","11",..: 50 6 36 28 7 42 11 45 29 41 ...
# Lollipop chart: a red point at (torque, profile_id) with a stem drawn
# from y = 0 up to the profile.
ggplot(
  dataset3,
  aes(x = torque, y = profile_id)
) +
  geom_point(size = 1, col = 'red') +
  geom_segment(
    aes(
      x = torque,
      xend = torque,
      y = 0,
      yend = profile_id
    )
  ) +
  labs(title = "Lollipop Chart torque vs profile_id") +
  theme(
    axis.text.x = element_text(angle = 65, vjust = 0.6)
  )

Dot Plot coolant and profile id
# Dot plot of coolant per profile over the first 1000 rows.
dataset3 <- dataset[1:1000, ]
library(scales)
theme_set(theme_classic())
ggplot(
  dataset3,
  aes(x = profile_id, y = coolant)
) +
  # One blue point per observation.
  geom_point(col = "blue", size = 2) +
  # Thin dashed red guide line per profile, spanning the overall
  # minimum to maximum coolant value.
  geom_segment(
    aes(
      x = profile_id,
      xend = profile_id,
      y = min(coolant),
      yend = max(coolant)
    ),
    linetype = "dashed",
    col = 'red',
    size = 0.1
  ) +
  labs(title = "Dot Plot coolant and profile id") +
  # Profiles on the vertical axis.
  coord_flip()

The encircled points denoting the motors which are above average
# Scatter of i_d vs motor_speed for the first 500 rows, with a red outline
# around the rows that are above average on three measures at once.
dataset3 <- dataset[1:500, ]
# Avoid scientific notation in printed numbers / axis labels.
options(scipen = 999)
library(ggalt)
# Rows above the sample mean for motor_speed, torque and (i_q + i_d).
dataset3_select <- dataset3[
  with(
    dataset3,
    motor_speed > mean(motor_speed) &
      torque > mean(torque) &
      i_q + i_d > mean(i_q + i_d)
  ),
]
ggplot(
  dataset3,
  aes(x = i_d, y = motor_speed)
) +
  # All points, coloured by profile.
  geom_point(aes(col = profile_id)) +
  # Outline around the selected subset.
  geom_encircle(
    aes(x = i_d, y = motor_speed),
    data = dataset3_select,
    color = "red",
    size = 2,
    expand = 0.08
  ) +
  labs(
    y = "motor_speed",
    x = "i_d",
    title = "The encircled points denoting the motors which are above average"
  )

Treemap over profile id
library(treemapify)
library(ggplotify)
# Treemap of profile ids, built from a random sample of at most 1000 rows.
dataset2 <- dataset[sample(nrow(dataset)), ]
# profile_id is a factor: go through as.character() to get the real ids.
dataset2$profile_id <- as.numeric(as.character(dataset2$profile_id))
# seq_len(min(...)) avoids NA rows when fewer than 1000 rows are available.
dataset2 <- dataset2[seq_len(min(1000, nrow(dataset2))), ]
# One tile per profile, sized by how many sampled rows belong to it.
# (Using the id itself as the area would size tiles by an arbitrary label
# and draw one tile per row.)
profile_counts <- dataset2 %>%
  count(profile_id)
ggplot(
  profile_counts,
  aes(area = n, fill = as.factor(profile_id), label = profile_id)
) +
  geom_treemap() +
  geom_treemap_text(
    fontface = "italic",
    colour = "white",
    place = "centre",
    grow = TRUE
  ) +
  labs(title = "Treemap over profile id")

Hierarchical dendogram over profiles
# Rebuild the per-profile row counts.
composition <- dataset %>%
  group_by(profile_id) %>%
  summarise(no_rows = n())
com <- composition
# Extra column carrying the profile label.
com[["profile name"]] <- com$profile_id
# z-score of each profile's row count, rounded to two decimals.
com$motors_z <- round((com$no_rows - mean(com$no_rows)) / sd(com$no_rows), 2)
# Flag: negative z-score -> "below", otherwise "above".
com$motors_type <- ifelse(com$motors_z < 0, "below", "above")
# Sort ascending by z-score.
com <- com[order(com$motors_z), ]
# Re-level the label factor so it keeps the sorted order.
com[["profile name"]] <- factor(
  com[["profile name"]],
  levels = com[["profile name"]]
)
# Keep the result under the name used by the clustering step and inspect it.
dataset2 <- com
str(dataset2)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52 obs. of 5 variables:
## $ profile_id : Factor w/ 52 levels "4","6","10","11",..: 17 18 23 22 35 30 4 14 49 20 ...
## $ no_rows : int 2179 2175 3725 6260 6249 7474 7886 8442 8444 10815 ...
## $ profile name: Factor w/ 52 levels "46","47","52",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ motors_z : num -1.65 -1.65 -1.5 -1.25 -1.25 -1.13 -1.09 -1.04 -1.04 -0.81 ...
## $ motors_type : chr "below" "below" "below" "below" ...
# Hierarchical clustering of the profiles, drawn as a cladogram.
# profile_id is a factor: convert via as.character() so the distances are
# computed on the real ids, not on the factor level codes.
profile_ids <- as.numeric(as.character(dataset2$profile_id))
scaled_ids <- scale(profile_ids)
# Name the rows so the tree tips are labelled with the profile id instead of
# the row position in the sorted table.
rownames(scaled_ids) <- profile_ids
# NOTE(review): the only clustering feature is the id itself; confirm this is
# intended rather than e.g. motors_z / no_rows.
dd <- dist(scaled_ids, method = "euclidean")
hc <- hclust(dd, method = "ward.D2")
library("ape")
plot(
  as.phylo(hc),
  type = "cladogram",
  cex = 0.6,
  edge.color = "steelblue",
  edge.width = 2,
  edge.lty = 2,
  tip.color = "steelblue"
)
title(main = "Hierarchical dendogram over profiles")
