##Attaching package
library(dplyr)
library(tidyr)
library(readr)
library(outliers)
library(lubridate)
library(forecast)
library(data.table)
library(magrittr)
library(deducorrect)
library(editrules)
In Data Preprocessing ,firstly two datasets fighters and fights consisting of data about fighters and matches played by each fighter ,having 3569 instances , occurring since 1993 -2016 were loaded using readcsv function. On joining the two datasets, next we filtered the required variables and observations by subsetting them. The subset data was then transformed into appropriate dataformats and factors.Missing values and outliers were treated.And Scaling was performed on numeric data.
The datasets used comprises of UFC shows data from 1993 to 2016.This dataset is scraped from Sherdog.com.Datasets used are Fighters.csv and fights.csv.Fighters dataset consist of 1561 observations with 7 variables and Fights dataset consist of 3569 obs with 15 variables.
Total Variables used after subsetting: Eid: event id number Event_name: name of the event Event_date: date of event occurrence Fid: Fighter Id Method: Technique used by fighter (Decision, TKO, KO, NC(14 levels)) Round: No. of rounds Time: duration of Match Name: Name of the fighter Birth_date: Date of birth of fighter Height: Height of fighter Weight: Weight of fighter in pounds Class: type of fighter BMI: BMI of each fighter Age: current age of fighter Name: Fighter name Fighter: Sequence of fighter(F1/F2) Result: Outcome of match: Win/Lose/Draw/NC
fighters <- read.csv("C:/Users/adity/Desktop/ALL UFC FIGHTERS Sheet1.csv")
fights <- read.csv("C:/Users/adity/Desktop/ALL UFC FIGHTS Sheet2.csv")
head(fighters,5)
## url fid name
## 1 /fighter/Conor-McGregor-29688 29688 Conor McGregor
## 2 /fighter/Jon-Jones-27944 27944 Jon Jones
## 3 /fighter/Holly-Holm-75125 75125 Holly Holm
## 4 /fighter/Dominick-Cruz-12107 12107 Dominick Cruz
## 5 /fighter/Demetrious-Johnson-45452 45452 Demetrious Johnson
## nick birth_date height weight association
## 1 Notorious 7/14/1988 68 145 SBG Ireland
## 2 Bones 7/19/1987 76 205 Jackson-Wink MMA
## 3 The Preacher's Daughter 10/17/1981 68 135 Jackson-Wink MMA
## 4 The Dominator 9/3/1985 68 134 Alliance MMA
## 5 Mighty Mouse 8/13/1986 63 125 AMC Pankration
## class locality country
## 1 Featherweight Dublin Ireland
## 2 Light Heavyweight Rochester, New York United States
## 3 Bantamweight Albuquerque, New Mexico United States
## 4 Bantamweight San Diego, California United States
## 5 Flyweight Kirkland, Washington United States
head(fights,5)
## pageurl eid mid event_name
## 1 /events/UFC-1-The-Beginning-7 7 8 UFC 1 - The Beginning
## 2 /events/UFC-1-The-Beginning-7 7 7 UFC 1 - The Beginning
## 3 /events/UFC-1-The-Beginning-7 7 6 UFC 1 - The Beginning
## 4 /events/UFC-1-The-Beginning-7 7 5 UFC 1 - The Beginning
## 5 /events/UFC-1-The-Beginning-7 7 4 UFC 1 - The Beginning
## event_org event_date
## 1 Ultimate Fighting Championship 11/12/1993
## 2 Ultimate Fighting Championship 11/12/1993
## 3 Ultimate Fighting Championship 11/12/1993
## 4 Ultimate Fighting Championship 11/12/1993
## 5 Ultimate Fighting Championship 11/12/1993
## event_place f1pageurl
## 1 McNichols Arena, Denver, Colorado, United States /fighter/Royce-Gracie-19
## 2 McNichols Arena, Denver, Colorado, United States /fighter/Jason-DeLucia-22
## 3 McNichols Arena, Denver, Colorado, United States /fighter/Royce-Gracie-19
## 4 McNichols Arena, Denver, Colorado, United States /fighter/Gerard-Gordeau-15
## 5 McNichols Arena, Denver, Colorado, United States /fighter/Ken-Shamrock-4
## f2pageurl f1name f2name f1result f2result
## 1 /fighter/Gerard-Gordeau-15 Royce Gracie Gerard Gordeau win loss
## 2 /fighter/Trent-Jenkins-23 Jason DeLucia Trent Jenkins win loss
## 3 /fighter/Ken-Shamrock-4 Royce Gracie Ken Shamrock win loss
## 4 /fighter/Kevin-Rosier-17 Gerard Gordeau Kevin Rosier win loss
## 5 /fighter/Patrick-Smith-21 Ken Shamrock Patrick Smith win loss
## f1fid f2fid method method_d ref round time
## 1 19 15 Submission Rear-Naked Choke Helio Vigio 1 1:44
## 2 22 23 Submission Rear-Naked Choke Joao Alberto Barreto 1 0:52
## 3 19 4 Submission Rear-Naked Choke Helio Vigio 1 0:57
## 4 15 17 TKO Corner Stoppage Joao Alberto Barreto 1 0:59
## 5 4 21 Submission Heel Hook Helio Vigio 1 1:49
dim(fighters)
## [1] 1561 11
dim(fights)
## [1] 3569 20
str(fighters)
## 'data.frame': 1561 obs. of 11 variables:
## $ url : chr "/fighter/Conor-McGregor-29688" "/fighter/Jon-Jones-27944" "/fighter/Holly-Holm-75125" "/fighter/Dominick-Cruz-12107" ...
## $ fid : int 29688 27944 75125 12107 45452 73073 8390 2245 11506 38393 ...
## $ name : chr "Conor McGregor" "Jon Jones" "Holly Holm" "Dominick Cruz" ...
## $ nick : chr "Notorious" "Bones" "The Preacher's Daughter" "The Dominator" ...
## $ birth_date : chr "7/14/1988" "7/19/1987" "10/17/1981" "9/3/1985" ...
## $ height : int 68 76 68 68 63 66 76 71 67 66 ...
## $ weight : int 145 205 135 134 125 135 242 170 145 145 ...
## $ association: chr "SBG Ireland" "Jackson-Wink MMA" "Jackson-Wink MMA" "Alliance MMA" ...
## $ class : chr "Featherweight" "Light Heavyweight" "Bantamweight" "Bantamweight" ...
## $ locality : chr "Dublin" "Rochester, New York" "Albuquerque, New Mexico" "San Diego, California" ...
## $ country : chr "Ireland" "United States" "United States" "United States" ...
str(fights)
## 'data.frame': 3569 obs. of 20 variables:
## $ pageurl : chr "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" ...
## $ eid : int 7 7 7 7 7 7 7 7 8 8 ...
## $ mid : int 8 7 6 5 4 3 2 1 15 14 ...
## $ event_name : chr "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" ...
## $ event_org : chr "Ultimate Fighting Championship" "Ultimate Fighting Championship" "Ultimate Fighting Championship" "Ultimate Fighting Championship" ...
## $ event_date : chr "11/12/1993" "11/12/1993" "11/12/1993" "11/12/1993" ...
## $ event_place: chr "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" ...
## $ f1pageurl : chr "/fighter/Royce-Gracie-19" "/fighter/Jason-DeLucia-22" "/fighter/Royce-Gracie-19" "/fighter/Gerard-Gordeau-15" ...
## $ f2pageurl : chr "/fighter/Gerard-Gordeau-15" "/fighter/Trent-Jenkins-23" "/fighter/Ken-Shamrock-4" "/fighter/Kevin-Rosier-17" ...
## $ f1name : chr "Royce Gracie" "Jason DeLucia" "Royce Gracie" "Gerard Gordeau" ...
## $ f2name : chr "Gerard Gordeau" "Trent Jenkins" "Ken Shamrock" "Kevin Rosier" ...
## $ f1result : chr "win" "win" "win" "win" ...
## $ f2result : chr "loss" "loss" "loss" "loss" ...
## $ f1fid : int 19 22 19 15 4 19 17 15 19 19 ...
## $ f2fid : int 15 23 4 17 21 20 18 16 21 33 ...
## $ method : chr "Submission" "Submission" "Submission" "TKO" ...
## $ method_d : chr "Rear-Naked Choke" "Rear-Naked Choke" "Rear-Naked Choke" "Corner Stoppage" ...
## $ ref : chr "Helio Vigio" "Joao Alberto Barreto" "Helio Vigio" "Joao Alberto Barreto" ...
## $ round : int 1 1 1 1 1 1 1 1 1 1 ...
## $ time : chr "1:44" "0:52" "0:57" "0:59" ...
#column names are displayed
names(fighters)
## [1] "url" "fid" "name" "nick" "birth_date"
## [6] "height" "weight" "association" "class" "locality"
## [11] "country"
names(fights)
## [1] "pageurl" "eid" "mid" "event_name" "event_org"
## [6] "event_date" "event_place" "f1pageurl" "f2pageurl" "f1name"
## [11] "f2name" "f1result" "f2result" "f1fid" "f2fid"
## [16] "method" "method_d" "ref" "round" "time"
The data has been taken from the following link: * https://github.com/jslucf/UFC-Fight-Card-Analysis/find/master
The Functions used to summarize the dataset at required intervals using str(), dim() and displaying names().The two datasets used comprises of numeric,factors, integers, and character variables.From structure summary we can see,columns event_date and birth_date werent in date type format.Also the dates were in mm/dd/yyyy format so using date converter function it was transformed into desirable format.Columns like class, method , mutated column result and Fighter was also converted to factor.
dim(fighters)
## [1] 1561 11
dim(fights)
## [1] 3569 20
str(fighters)
## 'data.frame': 1561 obs. of 11 variables:
## $ url : chr "/fighter/Conor-McGregor-29688" "/fighter/Jon-Jones-27944" "/fighter/Holly-Holm-75125" "/fighter/Dominick-Cruz-12107" ...
## $ fid : int 29688 27944 75125 12107 45452 73073 8390 2245 11506 38393 ...
## $ name : chr "Conor McGregor" "Jon Jones" "Holly Holm" "Dominick Cruz" ...
## $ nick : chr "Notorious" "Bones" "The Preacher's Daughter" "The Dominator" ...
## $ birth_date : chr "7/14/1988" "7/19/1987" "10/17/1981" "9/3/1985" ...
## $ height : int 68 76 68 68 63 66 76 71 67 66 ...
## $ weight : int 145 205 135 134 125 135 242 170 145 145 ...
## $ association: chr "SBG Ireland" "Jackson-Wink MMA" "Jackson-Wink MMA" "Alliance MMA" ...
## $ class : chr "Featherweight" "Light Heavyweight" "Bantamweight" "Bantamweight" ...
## $ locality : chr "Dublin" "Rochester, New York" "Albuquerque, New Mexico" "San Diego, California" ...
## $ country : chr "Ireland" "United States" "United States" "United States" ...
str(fights)
## 'data.frame': 3569 obs. of 20 variables:
## $ pageurl : chr "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" "/events/UFC-1-The-Beginning-7" ...
## $ eid : int 7 7 7 7 7 7 7 7 8 8 ...
## $ mid : int 8 7 6 5 4 3 2 1 15 14 ...
## $ event_name : chr "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" ...
## $ event_org : chr "Ultimate Fighting Championship" "Ultimate Fighting Championship" "Ultimate Fighting Championship" "Ultimate Fighting Championship" ...
## $ event_date : chr "11/12/1993" "11/12/1993" "11/12/1993" "11/12/1993" ...
## $ event_place: chr "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" "McNichols Arena, Denver, Colorado, United States" ...
## $ f1pageurl : chr "/fighter/Royce-Gracie-19" "/fighter/Jason-DeLucia-22" "/fighter/Royce-Gracie-19" "/fighter/Gerard-Gordeau-15" ...
## $ f2pageurl : chr "/fighter/Gerard-Gordeau-15" "/fighter/Trent-Jenkins-23" "/fighter/Ken-Shamrock-4" "/fighter/Kevin-Rosier-17" ...
## $ f1name : chr "Royce Gracie" "Jason DeLucia" "Royce Gracie" "Gerard Gordeau" ...
## $ f2name : chr "Gerard Gordeau" "Trent Jenkins" "Ken Shamrock" "Kevin Rosier" ...
## $ f1result : chr "win" "win" "win" "win" ...
## $ f2result : chr "loss" "loss" "loss" "loss" ...
## $ f1fid : int 19 22 19 15 4 19 17 15 19 19 ...
## $ f2fid : int 15 23 4 17 21 20 18 16 21 33 ...
## $ method : chr "Submission" "Submission" "Submission" "TKO" ...
## $ method_d : chr "Rear-Naked Choke" "Rear-Naked Choke" "Rear-Naked Choke" "Corner Stoppage" ...
## $ ref : chr "Helio Vigio" "Joao Alberto Barreto" "Helio Vigio" "Joao Alberto Barreto" ...
## $ round : int 1 1 1 1 1 1 1 1 1 1 ...
## $ time : chr "1:44" "0:52" "0:57" "0:59" ...
names(fighters)
## [1] "url" "fid" "name" "nick" "birth_date"
## [6] "height" "weight" "association" "class" "locality"
## [11] "country"
names(fights)
## [1] "pageurl" "eid" "mid" "event_name" "event_org"
## [6] "event_date" "event_place" "f1pageurl" "f2pageurl" "f1name"
## [11] "f2name" "f1result" "f2result" "f1fid" "f2fid"
## [16] "method" "method_d" "ref" "round" "time"
#remove Unwanted columns
fights = fights[, -c(1,5,7,8,9)]
fighters = fighters[,-c(1,4,8,10)]
#convert fights table time to numeric , event date to date, method to factor
fights$method <- as.factor(fights$method)
fights$event_date<-as.Date(as.character(fights$event_date), format = "%m/%d/%Y")
fights$round<- as.factor(fights$round)
#duplicated dataframe
fighters_subset= fighters
#Cganging Data Type in fighter table
fighters_subset$birth_date <- as.Date(as.character(fighters_subset$birth_date), format= "%m/%d/%Y")
#fighters_subset$birth_date
#remove column country
fighters_subset <- fighters_subset[, -7]
fighters_subset$class <- as.factor(fighters_subset$class)
levels(fighters_subset$class)
## [1] "Atomweight" "Bantamweight" "Featherweight"
## [4] "Flyweight" "Heavyweight" "Light Heavyweight"
## [7] "Lightweight" "Middleweight" "N/A"
## [10] "Strawweight" "Super Heavyweight" "Welterweight"
str(fighters_subset)
## 'data.frame': 1561 obs. of 6 variables:
## $ fid : int 29688 27944 75125 12107 45452 73073 8390 2245 11506 38393 ...
## $ name : chr "Conor McGregor" "Jon Jones" "Holly Holm" "Dominick Cruz" ...
## $ birth_date: Date, format: "1988-07-14" "1987-07-19" ...
## $ height : int 68 76 68 68 63 66 76 71 67 66 ...
## $ weight : int 145 205 135 134 125 135 242 170 145 145 ...
## $ class : Factor w/ 12 levels "Atomweight","Bantamweight",..: 3 6 2 2 4 2 5 12 3 3 ...
Prior to joining two dataset and converting dataset from spread to gather or vice versa, we have used variables present in fighters dataset height and weight of fighter to mutate a new variable BMI. Also have used column Birth-Date of fighter to determine the Age of fighter.
#create a column BMI using height and weight and new column Age using Birth_date column
new_var <- mutate(fighters_subset, BMI = (((fighters_subset$weight/fighters_subset$height)/fighters_subset$height)*708))
#View(new_var)
new_var1 <- mutate(new_var, Age = as.integer(2020- year(fighters_subset$birth_date)))
head(new_var1,5)
## fid name birth_date height weight class BMI
## 1 29688 Conor McGregor 1988-07-14 68 145 Featherweight 22.20156
## 2 27944 Jon Jones 1987-07-19 76 205 Light Heavyweight 25.12812
## 3 75125 Holly Holm 1981-10-17 68 135 Bantamweight 20.67042
## 4 12107 Dominick Cruz 1985-09-03 68 134 Bantamweight 20.51730
## 5 45452 Demetrious Johnson 1986-08-13 63 125 Flyweight 22.29781
## Age
## 1 32
## 2 33
## 3 39
## 4 35
## 5 34
#rename column name for joining
names(fights)[9]<- paste("fid")
mergedfight <- fights %>% left_join(new_var1, by ="fid")
mergedfight<- mergedfight%>%select(eid,mid,event_name,event_date,f1name:fid,method,round:name,birth_date: Age)
dim(mergedfight)
## [1] 3685 19
str(mergedfight)
## 'data.frame': 3685 obs. of 19 variables:
## $ eid : int 7 7 7 7 7 7 7 7 7 7 ...
## $ mid : int 8 8 7 6 6 5 4 3 3 2 ...
## $ event_name: chr "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" "UFC 1 - The Beginning" ...
## $ event_date: Date, format: "1993-11-12" "1993-11-12" ...
## $ f1name : chr "Royce Gracie" "Royce Gracie" "Jason DeLucia" "Royce Gracie" ...
## $ f2name : chr "Gerard Gordeau" "Gerard Gordeau" "Trent Jenkins" "Ken Shamrock" ...
## $ f1result : chr "win" "win" "win" "win" ...
## $ f2result : chr "loss" "loss" "loss" "loss" ...
## $ fid : int 19 19 22 19 19 15 4 19 19 17 ...
## $ method : Factor w/ 14 levels "Decision","DQ",..: 12 12 12 12 12 14 12 12 12 14 ...
## $ round : Factor w/ 5 levels "1","2","3","4",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ time : chr "1:44" "1:44" "0:52" "0:57" ...
## $ name : chr "Royce Gracie" "Royce Gracie" "Jason DeLucia" "Royce Gracie" ...
## $ birth_date: Date, format: "1966-12-12" "1966-12-12" ...
## $ height : int 72 72 71 72 72 77 73 72 72 76 ...
## $ weight : int 176 176 190 176 176 216 212 176 176 265 ...
## $ class : Factor w/ 12 levels "Atomweight","Bantamweight",..: 8 8 6 8 8 5 5 8 8 5 ...
## $ BMI : num 24 24 26.7 24 24 ...
## $ Age : int 54 54 51 54 54 65 56 54 54 58 ...
Explanation:Fights Dataset consisting the events details is untidy because the observations F1name, F2name,F1result and F2result were populated in the header of columns.Thus using the Tidy dataset definition from Hadley Wickhams principles values were allocated to each cell and each observation was given to its row.
mergedfight<-mergedfight%>% select(eid,mid,event_name,event_date,f1name:fid,method,round:name,birth_date: Age)
#subsetted by date
#tail(mergedfight, 1000)
subset_date<- mergedfight[mergedfight$event_date>"2013-01-01",]
#View(subset_date)
#tidy dataset
#f1name and f2name have fighter no and name and f1result have fighter result
names(subset_date)[5]<- paste("f1")
names(subset_date)[6]<- paste("f2")
tablet1<- subset_date %>% gather('f1','f2', key = "Fighter", value = "Name")
names(tablet1)[5]<- paste("F1")
names(tablet1)[6]<- paste("F2")
tablet2<- tablet1 %>% gather('F1','F2', key = "Fighter", value = "Result")
tablet2$Fighter <- as.factor(tablet2$Fighter)
tablet2$Result <- as.factor(tablet2$Result)
#subsetting data having events recorded after mentioned date
tablet3<- tablet2[tablet2$event_date > "2015-01-01", ]
#match id was removed since its not required
tablet3 <- tablet3[,-2]
head(tablet3, 5)
## eid event_name event_date fid method round time
## 915 38841 UFC 182 - Jones vs. Cormier 2015-01-03 27944 Decision 5 5:00
## 916 38841 UFC 182 - Jones vs. Cormier 2015-01-03 15105 Decision 3 5:00
## 917 38841 UFC 182 - Jones vs. Cormier 2015-01-03 33095 Decision 3 5:00
## 918 38841 UFC 182 - Jones vs. Cormier 2015-01-03 64413 Decision 3 5:00
## 919 38841 UFC 182 - Jones vs. Cormier 2015-01-03 11292 NC 3 5:00
## name birth_date height weight class BMI Age
## 915 Jon Jones 1987-07-19 76 205 Light Heavyweight 25.12812 33
## 916 Donald Cerrone 1983-03-29 71 155 Lightweight 21.76949 37
## 917 Brad Tavares 1987-12-21 71 185 Middleweight 25.98294 33
## 918 Kyoji Horiguchi 1990-10-12 65 125 Flyweight 20.94675 30
## 919 Hector Lombard 1978-02-02 69 170 Welterweight 25.28040 42
## Name Fighter Result
## 915 Jon Jones F1 win
## 916 Donald Cerrone F1 win
## 917 Brad Tavares F1 win
## 918 Kyoji Horiguchi F1 win
## 919 Hector Lombard F1 NC
Explanation: After joining two datasets, we created two variables BMI and AGE. But there are few missing values present in various columns. Thus using which() function locations were found and mean was calculated for the values having Nan values. After treating missing values, dataset was checked with special values using is.finite() and is.special().
#find missing values
#is.na(tablet3)
#sum(is.na(tablet3))
colSums(is.na(tablet3))
## eid event_name event_date fid method round time
## 0 0 0 0 0 0 0
## name birth_date height weight class BMI Age
## 12 68 28 12 12 28 68
## Name Fighter Result
## 0 0 0
#missing values present in various columns so treat them
tablet3$height<- as.numeric(tablet3$height)
tablet3$weight<- as.numeric(tablet3$weight)
tablet3$height[which(is.na(tablet3$height))] <- mean(tablet3$height, na.rm = TRUE)
tablet3$weight[which(is.na(tablet3$weight))] <- mean(tablet3$weight, na.rm = TRUE)
tablet3$BMI[which(is.na(tablet3$BMI))] <- mean(tablet3$BMI, na.rm = TRUE)
tablet3$Age[which(is.na(tablet3$Age))] <- mean(tablet3$Age, na.rm = TRUE)
colSums(is.na(tablet3))
## eid event_name event_date fid method round time
## 0 0 0 0 0 0 0
## name birth_date height weight class BMI Age
## 12 68 0 0 12 0 0
## Name Fighter Result
## 0 0 0
tablet3_duplicate <- tablet3
#find special values
which(!is.finite(tablet3$Age))
## integer(0)
which(!is.finite(tablet3$fid))
## integer(0)
which(!is.finite(tablet3$BMI))
## integer(0)
#sapply(tablet3,is.special)
#sapply(tablet3,is.na)
levels(tablet3$Result)
## [1] "draw" "loss" "NC" "win"
levels(tablet3$Fighter)
## [1] "F1" "F2"
Explanation:In this section, outliers present in numeric values of dataset are checked.As shown below, we examined height, weight, BMI and Age variable.Dataset has some outliers which are identified and treated using capping technique.
#outliers
library(outliers)
cap <- function (x){
quantiles <- quantile( x, c( .05 , 0.25 , 0.75 , .95) )
x[ x < quantiles[2] - 1.5 * IQR(x) ]<- quantiles[1]
x[ x > quantiles[3] +1.5*IQR(x) ] <- quantiles[4]
x
}
tablet3$height <- tablet3$height %>% cap()
par(mfrow = c(1,2))
boxplot(tablet3_duplicate$height)
boxplot(tablet3$height)
tablet3$weight <- tablet3$weight %>% cap()
par(mfrow = c(1,2))
boxplot(tablet3_duplicate$weight)
boxplot(tablet3$weight)
tablet3$BMI <- tablet3$BMI%>% cap()
par(mfrow = c(1,2))
boxplot(tablet3_duplicate$BMI)
boxplot(tablet3$BMI)
tablet3$Age <- tablet3$Age %>% cap()
par(mfrow = c(1,2))
boxplot(tablet3_duplicate$Age)
boxplot(tablet3$Age)
#Transform:
Explanation: Data Transformation is performed on dataset to achieve normality.Using logarithmic transformation on BMI the right skewed distribution reduced.but from the square-root transformation we we removed the skewedness from the data and normalized it.
#normalise
par(mfrow = c(1,2))
hist(tablet3$BMI, breaks = 10, col = blues9, main = "BMI Before Transformation")
hist(sqrt(tablet3$BMI), breaks= 10, col = blues9, main = "BMI After Transformation")
par(mfrow = c(1,2))
hist(tablet3$weight, breaks = 10, col = blues9, main = "Weight Before Transformation")
hist(log10(tablet3$weight), breaks= 10, col = blues9, main = "Weight After Transformation")