library(tidyverse)
library(ggplot2)
# Load the obesity dataset and preview the first 10 rows.
# The file path is built explicitly instead of calling setwd(), so running
# this script no longer changes the working directory of the R session.
data_dir <- "C:/R Study"
obesity <- read.csv(file.path(data_dir, "Obesity.csv"))
head(obesity, 10)
## ID Age Gender Height Weight BMI Label
## 1 1 25 Male 175 80 25.3 Normal Weight
## 2 2 30 Female 160 60 22.5 Normal Weight
## 3 3 35 Male 180 90 27.3 Overweight
## 4 4 40 Female 150 50 20.0 Underweight
## 5 5 45 Male 190 100 31.2 Obese
## 6 6 50 Female 140 40 16.7 Underweight
## 7 7 55 Male 200 110 34.2 Obese
## 8 8 60 Female 130 30 13.3 Underweight
## 9 9 65 Male 210 120 37.2 Obese
## 10 10 70 Female 120 20 10.0 Underweight
# Pairwise correlations between the numeric measurements, computed only on
# rows where all four columns are present
matrics_cor <- cor(
  obesity[c("Age", "Height", "Weight", "BMI")],
  use = "complete.obs"
)
matrics_cor
## Age Height Weight BMI
## Age 1.00000000 -0.07689606 0.4651065 0.4741851
## Height -0.07689606 1.00000000 0.4288899 0.3543398
## Weight 0.46510648 0.42888995 1.0000000 0.9728293
## BMI 0.47418510 0.35433984 0.9728293 1.0000000
# Count of records per gender, drawn as a bar chart
ggplot(data = obesity, mapping = aes(x = Gender)) +
  geom_bar()
# Gender bar chart restricted to the heavier groups: every row labelled
# "Underweight" or "Normal Weight" is excluded before plotting
obesity_heavy <- subset(
  obesity,
  !(Label == "Underweight" | Label == "Normal Weight")
)
ggplot(data = obesity_heavy, mapping = aes(x = Gender)) +
  geom_bar()
# BMI
# Top 10 by BMI: 9 of 10 are Male with height of 190+
bmi_rank <- order(obesity$BMI, decreasing = TRUE)
sort_BMI <- obesity[bmi_rank, ]
head(sort_BMI, n = 10)
## ID Age Gender Height Weight BMI Label
## 9 9 65 Male 210 120 37.2 Obese
## 29 30 108 Male 210 120 37.2 Obese
## 7 7 55 Male 200 110 34.2 Obese
## 19 20 58 Male 210 110 34.2 Obese
## 27 28 98 Male 200 110 34.2 Obese
## 5 5 45 Male 190 100 31.2 Obese
## 17 17 48 Male 200 100 31.2 Obese
## 25 26 88 Male 190 100 31.2 Obese
## 37 39 59 Male 210 115 30.8 Obese
## 38 40 64 Female 120 95 29.1 Overweight
# Distribution of BMI within each obesity label, shown as box plots
ggplot(data = obesity, mapping = aes(x = Label, y = BMI)) +
  geom_boxplot()
# Per-label BMI statistics: mean, median and standard deviation,
# each computed with missing values ignored
group_summary <- summarise(
  group_by(obesity, Label),
  mean_BMI = mean(BMI, na.rm = TRUE),
  median_BMI = median(BMI, na.rm = TRUE),
  sd_BMI = sd(BMI, na.rm = TRUE)
)
print(group_summary)
## # A tibble: 4 × 4
## Label mean_BMI median_BMI sd_BMI
## <chr> <dbl> <dbl> <dbl>
## 1 Normal Weight 22.8 22.7 1.34
## 2 Obese 32.3 31.2 3.01
## 3 Overweight 26.6 27 1.08
## 4 Underweight 13.6 13.3 4.96
# Correlation between BMI and Weight on their own
with(obesity, cor(BMI, Weight))
## [1] 0.9728293
## Weight against Age, coloured by obesity label
# Exploratory helper kept for reference (not run):
#males<-obesity_heavy[obesity_heavy$Gender=="Male",]
#View(males)
# Tick marks every 10 years, derived from the data so the axis stays labelled
# across the whole age range. The previous fixed 0-60 breaks stopped short of
# the oldest ages present in the dataset.
age_breaks <- seq(0, max(obesity$Age, na.rm = TRUE), by = 10)
ggplot(obesity, aes(x = Age, y = Weight)) +
  geom_point(aes(colour = Label)) +
  scale_x_continuous(breaks = age_breaks)
# BMI against Height, coloured by obesity label
# Tick marks every 10 units, starting at the multiple of 10 at or below the
# smallest height and running up to the largest one. The previous fixed
# 140-190 breaks left heights outside that window without tick labels.
height_breaks <- seq(
  floor(min(obesity$Height, na.rm = TRUE) / 10) * 10,
  max(obesity$Height, na.rm = TRUE),
  by = 10
)
ggplot(obesity, aes(x = Height, y = BMI)) +
  geom_point(aes(colour = Label)) +
  scale_x_continuous(breaks = height_breaks)
# Conclusion: 1) BMI is closely related to obesity. 2) The Male-to-Female
# obesity ratio is 24:8. 3) Heights of 150-180 show the lowest obesity rate.
# 4) However, the original dataset is small, which limits the accuracy of
# the analysis.