# Data import and Gender Comparison

library(tidyverse)
library(ggplot2)
# Load the obesity dataset and preview its first ten rows.
# The CSV is addressed by its full path rather than via setwd(), so running
# the script does not change the session's working directory as a side effect.
# Edit `data_dir` to point at the folder that holds Obesity.csv.
data_dir <- "C:/R Study"
obesity <- read.csv(file.path(data_dir, "Obesity.csv"))
head(obesity, 10)
##    ID Age Gender Height Weight  BMI         Label
## 1   1  25   Male    175     80 25.3 Normal Weight
## 2   2  30 Female    160     60 22.5 Normal Weight
## 3   3  35   Male    180     90 27.3    Overweight
## 4   4  40 Female    150     50 20.0   Underweight
## 5   5  45   Male    190    100 31.2         Obese
## 6   6  50 Female    140     40 16.7   Underweight
## 7   7  55   Male    200    110 34.2         Obese
## 8   8  60 Female    130     30 13.3   Underweight
## 9   9  65   Male    210    120 37.2         Obese
## 10 10  70 Female    120     20 10.0   Underweight
# Correlation matrix of the numeric columns (Age, Height, Weight, BMI).
# use = "complete.obs" drops every row that has a missing value in any of
# the four columns before the correlations are computed.
matrics_cor <- cor(
  obesity[c("Age", "Height", "Weight", "BMI")],
  use = "complete.obs"
)
matrics_cor
##                Age      Height    Weight       BMI
## Age     1.00000000 -0.07689606 0.4651065 0.4741851
## Height -0.07689606  1.00000000 0.4288899 0.3543398
## Weight  0.46510648  0.42888995 1.0000000 0.9728293
## BMI     0.47418510  0.35433984 0.9728293 1.0000000
# Bar chart: number of records for each Gender
ggplot(data = obesity, mapping = aes(x = Gender)) +
  geom_bar(stat = "count")

# Bar plot of Gender among the heavier categories: keep every row whose
# Label is neither "Underweight" nor "Normal Weight".
obesity_heavy <- subset(
  obesity,
  !(Label == "Underweight" | Label == "Normal Weight")
)

ggplot(data = obesity_heavy, mapping = aes(x = Gender)) +
  geom_bar(stat = "count")

#BMI

# Ten highest-BMI records: 9 of the 10 are Male, each with a Height of 190+.
# Rows are ordered by BMI from largest to smallest; tied rows keep their
# original relative order.
sort_BMI <- obesity[order(obesity$BMI, decreasing = TRUE), ]
head(sort_BMI, n = 10)
##    ID Age Gender Height Weight  BMI      Label
## 9   9  65   Male    210    120 37.2      Obese
## 29 30 108   Male    210    120 37.2      Obese
## 7   7  55   Male    200    110 34.2      Obese
## 19 20  58   Male    210    110 34.2      Obese
## 27 28  98   Male    200    110 34.2      Obese
## 5   5  45   Male    190    100 31.2      Obese
## 17 17  48   Male    200    100 31.2      Obese
## 25 26  88   Male    190    100 31.2      Obese
## 37 39  59   Male    210    115 30.8      Obese
## 38 40  64 Female    120     95 29.1 Overweight
# Box plot: distribution of BMI within each Label category
ggplot(data = obesity, mapping = aes(x = Label, y = BMI)) +
  geom_boxplot()

# Summary by group: mean, median and standard deviation of BMI for each
# Label, ignoring missing BMI values.
group_summary <- summarise(
  group_by(obesity, Label),
  mean_BMI = mean(BMI, na.rm = TRUE),
  median_BMI = median(BMI, na.rm = TRUE),
  sd_BMI = sd(BMI, na.rm = TRUE)
)

print(group_summary)
## # A tibble: 4 × 4
##   Label         mean_BMI median_BMI sd_BMI
##   <chr>            <dbl>      <dbl>  <dbl>
## 1 Normal Weight     22.8       22.7   1.34
## 2 Obese             32.3       31.2   3.01
## 3 Overweight        26.6       27     1.08
## 4 Underweight       13.6       13.3   4.96
# Correlation between BMI and Weight across all records
with(obesity, cor(BMI, Weight))
## [1] 0.9728293

##Weight against Age, Colored by Obesity Label

#males<-obesity_heavy[obesity_heavy$Gender=="Male",]

#View(males)


# Scatter plot of Weight against Age, coloured by obesity Label.
# X-axis ticks are placed every 10 years from 0 up to the largest Age in the
# data. A fixed 0-60 range would leave older records (Age reaches 108 in this
# dataset) on an unlabelled stretch of the axis. Breaks that fall outside the
# plotted range are simply not drawn.
age_breaks <- seq(0, max(obesity$Age, na.rm = TRUE) + 10, by = 10)

ggplot(obesity, aes(x = Age, y = Weight)) +
  geom_point(aes(colour = Label)) +
  scale_x_continuous(breaks = age_breaks)

# Scatter plot of BMI against Height, coloured by obesity Label.
# X-axis ticks are placed every 10 units across the observed Height range,
# rounded outward to multiples of 10. A fixed 140-190 range would leave the
# shortest and tallest records (Height spans at least 120-210 in this
# dataset) without tick labels.
height_breaks <- seq(
  floor(min(obesity$Height, na.rm = TRUE) / 10) * 10,
  ceiling(max(obesity$Height, na.rm = TRUE) / 10) * 10,
  by = 10
)

ggplot(obesity, aes(x = Height, y = BMI)) +
  geom_point(aes(colour = Label)) +
  scale_x_continuous(breaks = height_breaks)

# Conclusion:
# 1) BMI is closely related to obesity.
# 2) The male-to-female obesity ratio is 24:8.
# 3) Heights of 150-180 show the lowest obesity rate.
# 4) However, the original dataset is small, which limits the accuracy of
#    the analysis.