Obesity is a disease involving excess body fat that can negatively affect health, for example by increasing the risk of heart disease, high blood pressure, high cholesterol, liver disease, sleep apnea, and certain cancers. It is normally determined by the Body Mass Index (BMI), which is calculated as weight in kilograms divided by the square of height in meters. \[ \mathrm{BMI} = \frac{\text{weight (kg)}}{\text{height (m)}^2} \]
Dataset citation: Estimation of Obesity Levels Based On Eating Habits and Physical Condition [Dataset]. (2019). UCI Machine Learning Repository. https://doi.org/10.24432/C5H31Z
library(tidyverse)
library(plotly)
library(ggplot2)
library(glue)
# Load the obesity dataset; strings become factors so that summary() reports
# per-level counts for the categorical columns.
obesity <- read.csv("ObesityDataSet.csv", stringsAsFactors = TRUE)

# Replace the original column headers with descriptive names (positional:
# the vector must list all 17 columns in file order).
names(obesity) <- c(
  "Gender", "Age", "Height", "Weight", "Family_History_Overweight",
  "High_Caloric_Food", "Vegetables", "Main_Meals", "Food_Between_Meals",
  "Smoke", "Water_Daily", "Calories_Monitor", "Physical_Activities",
  "Technologic_Devices", "Alcohol", "Transportation", "Obesity"
)

# Round the listed numeric columns to whole numbers.
# across() replaces the superseded mutate_at()/vars() form.
obesity <- obesity %>%
  mutate(across(
    c(Vegetables, Main_Meals, Water_Daily, Physical_Activities,
      Technologic_Devices),
    ~ round(.x, 0)
  ))

summary(obesity)
## Gender Age Height Weight
## Female:1043 Min. :14.00 Min. :1.450 Min. : 39.00
## Male :1068 1st Qu.:19.95 1st Qu.:1.630 1st Qu.: 65.47
## Median :22.78 Median :1.700 Median : 83.00
## Mean :24.31 Mean :1.702 Mean : 86.59
## 3rd Qu.:26.00 3rd Qu.:1.768 3rd Qu.:107.43
## Max. :61.00 Max. :1.980 Max. :173.00
##
## Family_History_Overweight High_Caloric_Food Vegetables Main_Meals
## no : 385 no : 245 Min. :1.000 Min. :1.000
## yes:1726 yes:1866 1st Qu.:2.000 1st Qu.:3.000
## Median :2.000 Median :3.000
## Mean :2.423 Mean :2.688
## 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :3.000 Max. :4.000
##
## Food_Between_Meals Smoke Water_Daily Calories_Monitor
## Always : 53 no :2067 Min. :1.000 no :2015
## Frequently: 242 yes: 44 1st Qu.:2.000 yes: 96
## no : 51 Median :2.000
## Sometimes :1765 Mean :2.015
## 3rd Qu.:2.000
## Max. :3.000
##
## Physical_Activities Technologic_Devices Alcohol
## Min. :0.000 Min. :0.0000 Always : 1
## 1st Qu.:0.000 1st Qu.:0.0000 Frequently: 70
## Median :1.000 Median :1.0000 no : 639
## Mean :1.007 Mean :0.6646 Sometimes :1401
## 3rd Qu.:2.000 3rd Qu.:1.0000
## Max. :3.000 Max. :2.0000
##
## Transportation Obesity
## Automobile : 457 Insufficient_Weight:272
## Bike : 7 Normal_Weight :287
## Motorbike : 11 Obesity_Type_I :351
## Public_Transportation:1580 Obesity_Type_II :297
## Walking : 56 Obesity_Type_III :324
## Overweight_Level_I :290
## Overweight_Level_II:290
# Height vs. weight scatter plot coloured by obesity class, with a black
# linear fit drawn per class (the colour mapping still defines the groups).
obesity_cor <- obesity %>%
  select(Obesity, Height, Weight) %>%
  # Hover label consumed by ggplotly(tooltip = "text"); without a `text`
  # aesthetic the tooltip would be empty.
  mutate(label = glue(
    "Height : {round(Height, 2)}\nWeight : {round(Weight, 2)}\nType : {Obesity}"
  ))

plot_obcor <- ggplot(
  data = obesity_cor,
  mapping = aes(x = Height, y = Weight, col = Obesity)
) +
  # `text` is mapped on the point layer only: putting it in the global aes
  # would split the smoother into one group per point. ggplot2 warns about
  # the unknown aesthetic; plotly picks it up for the tooltip.
  geom_point(aes(text = label)) +
  geom_smooth(method = lm, color = "black", se = FALSE, formula = y ~ x) +
  labs(
    title = "Height vs Weight Separated by Type",
    x = "Height (meters)",
    y = "Weight (kg)"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(face = "bold")
  )

ggplotly(plot_obcor, tooltip = "text")
# Number of observations per Gender x Obesity class, plus a hover label.
# count() returns an ungrouped result, so no `.groups` message is emitted and
# later verbs do not silently operate per Gender.
obesity_gen <- obesity %>%
  count(Gender, Obesity, name = "total") %>%
  mutate(label = glue("Total : {total}"))
## `summarise()` has grouped output by 'Gender'. You can override using the
## `.groups` argument.
# Dodged bar chart of observation counts per obesity class, split by gender.
plot_gen <- ggplot(
  data = obesity_gen,
  aes(x = Obesity, y = total, fill = Gender, text = label)
) +
  geom_col(position = "dodge") +
  labs(
    title = "Obesity Type vs Gender",
    # The x aesthetic carries the obesity class (gender is the fill), so the
    # axis is labelled accordingly; labels follow the aesthetic through
    # coord_flip().
    x = "Obesity Type",
    y = "Total"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(face = "bold")
  ) +
  coord_flip()

ggplotly(plot_gen, tooltip = "text")
# BMI = weight (kg) / height (m)^2, rounded to two decimals and stored as a
# new column on `obesity`.
obesity$bmi <- round(obesity$Weight / obesity$Height^2, 2)

# Columns needed for the BMI-vs-age plot, plus a multi-line hover label.
obesity_bmi <- obesity %>%
  select(bmi, Age, Gender) %>%
  mutate(label = glue("BMI : {bmi}\nAge : {Age}\nGender : {Gender}"))

# Scatter plot of BMI against age, coloured by gender.
# Colour is mapped (not set to a constant) so the gender distinction is
# actually drawn; a fixed `col = "white"` would override the mapping, and
# `fill` has no effect on the default point shape.
plot_bmi <- ggplot(
  data = obesity_bmi,
  aes(x = Age, y = bmi, col = Gender, text = label)
) +
  geom_point() +
  labs(
    title = "BMI vs Age",
    x = "Age",
    y = "BMI"
  ) +
  theme(
    legend.title = element_blank(),
    plot.title = element_text(face = "bold")
  )

ggplotly(plot_bmi, tooltip = "text")
library(shiny)
library(ggplot2)
library(dplyr)
The “shiny”, “ggplot2”, and “dplyr” libraries will be imported for the Shiny app.
# Define UI for application that draws a regression line with a scatter plot.
# Inputs:  weight (numeric), height (slider), goButton (action button).
# Outputs: regPlot (clickable plot), info, regSummary, BMI_output, body_type.
ui <- fluidPage(
  # Application title
  titlePanel("Obesity Analysis"),

  # Sidebar with a numeric input for weight and slider input for height
  sidebarLayout(
    sidebarPanel(
      numericInput("weight", "Weight in kg: ",
        min = 0, max = 700, value = 0, step = 1
      ),
      sliderInput("height", "Height in meters: ",
        min = 0, max = 3, value = 0, step = 0.1
      ),
      actionButton("goButton", "Submit")
    ),

    # Show a plot with data points and regression line
    mainPanel(
      tabsetPanel(
        tabPanel(
          "Plot",
          plotOutput("regPlot", click = "plot_click"),
          verbatimTextOutput("info")
        ),
        tabPanel("Summary", verbatimTextOutput("regSummary"))
      ),
      textOutput("BMI_output"),
      textOutput("body_type")
    )
  )
  # No trailing comma after sidebarLayout(): an empty final argument is an
  # error on htmltools versions that do not tolerate empty dots.
)
# Server logic for the obesity app.
#
# Args:
#   input:  Shiny input object; reads `weight`, `height`, `goButton` and
#           `plot_click`.
#   output: Shiny output object; fills `BMI_output`, `body_type`, `regPlot`,
#           `info` and `regSummary`.
server <- function(input, output) {
  # Import data frame
  df <- read.csv("ObesityDataSet.csv")

  # The regression does not depend on any input, so fit it once per session
  # instead of on every button click.
  height_weight_fit <- lm(Weight ~ Height, data = df)

  # Calculate BMI. eventReactive() recomputes only when "Submit" is clicked;
  # a plain reactive() guarded by req(input$goButton) would also recompute on
  # every weight/height change after the first click.
  BMI_output <- eventReactive(input$goButton, {
    # Reject empty/zero inputs: they would give NA, NaN or Inf and make the
    # body-type classification fail.
    validate(
      need(isTRUE(input$weight > 0), "Please enter a weight greater than 0 kg."),
      need(isTRUE(input$height > 0), "Please select a height greater than 0 m.")
    )
    input$weight / input$height^2
  })

  # Determine body type from the BMI.
  # Lower-inclusive bands: <18.5, [18.5, 25), [25, 30), [30, 35), [35, 40), >=40.
  body_type <- reactive({
    bmi_breaks <- c(18.5, 25, 30, 35, 40)
    bmi_labels <- c(
      "Underweight", "Normal Weight", "Overweight",
      "Obese Type I", "Obese Type II", "Obese Type III"
    )
    # findInterval() returns 0 below the first break, hence the + 1L.
    bmi_labels[findInterval(BMI_output(), bmi_breaks) + 1L]
  })

  # Regression model; withheld until the first "Submit" click so the plot and
  # summary appear together with the BMI result.
  model <- reactive({
    req(input$goButton)
    height_weight_fit
  })

  # Display calculated BMI
  output$BMI_output <- renderText({
    paste0("Your BMI is: ", round(BMI_output(), 2))
  })

  # Display determined body type
  output$body_type <- renderText({
    paste0("Your body type is: ", body_type())
  })

  # Display scatter plot and a per-gender regression line
  output$regPlot <- renderPlot({
    req(model())
    ggplot(df, aes(x = Height, y = Weight, color = Gender)) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE, formula = y ~ x)
  })

  # Show clicked coordinates (values are blank until the plot is clicked)
  output$info <- renderText({
    paste0(
      "Clicked Coordinates Info:", "\nHeight = ",
      round(as.numeric(input$plot_click$x), digits = 2),
      "\nWeight = ", round(as.numeric(input$plot_click$y), digits = 2)
    )
  })

  # Show summary of regression line formula
  output$regSummary <- renderPrint({
    req(model())
    summary(model())
  })
}