This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl);
library(tidyverse);
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(gapminder)
library(ggthemes)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Example chunk: print summary statistics for every column of `cars`.
summary(object = cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
# Read the obesity data set from a CSV file into `ob` and show its first rows.
# NOTE(review): the path is absolute and machine-specific, so the document
# only knits where this exact file location exists.
ob <- read.csv(
  file = "C:\\Users\\hntn\\OneDrive - Sun Hydraulics\\Hoa\\Ftu\\DATA ANALYSIS\\Dataset for TDTU workshop 4-2022\\obesity data.csv"
)
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2
# Add two derived columns:
#   sex   - numeric indicator recoded from gender ("F" -> 1, "M" -> 0)
#   obese - BMI category label; thresholds are 18.5, 25 and 30, with each
#           lower bound inclusive. Rows with a missing bmi stay NA.
ob <- ob %>%
  mutate(
    sex = recode(gender, "F" = 1, "M" = 0),
    obese = case_when(
      bmi < 18.5 ~ "underweight",
      bmi < 25.0 ~ "normal",
      bmi < 30 ~ "overweight",
      bmi >= 30 ~ "obese"
    )
  )
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
# Derive the BMI category a second time, as a factor, using cut().
# right = FALSE makes every interval closed on the left, [lo, hi), so the
# boundary values 18.5, 25 and 30 fall into the same class as in the
# comparison-based `obese` column (bmi >= 18.5 is "normal", and so on).
# With the default right = TRUE, bmi == 18.5 would become "underweight",
# bmi == 25 "normal" and bmi == 30 "overweight".
ob <- ob %>%
  mutate(
    obese1 = cut(
      bmi,
      breaks = c(0, 18.5, 25, 30, Inf),
      labels = c("underweight", "normal", "overweight", "obese"),
      right = FALSE
    )
  )
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
# Disabled: in-place division of lean and fat by 1000. The columns are left
# unchanged here; separate lean.kg and fat.kg columns are added further down.
# ob$lean=ob$lean/1000
# ob$fat=ob$fat/1000
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
# Add lean.kg and fat.kg: the lean and fat columns divided by 1000
# (presumably grams to kilograms, going by the column names - confirm).
# Columns are referenced by bare name instead of ob$..., so the expressions
# are evaluated on the data flowing through the pipe rather than on the
# global `ob`; this also stays correct if the pipeline is later grouped
# or filtered.
ob <- ob %>%
  mutate(
    lean.kg = lean / 1000,
    fat.kg = fat / 1000
  )
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1 lean.kg fat.kg
## 1 normal 28.600 17.802
## 2 normal 40.229 8.381
## 3 normal 36.057 19.221
## 4 normal 33.094 17.472
## 5 normal 40.621 7.336
## 6 normal 30.068 14.904
## Charts 1, 2
# Histogram of pcfat with violet bar outlines and yellow fill
# (default binning, so ggplot2 reports that it used 30 bins).
p <- ggplot(ob, aes(x = pcfat)) +
  geom_histogram(colour = "violet", fill = "yellow")
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Chart 3
# Histogram of pcfat scaled to density (blue outlines) with a red kernel
# density curve drawn on top.
# after_stat(density) replaces the deprecated ..density.. notation; the
# resulting plot is the same.
p <- ggplot(data = ob, mapping = aes(x = pcfat)) +
  geom_histogram(aes(y = after_stat(density)), colour = "blue")
p2 <- p + geom_density(colour = "red")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Chart 4
# Same density-scaled histogram and density curve as chart 3, now with a
# title and axis labels.
# The y axis shows a density (the histogram is mapped to after_stat(density)),
# not a head count, so it is labelled "Density" rather than "Number of people".
# after_stat(density) replaces the deprecated ..density.. notation.
p <- ggplot(data = ob, mapping = aes(x = pcfat)) +
  geom_histogram(aes(y = after_stat(density)), colour = "blue")
p2 <- p +
  geom_density(colour = "red") +
  ggtitle("Distribution of percent body fat") +
  xlab("Percent body fat") +
  ylab("Density")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Charts 1 & 2
# Show the first rows of ob.
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1 lean.kg fat.kg
## 1 normal 28.600 17.802
## 2 normal 40.229 8.381
## 3 normal 36.057 19.221
## 4 normal 33.094 17.472
## 5 normal 40.621 7.336
## 6 normal 30.068 14.904
# Density-scaled histograms of pcfat split by gender, with bars placed side by
# side (position = "dodge"), plus one density curve per gender.
# The colour and fill mappings are set once at the plot level and inherited by
# both layers, so geom_density() needs no aes() of its own.
# after_stat(density) replaces the deprecated ..density.. notation.
p <- ggplot(data = ob, mapping = aes(x = pcfat, fill = gender, colour = gender)) +
  geom_histogram(aes(y = after_stat(density)), position = "dodge", alpha = 1) +
  geom_density(alpha = 0.01)
p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Show the first rows of ob.
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1 lean.kg fat.kg
## 1 normal 28.600 17.802
## 2 normal 40.229 8.381
## 3 normal 36.057 19.221
## 4 normal 33.094 17.472
## 5 normal 40.621 7.336
## 6 normal 30.068 14.904
# Bar chart counting the rows in each `obese` category, one fill colour per
# category. The legend is hidden because it would only repeat the x axis.
p <- ggplot(ob, aes(x = obese, fill = obese)) +
  geom_bar(position = "dodge") +
  theme(legend.position = "none")
p
## 4.2 Show obesity status (obese) by gender
head(ob)
## id gender height weight bmi age WBBMC wbbmd fat lean pcfat sex obese
## 1 1 F 150 49 21.8 53 1312 0.88 17802 28600 37.3 1 normal
## 2 2 M 165 52 19.1 65 1309 0.84 8381 40229 16.8 0 normal
## 3 3 F 157 57 23.1 64 1230 0.84 19221 36057 34.0 1 normal
## 4 4 F 156 53 21.8 56 1171 0.80 17472 33094 33.8 1 normal
## 5 5 M 160 51 19.9 54 1681 0.98 7336 40621 14.8 0 normal
## 6 6 F 153 47 20.1 52 1358 0.91 14904 30068 32.2 1 normal
## obese1 lean.kg fat.kg
## 1 normal 28.600 17.802
## 2 normal 40.229 8.381
## 3 normal 36.057 19.221
## 4 normal 33.094 17.472
## 5 normal 40.621 7.336
## 6 normal 30.068 14.904
# Count rows for every (obese, gender) combination, then express each count as
# a percentage of its `obese` category total. The result stays grouped by
# `obese`.
temp <- ob %>%
  count(obese, gender) %>%
  group_by(obese) %>%
  mutate(percent = n / sum(n) * 100)
temp
## # A tibble: 8 x 4
## # Groups: obese [4]
## obese gender n percent
## <chr> <chr> <int> <dbl>
## 1 normal F 626 72.4
## 2 normal M 239 27.6
## 3 obese F 11 73.3
## 4 obese M 4 26.7
## 5 overweight F 149 64.8
## 6 overweight M 81 35.2
## 7 underweight F 76 71.0
## 8 underweight M 31 29.0
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.