R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl);
library(tidyverse);
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(magrittr)
## 
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
## 
##     set_names
## The following object is masked from 'package:tidyr':
## 
##     extract
library(gapminder)
library(ggthemes)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Per-column summary statistics (min, quartiles, median, mean, max) of `cars`.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00
# Read the obesity dataset from CSV into `ob` and preview its first rows.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# on the original author's computer.
ob <- read.csv(
  file = "C:\\Users\\hntn\\OneDrive - Sun Hydraulics\\Hoa\\Ftu\\DATA ANALYSIS\\Dataset for TDTU workshop 4-2022\\obesity data.csv"
)
head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2
# Numeric sex indicator: 1 for "F", 0 for "M".
ob <- ob %>%
  mutate(sex = recode(gender, "F" = 1, "M" = 0))

# Manual BMI classification into a character column `obese`.
# Bins are left-closed: <18.5, [18.5, 25), [25, 30), >=30.
ob$obese[ob$bmi < 18.5] <- "underweight"
ob$obese[ob$bmi >= 18.5 & ob$bmi < 25.0] <- "normal"
ob$obese[ob$bmi >= 25.0 & ob$bmi < 30] <- "overweight"
ob$obese[ob$bmi >= 30] <- "obese"

head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
# Same four BMI classes as the manual `obese` column, stored as a factor
# whose levels follow the order given in `labels`.
# right = FALSE makes the bins left-closed -- [0, 18.5), [18.5, 25),
# [25, 30), [30, Inf) -- so the boundary values 18.5, 25 and 30 get the same
# class as in the manual `<` / `>=` coding. With cut()'s default right-closed
# bins they would each fall one class lower.
ob <- ob %>%
  mutate(
    obese1 = cut(
      bmi,
      breaks = c(0, 18.5, 25, 30, Inf),
      labels = c("underweight", "normal", "overweight", "obese"),
      right = FALSE
    )
  )
head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
# Disabled alternative: overwrite `lean` and `fat` in place with their values
# divided by 1000 (presumably grams to kilograms, going by the `lean.kg` /
# `fat.kg` column names used later in this document). Left commented out so
# the original columns stay untouched.
# ob$lean=ob$lean/1000
# ob$fat=ob$fat/1000
# Preview the first rows of `ob`.
head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
# Add `lean.kg` and `fat.kg`: the `lean` and `fat` columns divided by 1000.
# Columns are referenced by bare name so mutate() reads them from the data
# being piped in, rather than from whatever the global `ob` holds at that
# moment (the `ob$...` form would silently misalign if the pipeline ever
# filtered, grouped or reordered rows first).
ob <- ob %>%
  mutate(
    lean.kg = lean / 1000,
    fat.kg = fat / 1000
  )

head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1 lean.kg fat.kg
## 1 normal  28.600 17.802
## 2 normal  40.229  8.381
## 3 normal  36.057 19.221
## 4 normal  33.094 17.472
## 5 normal  40.621  7.336
## 6 normal  30.068 14.904

TASK3: SOAN BIEU DO PHAN BO BIEN SO DUNG GGPLOT

## BIEU DO 1,2

# Histogram of `pcfat`: yellow bars with a violet outline.
p <- ggplot(ob, aes(pcfat)) +
  geom_histogram(fill = "yellow", color = "violet")

p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## BIEU DO 3

# Density-scaled histogram of `pcfat` (blue outline) with a kernel density
# curve (red) drawn on top.
# after_stat(density) replaces the deprecated `..density..` notation; it puts
# the histogram on the same y scale as geom_density().
p <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(aes(y = after_stat(density)), col = "blue")
p2 <- p + geom_density(col = "red")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## BIEU DO 4

# Density-scaled histogram of `pcfat` with a density curve, plus title and
# axis labels.
# after_stat(density) replaces the deprecated `..density..` notation.
p <- ggplot(data = ob, aes(x = pcfat)) +
  geom_histogram(aes(y = after_stat(density)), col = "blue")
p2 <- p + geom_density(col = "red")
# The y axis shows density, not counts, so it is labelled accordingly.
p2 <- p2 +
  ggtitle("Distribution of percent body fat") +
  xlab("Percent body fat") +
  ylab("Density")
p2
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

VE BIEU DO PHAN BO TI TRONG MO pcfat THEO GIOI TINH

## BIEU DO 1 & 2

head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1 lean.kg fat.kg
## 1 normal  28.600 17.802
## 2 normal  40.229  8.381
## 3 normal  36.057 19.221
## 4 normal  33.094 17.472
## 5 normal  40.621  7.336
## 6 normal  30.068 14.904
# Density-scaled histograms of `pcfat` by gender, dodged side by side, with a
# per-gender density curve on top.
# `fill` and `color` are mapped to gender once here and inherited by both
# layers, so the density layer needs no aes() of its own.
p <- ggplot(data = ob, aes(x = pcfat, fill = gender, color = gender))

# after_stat(density) replaces the deprecated `..density..` notation.
# alpha = 0.01 keeps the area under each density curve almost transparent so
# the histogram bars stay visible.
p <- p +
  geom_histogram(aes(y = after_stat(density)), position = "dodge", alpha = 1) +
  geom_density(alpha = 0.01)

p
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

TASK4: SOAN BIEU DO THANH : THE HIEN TINH TRANG PHAN BO CUA BEO PHI BANG BIEU DO THANH

head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1 lean.kg fat.kg
## 1 normal  28.600 17.802
## 2 normal  40.229  8.381
## 3 normal  36.057 19.221
## 4 normal  33.094 17.472
## 5 normal  40.621  7.336
## 6 normal  30.068 14.904
# Bar chart of the number of people in each BMI class.
# `obese` is a character column, so ggplot would sort the bars alphabetically
# (normal, obese, overweight, underweight). Converting it to a factor with
# explicit levels, on a copy of the data used only for this plot, orders the
# bars from underweight to obese. `ob` itself is not modified.
p <- ggplot(
  data = transform(
    ob,
    obese = factor(
      obese,
      levels = c("underweight", "normal", "overweight", "obese")
    )
  ),
  aes(x = obese, fill = obese)
)

# The legend would only repeat the x-axis categories, so it is hidden.
p <- p + geom_bar(position = "dodge") + theme(legend.position = "none")

p

## 4.2 the hien tinh trang beo phi obese theo gioi tinh (gender)

head(ob)
##   id gender height weight  bmi age WBBMC wbbmd   fat  lean pcfat sex  obese
## 1  1      F    150     49 21.8  53  1312  0.88 17802 28600  37.3   1 normal
## 2  2      M    165     52 19.1  65  1309  0.84  8381 40229  16.8   0 normal
## 3  3      F    157     57 23.1  64  1230  0.84 19221 36057  34.0   1 normal
## 4  4      F    156     53 21.8  56  1171  0.80 17472 33094  33.8   1 normal
## 5  5      M    160     51 19.9  54  1681  0.98  7336 40621  14.8   0 normal
## 6  6      F    153     47 20.1  52  1358  0.91 14904 30068  32.2   1 normal
##   obese1 lean.kg fat.kg
## 1 normal  28.600 17.802
## 2 normal  40.229  8.381
## 3 normal  36.057 19.221
## 4 normal  33.094 17.472
## 5 normal  40.621  7.336
## 6 normal  30.068 14.904
# Count people per (obese, gender) pair, then express each count as a
# percentage of its `obese` class, so percentages sum to 100 within a class.
# The result is left grouped by `obese`.
temp <- ob %>%
  count(obese, gender) %>%
  group_by(obese) %>%
  mutate(percent = n / sum(n) * 100)

temp
## # A tibble: 8 x 4
## # Groups:   obese [4]
##   obese       gender     n percent
##   <chr>       <chr>  <int>   <dbl>
## 1 normal      F        626    72.4
## 2 normal      M        239    27.6
## 3 obese       F         11    73.3
## 4 obese       M          4    26.7
## 5 overweight  F        149    64.8
## 6 overweight  M         81    35.2
## 7 underweight F         76    71.0
## 8 underweight M         31    29.0

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.