Практическое занятие: индекс массы тела

Олеся Волченко

9 сентября 2020

Что в этом скрипте?

Загружаем базу данных, отбираем нужные переменные

переменные: страна, пол, рост, вес, возраст, тип населенного пункта

#install.packages("foreign")
library(foreign)
ess7 <- read.spss("/Users/olesyavolchenko/Yandex.Disk.localized/datafiles/ESS/ESS7e02_2.sav", use.value.labels = T, to.data.frame = T)
#install.packages(dplyr)
library(dplyr)
ess71 <- dplyr::select(ess7, c(cntry, height, weight, gndr, agea, domicil))

Рассчитаем индекс массы тела

ess71$weight <- as.numeric(as.character(ess71$weight))
ess71$height <- as.numeric(as.character(ess71$height))
summary(ess71$weight)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   30.00   63.50   74.00   74.86   84.80  195.00    1162
summary(ess71$height)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    76.0   164.0   170.0   170.5   178.0   210.0     434
glimpse(ess71)
## Rows: 40,185
## Columns: 6
## $ cntry   <fct> Austria, Austria, Austria, Austria, Austria, Austria, Austria…
## $ height  <dbl> 188, 162, 158, 176, 167, 160, 186, 173, 168, 164, 168, 170, 1…
## $ weight  <dbl> 90, 90, 60, 84, 71, 62, 102, 69, 56, 102, 64, 67, 75, 95, 57,…
## $ gndr    <fct> Male, Male, Female, Male, Female, Female, Male, Female, Femal…
## $ agea    <fct> 51, 67, 89, 32, 56, 67, 66, 67, 34, 66, 61, 55, 79, 38, 35, 4…
## $ domicil <fct> Farm or home in countryside, Farm or home in countryside, Far…
ess71$bmi <- ess71$weight/(ess71$height/100)^2 # it works now
summary(ess71$bmi)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   10.52   22.48   25.14   25.69   28.09  141.97    1313
hist(ess71$bmi)

mean(ess71$bmi)
## [1] NA
mean(ess71$bmi, na.rm = T)
## [1] 25.69089
median(ess71$bmi, na.rm = T)
## [1] 25.14286

Категории BMI

Category BMI (kg/m2)
Very severely underweight <15
Severely underweight 15-16
Underweight 16-18.5
Normal (healthy weight) 18.5-25
Overweight 25-30
Obese Class I (Moderately obese) 30-35
Obese Class II (Severely obese) 35-40
Obese Class III (Very severely obese) >40

Задача - узнать, распределение по группам bmi

ess71$bmi_cat <- cut(ess71$bmi, c(15, 16, 18.5, 25, 30, 35, 40))
table(ess71$bmi_cat) # абсолютные значения
## 
##   (15,16] (16,18.5] (18.5,25]   (25,30]   (30,35]   (35,40] 
##        47       929     17997     13909      4539      1052
table(ess71$bmi_cat) / nrow(ess71) # доли
## 
##     (15,16]   (16,18.5]   (18.5,25]     (25,30]     (30,35]     (35,40] 
## 0.001169591 0.023118079 0.447853677 0.346124176 0.112952594 0.026178922
table(ess71$bmi_cat) / nrow(ess71)*100 # проценты
## 
##    (15,16]  (16,18.5]  (18.5,25]    (25,30]    (30,35]    (35,40] 
##  0.1169591  2.3118079 44.7853677 34.6124176 11.2952594  2.6178922

Типы графиков

для визуализации распределения одной переменной:

hist(ess71$bmi)

barplot(table(ess71$bmi_cat))

Типы графиков

для визуализации совместного распределения двух переменных:

plot(ess71$bmi ~ ess71$domicil)

plot(ess71$bmi ~ ess71$domicil, horizontal = T)

ess71$agea <- as.numeric(as.character(ess71$agea))
plot(ess71$bmi ~ ess71$agea)

plot(ess71$gndr ~ ess71$domicil)

plot(ess71$domicil ~ ess71$gndr)

Задача: узнать страну с наибольшими проблемами с лишним весом

bmibycountry <- aggregate(ess71$bmi ~ ess71$cntry, FUN = mean)
bmibycountry
##       ess71$cntry ess71$bmi
## 1         Austria  25.45691
## 2         Belgium  25.10576
## 3     Switzerland  24.57902
## 4         Czechia  25.97555
## 5         Germany  25.92778
## 6         Denmark  25.03615
## 7         Estonia  26.02689
## 8           Spain  25.69651
## 9         Finland  26.12081
## 10         France  24.93149
## 11 United Kingdom  26.32321
## 12        Hungary  26.23544
## 13        Ireland  25.49591
## 14         Israel  25.65554
## 15      Lithuania  26.06954
## 16    Netherlands  25.46776
## 17         Norway  25.38933
## 18         Poland  25.74144
## 19       Portugal  26.10650
## 20         Sweden  25.50314
## 21       Slovenia  26.28850
names(bmibycountry) <- c("country", "bmi")
bmibycountry[order(bmibycountry$bmi),]
##           country      bmi
## 3     Switzerland 24.57902
## 10         France 24.93149
## 6         Denmark 25.03615
## 2         Belgium 25.10576
## 17         Norway 25.38933
## 1         Austria 25.45691
## 16    Netherlands 25.46776
## 13        Ireland 25.49591
## 20         Sweden 25.50314
## 14         Israel 25.65554
## 8           Spain 25.69651
## 18         Poland 25.74144
## 5         Germany 25.92778
## 4         Czechia 25.97555
## 7         Estonia 26.02689
## 15      Lithuania 26.06954
## 19       Portugal 26.10650
## 9         Finland 26.12081
## 12        Hungary 26.23544
## 21       Slovenia 26.28850
## 11 United Kingdom 26.32321
bmibycountry[order(bmibycountry$bmi, decreasing = T),]
##           country      bmi
## 11 United Kingdom 26.32321
## 21       Slovenia 26.28850
## 12        Hungary 26.23544
## 9         Finland 26.12081
## 19       Portugal 26.10650
## 15      Lithuania 26.06954
## 7         Estonia 26.02689
## 4         Czechia 25.97555
## 5         Germany 25.92778
## 18         Poland 25.74144
## 8           Spain 25.69651
## 14         Israel 25.65554
## 20         Sweden 25.50314
## 13        Ireland 25.49591
## 16    Netherlands 25.46776
## 1         Austria 25.45691
## 17         Norway 25.38933
## 2         Belgium 25.10576
## 6         Denmark 25.03615
## 10         France 24.93149
## 3     Switzerland 24.57902

Задача со звездочкой: сделать карту

library(rworldmap)
## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type :   vignette('rworldmap')
mapdata <- joinCountryData2Map(bmibycountry,
                               joinCode = "NAME",
                               nameJoinColumn = "country", verbose = T)
## 20 codes from your data successfully matched countries in the map
## 1 codes from your data failed to match with a country code in the map
##      failedCodes failedCountries
## [1,] NA          "Czechia"      
## 223 codes from the map weren't represented in your data
bmibycountry$country <- as.character(bmibycountry$country)

bmibycountry$country[bmibycountry$country == "Czechia"] <- "Czech Republic"
mapdata <- joinCountryData2Map(bmibycountry,
                               joinCode = "NAME",
                               nameJoinColumn = "country", verbose = T)
## 21 codes from your data successfully matched countries in the map
## 0 codes from your data failed to match with a country code in the map
##      failedCodes failedCountries
## 222 codes from the map weren't represented in your data
mapCountryData(mapdata, nameColumnToPlot = "bmi", xlim = c(-20, 59),
               ylim = c(35, 71))

Если мы хотим дальше более подробно работать с данными по Соединённому Королевству

unique(ess71$cntry)
##  [1] Austria        Belgium        Switzerland    Czechia        Germany       
##  [6] Denmark        Estonia        Spain          Finland        France        
## [11] United Kingdom Hungary        Ireland        Israel         Lithuania     
## [16] Netherlands    Norway         Poland         Portugal       Sweden        
## [21] Slovenia      
## 21 Levels: Austria Belgium Switzerland Czechia Germany Denmark ... Slovenia
ess71uk <- ess71[ess71$cntry == "United Kingdom" ,]