# Load the osteoporosis data set.
md <- read.csv("Osteo data.csv")

# Derive the BMI category column `bmig` from the numeric `bmi` column.
# The intervals are closed on the left (`right = FALSE`):
#   bmi <  18.5          -> "underweight"
#   18.5 <= bmi < 25.0   -> "normal"
#   bmi >= 25.0          -> "overweight"
# A BMI of exactly 25.0 is therefore classified as "overweight" instead of
# silently falling between the "normal" and "overweight" conditions.
# Rows with a missing BMI stay NA. `as.character()` keeps the column a plain
# character vector rather than the factor that `cut()` returns.
md$bmig <- as.character(
  cut(
    md$bmi,
    breaks = c(-Inf, 18.5, 25.0, Inf),
    labels = c("underweight", "normal", "overweight"),
    right = FALSE
  )
)

# Show the first rows to check the new column.
head(md)
## id lean.mass fat.mass pcfat age height weight bmi osta osteo osteo.group
## 1 1 27.98 16.49 37.09 76 156.0 45.0 18.5 6.2 2 Osteoporosis
## 2 8 29.02 27.54 48.70 54 153.0 56.0 23.9 -0.4 1 Osteopenia
## 3 21 31.72 20.65 39.43 56 158.2 51.5 20.6 0.9 1 Osteopenia
## 4 38 35.96 21.96 37.92 54 154.0 51.0 21.5 0.6 1 Osteopenia
## 5 39 35.00 26.29 42.89 60 159.5 60.0 23.6 0.0 1 Osteopenia
## 6 53 32.58 19.82 37.82 53 156.0 51.0 21.0 0.4 1 Osteopenia
## bmig
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.6
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.2.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
# Find outlying observations by weight and height within each BMI group.
#
# For every `bmig` group the quartiles and interquartile range of `weight`
# (q1, q3, IQR) and of `height` (h_q1, h_q3, h_IQR) are added as columns.
# A row is kept in `dl_nlai` when its weight OR its height lies outside the
# 1.5 * IQR fences of its own group.
#
# NOTE(review): the previous comment here mentioned adding a row-count
# column, but no such column is created by this code.
dl_nlai <- md |>
  group_by(bmig) |>
  mutate(
    # na.rm = TRUE: quantile() stops with an error on missing values otherwise
    q1 = quantile(weight, 0.25, na.rm = TRUE),
    q3 = quantile(weight, 0.75, na.rm = TRUE),
    IQR = q3 - q1,
    h_q1 = quantile(height, 0.25, na.rm = TRUE),
    h_q3 = quantile(height, 0.75, na.rm = TRUE),
    h_IQR = h_q3 - h_q1
  ) |>
  filter(
    weight < q1 - 1.5 * IQR |
      weight > q3 + 1.5 * IQR |
      height < h_q1 - 1.5 * h_IQR |
      height > h_q3 + 1.5 * h_IQR
  ) |>
  # Drop the grouping so later operations on dl_nlai are not silently
  # performed per BMI group.
  ungroup()
# Scatter plot of height against weight, coloured by BMI group, with the
# outliers from `dl_nlai` highlighted and labelled, plus one linear trend
# line per BMI group.
ggplot(data = md, mapping = aes(weight, height, colour = bmig)) +
  # All observations, semi-transparent so overlapping points stay visible
  geom_point(size = 2, alpha = 0.5) +
  # Outliers drawn larger and bolder; x and y are inherited from the
  # plot-level mapping
  geom_point(
    data = dl_nlai,
    shape = 21, # circle with an outline
    stroke = 1.5, # outline thickness
    size = 4,
    colour = "darkblue"
  ) +
  # Print the weight and height values next to each outlier
  geom_text(
    data = dl_nlai,
    mapping = aes(
      label = paste0("weight: ", weight, "\nheight: ", height)
    ),
    colour = "black",
    size = 2,
    vjust = -1 # vertical position of the text: -1 puts it above the point
  ) +
  # Trend line
  geom_smooth(
    method = "lm", # straight-line fit
    linewidth = 1,
    se = FALSE
  ) +
  # Optional: customise the axis breaks
  # scale_x_continuous(breaks = seq(from = 30, to = 200, by = 5)) +
  # Unnamed values are matched to the bmig groups in the scale's level order
  scale_colour_manual(values = c("red", "lightblue", "lightgreen")) +
  labs(
    title = "Bieu do phan tan chieu cao – can nang theo BMI (ngoai lai)",
    x = "Can nang",
    y = "Chieu cao"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
