# Load the data set and derive a BMI category column `bmig` from `bmi`.
md <- read.csv("Osteo data.csv")

# Classify BMI with half-open intervals [lower, upper) so that every
# non-missing value receives exactly one label:
#   bmi < 18.5          -> "underweight"
#   18.5 <= bmi < 25.0  -> "normal"
#   bmi >= 25.0         -> "overweight"
# A BMI of exactly 25.0 now falls in "overweight" instead of being left NA.
# as.character() keeps `bmig` a character column rather than a factor.
md$bmig <- as.character(cut(
  md$bmi,
  breaks = c(-Inf, 18.5, 25, Inf),
  labels = c("underweight", "normal", "overweight"),
  right = FALSE
))
head(md)
##   id lean.mass fat.mass pcfat age height weight  bmi osta osteo  osteo.group
## 1  1     27.98    16.49 37.09  76  156.0   45.0 18.5  6.2     2 Osteoporosis
## 2  8     29.02    27.54 48.70  54  153.0   56.0 23.9 -0.4     1   Osteopenia
## 3 21     31.72    20.65 39.43  56  158.2   51.5 20.6  0.9     1   Osteopenia
## 4 38     35.96    21.96 37.92  54  154.0   51.0 21.5  0.6     1   Osteopenia
## 5 39     35.00    26.29 42.89  60  159.5   60.0 23.6  0.0     1   Osteopenia
## 6 53     32.58    19.82 37.82  53  156.0   51.0 21.0  0.4     1   Osteopenia
##     bmig
## 1 normal
## 2 normal
## 3 normal
## 4 normal
## 5 normal
## 6 normal
library(ggplot2)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.6
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.2.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)


# Add a row-count column (TODO: no code for this step follows yet)

# Find outlier rows within each BMI group (bmig): a row is kept when its
# weight OR its height lies outside the 1.5 * IQR fences of its own group.
# The helper columns (q1, q3, IQR, h_q1, h_q3, h_IQR) stay in the result.
dl_nlai <- md |>
  group_by(bmig) |>
  mutate(
    # Per-group quartiles and interquartile range of weight
    q1 = quantile(weight, 0.25, na.rm = TRUE),
    q3 = quantile(weight, 0.75, na.rm = TRUE),
    IQR = q3 - q1,

    # Per-group quartiles and interquartile range of height
    h_q1 = quantile(height, 0.25, na.rm = TRUE),
    h_q3 = quantile(height, 0.75, na.rm = TRUE),
    h_IQR = h_q3 - h_q1
  ) |>
  filter(
    weight < q1 - 1.5 * IQR |
      weight > q3 + 1.5 * IQR |
      height < h_q1 - 1.5 * h_IQR |
      height > h_q3 + 1.5 * h_IQR
  ) |>
  # Drop the grouping so later dplyr steps do not silently run per group
  ungroup()

# Scatter plot of height against weight, coloured by BMI group, with the
# outlier rows from `dl_nlai` highlighted and labelled, plus one linear
# trend line per BMI group.
ggplot(md, aes(x = weight, y = height, colour = bmig)) +
  geom_point(alpha = 0.5, size = 2) +

  # Highlight the outliers (larger, bold ring); x and y are inherited
  geom_point(
    data = dl_nlai,
    color = "darkblue",
    size = 4,
    shape = 21, # shape: circle with an outline
    stroke = 1.5 # outline thickness
  ) +
  geom_text( # print the weight/height values of each outlier
    data = dl_nlai,
    aes(label = paste0("weight: ", weight, "\nheight: ", height)),
    vjust = -1, # vertical text position: -1 places the text above the point
    size = 2,
    color = "black"
  ) +

  # Trend line
  geom_smooth(
    method = "lm", # straight-line (linear model) fit
    se = FALSE,
    linewidth = 1
  ) +

  #scale_x_continuous(breaks=seq(from=30, to=200, by=5 ))+ # change the axis breaks
  # Colours are pinned by group name so the mapping does not depend on the
  # alphabetical order of the levels or on every group being present.
  scale_color_manual(
    values = c(
      normal = "red",
      overweight = "lightblue",
      underweight = "lightgreen"
    )
  ) +
  labs(
    x = "Can nang",
    y = "Chieu cao",
    title = "Bieu do phan tan chieu cao – can nang theo BMI (ngoai lai)"
  ) +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'