R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load the raw survey export into `df`. read.csv2() defaults to ";" as the
# field separator and "," as the decimal mark.
df <- read.csv2(
  "D:\\TAM DAN NON-ORTHO\\15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE\\Edit 15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE.csv"
)
library(lessR)
## Warning: package 'lessR' was built under R version 4.5.2
## 
## lessR 4.5                            feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is the default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation to pivot tables.
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including modern time series forecasting
##   and many, new Plotly interactive visualizations output. Most
##   visualization functions are now reorganized to three functions:
##      Chart(): type="bar", "pie", "radar", "bubble", "treemap", "icicle"
##      X(): type="histogram", "density", "vbs" and more
##      XY(): type="scatter" for a scatterplot, or "contour", "smooth"
##    Most previous function calls still work, such as:
##      BarChart(), Histogram, and Plot().
##   Enter: news(package="lessR"), or ?Chart, ?X, or ?XY
## There is also Flows() for Sankey flow diagrams, see ?Flows
## 
## Interactive data analysis for constructing visualizations.
##   Enter: interact()
library(labelled)
## Warning: package 'labelled' was built under R version 4.5.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lessR':
## 
##     order_by, recode, rename
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(writexl)
## Warning: package 'writexl' was built under R version 4.5.3
# ==============================================================================
# STEP 1: RECODE THE WHOLE DATA SET IN A SINGLE mutate() CALL
# ==============================================================================
# Each across() below turns numeric codes into a factor with readable labels.
# any_of() and matches() simply select nothing for columns that are absent from
# `df`, so a partial export does not raise an error.
#
# Explicitly named Yes/No items. They live in a variable so that the
# tooth-status selector in section 7 can exclude them: A11-A14 and A41-A44 are
# listed here AND match the tooth regex "^A[1-4][1-8]$". The across() calls of
# one mutate() run in order on the already-updated columns, so those columns
# were first turned into No/Yes factors and then re-factored against levels
# 0:9, which matched nothing and left them entirely NA.
# NOTE(review): this keeps the No/Yes coding for the colliding columns; if they
# hold tooth-status codes in this export, remove them from this vector instead.
yes_no_vars <- c(
  "B1", "B2", "B3", "B4", "B5", "C3", "C4", "C5", "D3", "D14A", "D14B", "D14C", "D15A", "D15B", "D17", "E9A", "E9B", "E9C", "E9D",
  "FA6K_R", "FA6K_L", "FA6E_R", "FA6E_L", "FA6F_R", "FA6F_L", "FA6T_R", "FA6T_L", "HFSKs", "SCR1",
  "RLS1", "RLS2", "RLS3", "MB1", "MB2", "MB3", "MB4", "MB5", "MB6", "MB7", "MB8", "MB9", "MB10", "JH1", "JH2", "JH3", "JH4", "JH5",
  "TS1.1", "TS1.2", "TS1.3", "TS1.4", "TS1.5", "TS1.6", "TS1.7", "TS1.8", "TS2.1", "TS2.2", "TS2.3", "TS2.4", "TS2.5", "TS2.6", "TS2.7", "TS2.8", "TS3", "TS4",
  "PB7A", "PB7B", "PB7C", "PB7D", "PB7E", "PB7F", "PB8A", "PB8B", "PB8C", "PB8D", "BE8",
  "A1", "A41", "A42", "A43", "A44", "A5", "A71", "A72", "A73", "A74", "A8", "A81", "A82", "A83", "A9", "A91", "A92", "A93",
  "A10", "A101", "A102", "A103", "A11", "A111", "A112", "A113", "A12", "A121", "A122", "A123", "A13", "A131", "A132", "A133", "A14", "A141", "A142", "A143",
  "HV3", "HV6", "HV7", "HV8", "KT7", "TT31", "TT34"
)

df <- df %>%
  mutate(
    # --- 1. GENDER & DEMOGRAPHICS ---
    across(any_of("Gender"), ~ factor(., levels = c(0, 1), labels = c("Male", "Female"))),
    across(any_of("DS4"), ~ factor(., levels = c(1, 2), labels = c("Kinh", "Other"))),
    across(any_of("DS5"), ~ factor(., levels = c(0, 1, 2), labels = c("Hue City", "Urban", "Rural"))),
    across(any_of("DS6"), ~ factor(., levels = c(0, 1, 2, 3), labels = c("Parents house", "Relatives house", "Rented house", "Dormitory"))),
    across(any_of("DS7"), ~ factor(., levels = c(0:5), labels = c("Farmer", "Manual laborer", "Administrative staff", "Professional", "Business/trading", "Other"))),
    across(any_of("DS8"), ~ factor(., levels = c(0:6), labels = c("Farmer", "Manual laborer", "Administrative staff", "Professional", "Business/trading", "Housewife", "Other"))),
    across(any_of(c("DS9", "DS10")), ~ factor(., levels = c(0:4), labels = c("Primary", "Secondary", "High school", "Vocational diploma", "College/University/Postgraduate"))),

    # --- 2. LARGE YES/NO GROUP (0 = No, 1 = Yes) ---
    # The named items above plus every column of the multiple-choice questions,
    # which are recognised by their name prefix.
    across(
      c(any_of(yes_no_vars),
        matches("^DS11|^DS12|^TT2|^HV11|^HV12|^HV13|^KT6|^KT9|^KT14|^KT15|^KT16")),
      ~ factor(., levels = c(0, 1), labels = c("No", "Yes"))
    ),

    # --- 3. YES/NO/DON'T KNOW GROUP (0 = No, 1 = Yes, 2 = Don't know) ---
    across(
      any_of(c("PSQ1", "PSQ2", "PSQ3", "PSQ4", "PSQ5", "PSQ6A", "PSQ6B", "PSQ7", "PSQ8", "PSQ9", "PSQ10", "PSQ11", "PSQ12", "PSQ13", "PSQ14", "PSQ15", "PSQ16", "PSQ17", "PSQ18", "PSQ19", "PSQ20", "PSQ21", "PSQ22",
               "BE1", "BE4", "BE10", "KT8", "KT11", "KT12")),
      ~ factor(., levels = c(0, 1, 2), labels = c("No", "Yes", "Don't know"))
    ),

    # --- 4. HEALTH AND HABITS GROUP (HV, TT, OBC/VM) ---
    across(any_of("TT1"), ~ factor(., levels = c(0, 1, 2), labels = c("No", "Every 1-2 years", "Only when sick"))),
    across(any_of("HV1"), ~ factor(., levels = c(0:3), labels = c("Irregularly", "Once", "Twice", "At least 3 times"))),
    across(any_of("HV2"), ~ factor(., levels = c(0:3), labels = c("Less than 1 minute", "1-3 minutes", "More than 3 minutes", "Until it feels clean"))),
    across(any_of("HV4"), ~ factor(., levels = c(0:2), labels = c("Soft bristles", "Hard bristles", "Any type"))),
    across(any_of("HV5"), ~ factor(., levels = c(0:3), labels = c("Every 3-6 months", "When bristles wear out", "When the handle breaks", "When new designs are available"))),
    across(any_of("HV9"), ~ factor(., levels = c(0:2), labels = c("No", "Yes", "Don't remember"))),
    across(any_of("HV10"), ~ factor(., levels = c(0:3), labels = c("Under 6 months ago", "6-12 months ago", "1-2 years ago", "Over 2 years ago"))),
    across(any_of("HV14"), ~ factor(., levels = c(0:3), labels = c("Once per day", "More than once per day", "1-2 times per week", "Never"))),
    across(any_of("HV15"), ~ factor(., levels = c(0:2), labels = c("Never", "Yes", "Used to, but quit"))),
    across(any_of("HV16"), ~ factor(., levels = c(0:2), labels = c("Occasionally", "1-5 cigarettes/day", "More than 5/day"))),
    across(any_of("HV17"), ~ factor(., levels = c(0:3), labels = c("Never", "Occasionally", "Weekly", "Daily"))),

    # VM1-VM2 (night-time frequency)
    across(any_of(c("VM1", "VM2")), ~ factor(., levels = c(0:4), labels = c("None", "<1 Night/month", "1-3 Nights/month", "1-3 Nights/week", "4-7 Nights/week"))),
    # VM3 (daytime frequency)
    across(any_of("VM3"), ~ factor(., levels = c(0:4), labels = c("None", "Rarely", "Occasionally", "Frequently", "Always"))),
    # VM4-VM21 (frequency of daytime behaviours)
    across(any_of(paste0("VM", 4:21)), ~ factor(., levels = c(0:4), labels = c("None of time", "A little of the time", "Some of the time", "Most of the time", "All of the time"))),

    # --- 5. KNOWLEDGE GROUP (KT) ---
    across(any_of("KT1"), ~ factor(., levels = c(0:4), labels = c("Healthy gums", "Dental infection", "Calcium deficiency", "Gingivitis", "Don't know"))),
    across(any_of("KT2"), ~ factor(., levels = c(0:4), labels = c("Regular brushing/flossing", "Occasionally", "Vitamin C supplements", "Eating soft food", "Don't know"))),
    across(any_of(c("KT3", "KT4")), ~ factor(., levels = c(0:3), labels = c("Soft deposits on teeth", "Tooth discoloration", "Hard deposits", "Don't know"))),
    # NOTE(review): KT5 is the only knowledge item coded from 1 instead of 0;
    # confirm against the codebook.
    across(any_of("KT5"), ~ factor(., levels = c(1:4), labels = c("Gingivitis", "Tooth discoloration", "Cavities", "Don't know"))),
    across(any_of("KT10"), ~ factor(., levels = c(0:4), labels = c("Don't know", "1-3 months", "4-6 months", "7-12 months", "Over a year"))),
    across(any_of("KT13"), ~ factor(., levels = c(0:3), labels = c("Don't know", "Every 6 months", "Once a year", "Every 2 years"))),

    # --- 6. SLEEP AND PAIN GROUP (ESS, PB, BE, A3) ---
    across(any_of(paste0("ESS", 1:8)), ~ factor(., levels = c(0:3), labels = c("Would never doze", "Slight chance of dozing", "Moderate chance of dozing", "High chance of dozing"))),
    across(any_of(c("PB9A", "PB9B", "PB9C", "PB9D", "PB9E", "PB9F", "PB9G", "PB9H", "PB9I", "PB10", "PB11", "PB12")),
           ~ factor(., levels = c(0:3), labels = c("Not during the past month", "Less than one a week", "One or twice a week", "Three or more times a week"))),
    across(any_of(c("BE3", "BE6", "BE7", "BE9")), ~ factor(., levels = c(0:4), labels = c("Nearly every day", "3-4 times a week", "1-2 times a week", "1-2 times a month", "Never or nearly never"))),
    across(any_of("PB13"), ~ factor(., levels = c(0:3), labels = c("No problem at all", "Only a very slight problem", "Somewhat of a problem", "A very big problem"))),
    across(any_of("PB14"), ~ factor(., levels = c(0:3), labels = c("Very good", "Fairly good", "Fairly bad", "Very bad"))),
    across(any_of("PB15"), ~ factor(., levels = c(0:3), labels = c("Never", "Occasionally", "Often", "Always"))),
    across(any_of("BE2"), ~ factor(., levels = c(0:3), labels = c("Slightly louder than breathing", "As loud as talking", "Louder than talking", "Very loud - can be heard in adjacent rooms"))),
    across(any_of("BE5"), ~ factor(., levels = c(0:4), labels = c("Almost every day", "3-4 times per week", "1-2 times per week", "1-2 times per month", "Rarely or never"))),
    across(any_of("A3"), ~ factor(., levels = c(0:2), labels = c("No pain", "Pain comes and goes", "Constant pain"))),

    # --- 7. CLINICAL DENTAL GROUP (teeth, plaque, gingiva, periodontal pocket) ---
    # Whole-tooth columns (A11-A48) or the 160 tooth-surface columns. Columns
    # already recoded as Yes/No items in section 2 are excluded so they are not
    # factored a second time (which would blank them to NA).
    across(
      matches("^A[1-4][1-8]$|.*_Occlusal_Incisal$|.*_Occlisal_Incisal$|.*_Buccal$|.*_Lingua$|.*_Mesia$|.*_Distal$") &
        !any_of(yes_no_vars),
      ~ factor(., levels = c(0:9), labels = c("Normal", "Tooth decay", "Filled, with decay", "Filled, no decay", "Missing due to decay", "Missing for another reason", "Fissure sealant", "Fix dental prosthesis/crown, abutment, veneer", "Unerupted", "Not recorded"))
    ),
    # Plaque Index (PI) & Calculus (CI); the suffix patterns cover both the
    # plaque and the gingival surface columns. Codes 88/888 and 99/999 share a
    # label, so each pair collapses into one factor level.
    across(
      starts_with("PI") | starts_with("CI") | matches(".*_Disto$|.*_Buc$|.*_Mesio$|.*_Lin$"),
      ~ factor(., levels = c(0:3, 88, 888, 99, 999), labels = c("Score 0", "Score 1", "Score 2", "Score 3", "Not recorded", "Not recorded", "Missing tooth", "Missing tooth"))
    ),
    # CPI
    across(
      matches("_Bleed.*|_Periop.*"),
      ~ factor(., levels = c(0:4, 88), labels = c("Healthy", "Bleeding observed", "Calculus detected", "Pocket 4-5mm", "Pocket 6mm+", "Not recorded"))
    )
  )

# ==============================================================================
# STEP 2: ATTACH STATIC DESCRIPTIVE LABELS TO THE FIXED VARIABLES
# ==============================================================================
# Variable name -> question text, one entry per line.
label_list <- list(
  # Demographics
  Gender = "Gender",
  DS3 = "Age",
  DS4 = "Ethnicity",
  DS5 = "Where are you from",
  DS6 = "Where do you currently live?",
  DS7 = "Father's occupation",
  DS8 = "Mother's occupation",
  DS9 = "Father's education level",
  DS10 = "Mother's education level",
  # General health
  TT1 = "Do you regularly get health check-ups?",
  TT31 = "In the past 6 months, have you used any type of medication?",
  TT34 = "Did you buy the medicine yourself or take it as prescribed by a doctor?",
  # Systemic diseases (one column per option)
  TT2A = "Systemic diseases: None",
  TT2B = "Systemic diseases: Asthma",
  TT2C = "Systemic diseases: Allergy",
  TT2D = "Systemic diseases: Diabetes",
  TT2E = "Systemic diseases: Digestive disorders",
  TT2F = "Systemic diseases: Kidney disease",
  TT2G = "Systemic diseases: Liver disease",
  TT2H = "Systemic diseases: Cardiovascular disease",
  TT2I = "Systemic diseases: Hypertension",
  TT2K = "Systemic diseases: Blood disorders",
  TT2L = "Systemic diseases: Thyroid disease",
  TT2M = "Systemic diseases: Cancer",
  TT2O = "Systemic diseases: Other",
  # NOTE(review): TT2P and TT2Q carry the same text; confirm whether one of
  # them should be a different disease.
  TT2P = "Systemic diseases: Low blood pressure/Osteoporosis",
  TT2Q = "Systemic diseases: Low blood pressure/Osteoporosis",
  # Oral-hygiene habits
  HV1 = "How many times a day do you brush your teeth?",
  HV2 = "How long do you brush each time?",
  HV3 = "Do you use toothpaste when brushing?",
  HV4 = "What kind of toothbrush do you use?",
  HV5 = "When do you replace your toothbrush?",
  HV6 = "Do you use toothpicks?",
  HV7 = "Do you use dental floss?",
  HV8 = "Do you use mouthwash?",
  HV9 = "Have you ever visited a dentist?",
  HV10 = "When was your last dentist visit?",
  HV14 = "How often do you eat sweets?",
  HV15 = "Do you smoke?",
  HV16 = "If you currently smoke, how do you smoke?",
  HV17 = "Do you drink alcohol?",
  # Knowledge
  KT1 = "What does bleeding gums mean to you?",
  KT2 = "How can gingivitis be prevented?",
  KT3 = "What is dental plaque?",
  KT4 = "What is tartar (calculus)?",
  KT5 = "What can plaque lead to?",
  KT7 = "Do you think cavities affect a person's appearance?",
  KT8 = "Does oral health affect overall health?",
  KT10 = "How often should you replace your toothbrush?",
  KT11 = "Does fluoride help strengthen teeth?",
  KT12 = "Does flossing help prevent gum disease?",
  KT13 = "How often should you have dental check-ups?"
)

# Apply only the labels whose variable actually exists in the current data
# frame, so that a missing column does not trigger a "can't find variable"
# error.
var_label(df) <- label_list[is.element(names(label_list), names(df))]

# ==============================================================================
# STEP 3: AUTOMATIC LABELLING LOOP FOR THE HUNDREDS OF REMAINING VARIABLES
#         (MULTIPLE-CHOICE ITEMS & CLINICAL SURFACES)
# ==============================================================================
# Question text shared by every column of a multiple-choice item, keyed by the
# regular expression that recognises the item's column-name prefix.
multi_choice_labels <- c(
  "^DS11" = "Does your family have the following items?",
  "^DS12" = "In your family, does anyone have the following items?",
  "^HV11" = "Why did you visit the dentist?",
  "^HV12" = "Why haven't you seen a dentist in the past 3 years?",
  "^HV13" = "Which of the following foods do you often eat?",
  "^KT6" = "What causes cavities?",
  "^KT9" = "Why do we brush our teeth?",
  "^KT14" = "What is the best way to prevent cavities?",
  "^KT15" = "Which foods are beneficial for oral health?",
  "^KT16" = "What are the harms of smoking?"
)

# Suffixes that mark a per-tooth clinical column (plaque, gingival, surface
# status, CPI bleeding / pocket).
surface_pattern <- "_Occl[u|i]sal_Incisal$|_Buccal$|_Lingua$|_Mesia$|_Distal$|_Disto$|_Buc$|_Mesio$|_Lin$|_Bleed.*|_Periop.*"

for (col in names(df)) {
  # Multiple-choice items: every option column receives the question text.
  for (prefix in names(multi_choice_labels)) {
    if (grepl(prefix, col)) {
      var_label(df[[col]]) <- multi_choice_labels[[prefix]]
    }
  }

  # Everything below applies to tooth-surface columns only.
  if (!grepl(surface_pattern, col)) {
    next
  }

  # Build "Tooth <prefix> <surface name>" by swapping the suffix for its
  # readable name; whatever precedes the suffix is kept unchanged.
  new_label <- gsub("_Occl[u|i]sal_Incisal", " Occlusal/Incisal Surface", col)
  new_label <- gsub("_Buccal|_Buc", " Buccal Surface", new_label)
  new_label <- gsub("_Lingua|_Lin", " Lingual Surface", new_label)
  new_label <- gsub("_Mesia|_Mesio", " Mesial Surface", new_label)
  new_label <- gsub("_Distal|_Disto", " Distal Surface", new_label)
  new_label <- gsub("_Bleed.*", " Bleeding (CPI)", new_label)
  new_label <- gsub("_Periop.*", " Periodontal Pocket (CPI)", new_label)
  new_label <- paste("Tooth", new_label)
  var_label(df[[col]]) <- new_label
}

# Done: `df` is now recoded and labelled, ready for analysis.
# To export it with the original short column names instead, use:
# library(writexl)
# write_xlsx(df, "Master_Cleaned_Data.xlsx")

# Build the Excel header row from the variable labels, so that the long
# question text replaces the short variable codes in the exported copy.
export_names <- var_label(df, unlist = TRUE)

# A column without a label yields an empty (or missing) entry, which would
# leave its header cell blank; fall back to the column's own name.
no_label <- is.na(export_names) | !nzchar(export_names)
export_names[no_label] <- names(df)[no_label]

# Several columns can share one label (every option column of a multiple-choice
# question gets the same question text). Append the variable name to each
# repeated label so the exported columns stay distinguishable.
repeated <- duplicated(export_names) | duplicated(export_names, fromLast = TRUE)
export_names[repeated] <- paste0(
  export_names[repeated], " [", names(df)[repeated], "]"
)

# Temporary copy whose column names are the headers built above; `df` itself
# keeps its short names.
df_export <- setNames(df, unname(export_names))

# Write the renamed copy to Excel.
write_xlsx(df_export, "D:\\TAM DAN - NON ORTHO (NEW)\\15\\15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE.xlsx")