R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Load the raw survey export. read.csv2() defaults to ';' as the field
# separator and ',' as the decimal mark.
df <- read.csv2(
  file = "D:\\TAM DAN NON-ORTHO\\15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE\\15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE.csv"
)
library(lessR)
## Warning: package 'lessR' was built under R version 4.5.2
## 
## lessR 4.5                            feedback: gerbing@pdx.edu 
## --------------------------------------------------------------
## > d <- Read("")  Read data file, many formats available, e.g., Excel
##   d is the default data frame, data= in analysis routines optional
## 
## Many examples of reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation to pivot tables.
##   Enter: browseVignettes("lessR")
## 
## View lessR updates, now including modern time series forecasting
##   and many, new Plotly interactive visualizations output. Most
##   visualization functions are now reorganized to three functions:
##      Chart(): type="bar", "pie", "radar", "bubble", "treemap", "icicle"
##      X(): type="histogram", "density", "vbs" and more
##      XY(): type="scatter" for a scatterplot, or "contour", "smooth"
##    Most previous function calls still work, such as:
##      BarChart(), Histogram, and Plot().
##   Enter: news(package="lessR"), or ?Chart, ?X, or ?XY
## There is also Flows() for Sankey flow diagrams, see ?Flows
## 
## Interactive data analysis for constructing visualizations.
##   Enter: interact()
library(labelled)
## Warning: package 'labelled' was built under R version 4.5.3
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lessR':
## 
##     order_by, recode, rename
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(writexl)
## Warning: package 'writexl' was built under R version 4.5.3
# ==============================================================================
# BƯỚC 0: TẢI THƯ VIỆN CẦN THIẾT
# ==============================================================================
library(dplyr)
library(labelled)

# ==============================================================================
# STEP 1: RECODE EVERY CODED COLUMN INTO A LABELLED FACTOR IN A SINGLE mutate()
# ==============================================================================
# Each across() call targets columns by name with any_of()/matches(), so a
# column that is absent from `df` is skipped instead of raising an error.
# factor() turns any value that is not listed in `levels` into NA.
df <- df %>%
  mutate(
    # --- 1. DEMOGRAPHICS (DS) ---
    across(
      any_of("DS1"),
      \(x) factor(x, levels = 1:2, labels = c("Male", "Female"))
    ),
    across(
      any_of("DS4"),
      \(x) factor(x, levels = 1:2, labels = c("Kinh", "Other"))
    ),
    across(
      any_of("DS5"),
      \(x) factor(x, levels = 0:2, labels = c("Hue City", "Urban", "Rural"))
    ),
    across(
      any_of("DS6"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Parents house", "Relatives house", "Rented house", "Dormitory")
      )
    ),
    across(
      any_of("DS7"),
      \(x) factor(
        x,
        levels = 0:5,
        labels = c(
          "Farmer", "Manual laborer", "Administrative staff",
          "Professional", "Business/trading", "Other"
        )
      )
    ),
    across(
      any_of("DS8"),
      \(x) factor(
        x,
        levels = 0:6,
        labels = c(
          "Farmer", "Manual laborer", "Administrative staff",
          "Professional", "Business/trading", "Housewife", "Other"
        )
      )
    ),
    across(
      any_of(c("DS9", "DS10")),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Primary", "Secondary", "High school",
          "Vocational diploma", "College/University/Postgraduate"
        )
      )
    ),

    # --- 2. STANDARD YES/NO ITEMS (0 = No, 1 = Yes) ---
    # Single items are listed by exact name; the multiple-choice groups are
    # picked up by their column-name prefix.
    across(
      c(
        any_of(c("TT31", "HV3", "HV6", "HV7", "HV8", "KT7", "KT8")),
        matches("^DS11|^DS12|^TT2|^HV11|^HV12|^HV13|^KT6|^KT9|^KT14|^KT15|^KT16")
      ),
      \(x) factor(x, levels = 0:1, labels = c("No", "Yes"))
    ),

    # --- 3. HEALTH STATUS (TT) ---
    across(
      any_of("TT1"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("No", "Every 1-2 years", "Only when sick")
      )
    ),
    across(
      any_of("TT34"),
      \(x) factor(
        x,
        levels = 0:1,
        labels = c("By it yourself", "Doctor prescribed")
      )
    ),

    # --- 4. ORAL-CARE HABITS (HV) ---
    across(
      any_of("HV1"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Irregularly", "One", "Twice", "At least 3 times")
      )
    ),
    across(
      any_of("HV2"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Less than 1 minute", "1-3 minute",
          "More than 3 minute", "Until it feels clean"
        )
      )
    ),
    across(
      any_of("HV4"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Soft bristles", "Hard bristles", "Any type")
      )
    ),
    across(
      any_of("HV5"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Every 3-6 months", "When bristles wear out",
          "When the handle breaks", "When new designs are available"
        )
      )
    ),
    across(
      any_of("HV9"),
      \(x) factor(x, levels = 0:2, labels = c("No", "Yes", "Don't remember"))
    ),
    across(
      any_of("HV10"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Under 6 months ago", "6-12 months ago",
          "1-2 years ago", "Over 2 years ago"
        )
      )
    ),
    across(
      any_of("HV14"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Once per day", "More than once per day",
          "1-2 times per week", "Never"
        )
      )
    ),
    # Code mapping: 1 = Yes, 2 = Used to, 0 = Never.
    across(
      any_of("HV15"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Never", "Yes", "Used to, but quit")
      )
    ),
    across(
      any_of("HV16"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Occasionally", "1-5 cigarettes/day", "More than 5/day")
      )
    ),
    across(
      any_of("HV17"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Never", "Occasionally", "Weekly", "Daily")
      )
    ),

    # --- 5. KNOWLEDGE (KT) ---
    across(
      any_of("KT1"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Healthy gums", "Dental infection", "Calcium deficiency",
          "Gingivitis", "Don't know"
        )
      )
    ),
    across(
      any_of("KT2"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Regular brushing/flossing", "Occasionally",
          "Vitamin C supplements", "Eating soft food", "Don't know"
        )
      )
    ),
    across(
      any_of(c("KT3", "KT4")),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Soft deposits on teeth", "Tooth discoloration",
          "Hard deposits", "Don't know"
        )
      )
    ),
    # NOTE(review): KT5 is coded from 1 while the other KT items start at 0;
    # confirm against the questionnaire codebook.
    across(
      any_of("KT5"),
      \(x) factor(
        x,
        levels = 1:4,
        labels = c("Gingivitis", "Tooth discoloration", "Cavities", "Don't know")
      )
    ),
    # Code mapping: 1-4 are the time ranges, 0 = Don't know.
    across(
      any_of("KT10"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Don't know", "1-3 months", "4-6 months",
          "7-12 months", "Over a year"
        )
      )
    ),
    across(
      any_of(c("KT11", "KT12")),
      \(x) factor(x, levels = 0:2, labels = c("No", "Yes", "Don't know"))
    ),
    across(
      any_of("KT13"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Don't know", "Every 6 months", "Once a year", "Every 2 years")
      )
    )
  )

# ==============================================================================
# STEP 2: ATTACH DESCRIPTIVE VARIABLE LABELS
# ==============================================================================
# Column name -> question text, one entry per line.
survey_labels <- list(
  # General information (DS)
  DS1 = "Gender",
  DS2 = "Date of birth",
  DS3 = "Age",
  DS4 = "Ethnicity",
  DS5 = "Where are you from",
  DS6 = "Where do you currently live?",
  DS7 = "Father's occupation",
  DS75 = "Father's occupation (Other text)",
  DS8 = "Mother's occupation",
  DS85 = "Mother's occupation (Other text)",
  DS9 = "Father's education level",
  DS10 = "Mother's education level",

  # Medical conditions (TT)
  TT1 = "Do you regularly get health check-ups?",
  TT31 = "In the past 6 months, have you used any type of medication?",
  TT34 = "Did you buy the medicine yourself or take it as prescribed by a doctor?",
  TT2A = "Systemic diseases: None",
  TT2B = "Systemic diseases: Asthma",
  TT2C = "Systemic diseases: Allergy",
  TT2D = "Systemic diseases: Diabetes",
  TT2E = "Systemic diseases: Digestive disorders",
  TT2F = "Systemic diseases: Kidney disease",
  TT2G = "Systemic diseases: Liver disease",
  TT2H = "Systemic diseases: Cardiovascular disease",
  TT2I = "Systemic diseases: Hypertension",
  TT2J = "Systemic diseases: Low blood pressure",
  TT2K = "Systemic diseases: Blood disorders",
  TT2L = "Systemic diseases: Thyroid disease",
  TT2M = "Systemic diseases: Cancer",
  TT2N = "Systemic diseases: Osteoporosis",
  TT2O = "Systemic diseases: Other",

  # Behaviour (HV)
  HV1 = "How many times a day do you brush your teeth?",
  HV2 = "How long do you brush each time?",
  HV3 = "Do you use toothpaste when brushing?",
  HV4 = "What kind of toothbrush do you use?",
  HV5 = "When do you replace your toothbrush?",
  HV6 = "Do you use toothpicks?",
  HV7 = "Do you use dental floss?",
  HV8 = "Do you use mouthwash?",
  HV9 = "Have you ever visited a dentist?",
  HV10 = "When was your last dentist visit?",
  HV14 = "How often do you eat sweets?",
  HV15 = "Do you smoke?",
  HV16 = "If you currently smoke, how do you smoke?",
  HV17 = "Do you drink alcohol?",

  # Knowledge (KT)
  KT1 = "What does bleeding gums mean to you?",
  KT2 = "How can gingivitis be prevented?",
  KT3 = "What is dental plaque?",
  KT4 = "What is tartar (calculus)?",
  KT5 = "What can plaque lead to?",
  KT7 = "Do you think cavities affect a person's appearance?",
  KT8 = "Does oral health affect overall health?",
  KT10 = "How often should you replace your toothbrush?",
  KT11 = "Does fluoride help strengthen teeth?",
  KT12 = "Does flossing help prevent gum disease?",
  KT13 = "How often should you have dental check-ups to prevent problems?"
)

# Apply only the labels whose column actually exists in `df`, so a missing
# column is skipped rather than passed to var_label().
var_label(df) <- survey_labels[base::intersect(names(survey_labels), names(df))]

# ==============================================================================
# STEP 3: LABEL THE MULTIPLE-CHOICE COLUMN GROUPS AUTOMATICALLY
# ==============================================================================
# Column-name prefix -> question text. Every column whose name starts with the
# prefix receives the same label.
multi_choice_labels <- c(
  DS11 = "Does your family have the following items?",
  DS12 = "In your family, does anyone have the following items?",
  HV11 = "Why did you visit the dentist?",
  HV12 = "Why haven't you seen a dentist in the past 2 years?",
  HV13 = "Which of the following foods do you often eat?",
  KT6  = "What causes cavities?",
  KT9  = "Why do we brush our teeth?",
  KT14 = "What is the best way to prevent cavities?",
  KT15 = "Which foods are beneficial for oral health?",
  KT16 = "What are the harms of smoking?"
)

for (prefix in names(multi_choice_labels)) {
  for (col in names(df)[startsWith(names(df), prefix)]) {
    var_label(df[[col]]) <- multi_choice_labels[[prefix]]
  }
}
# Build a temporary copy of `df` whose column headers are the descriptive
# variable labels, then write that copy to Excel.
#
# Two safeguards keep every exported header usable:
#   * a column without a usable label keeps its original column name;
#   * a label shared by several columns (every column of a multiple-choice
#     group carries the same question text) gets the column code appended in
#     square brackets, so the individual answer options stay distinguishable.
export_headers <- vapply(
  names(df),
  function(col) {
    lbl <- var_label(df[[col]])
    if (length(lbl) == 1L && !is.na(lbl) && nzchar(lbl)) {
      as.character(lbl)
    } else {
      col
    }
  },
  character(1)
)

# Flag every occurrence of a repeated header, not only the second and later.
is_ambiguous <- duplicated(export_headers) |
  duplicated(export_headers, fromLast = TRUE)
export_headers[is_ambiguous] <- paste0(
  export_headers[is_ambiguous], " [", names(df)[is_ambiguous], "]"
)

df_export <- setNames(df, unname(export_headers))

# Export the relabelled copy; `df` itself keeps its short column names.
write_xlsx(df_export, "D:\\TAM DAN - NON ORTHO (NEW)\\15\\15. Non Ortho_SURVEY QUESTIONNAIRE  OF ORAL HYGIENE.xlsx")