This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.
When you click the **Knit** button, a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Print per-column summary statistics (min, quartiles, mean, max) of `cars`.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load the raw survey export into `df`. read.csv2() defaults to a semicolon
# field separator and a comma decimal mark.
df <- read.csv2(
  file = "D:\\TAM DAN NON-ORTHO\\15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE\\Edit 15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE.csv"
)
library(lessR)
## Warning: package 'lessR' was built under R version 4.5.2
##
## lessR 4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is the default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation to pivot tables.
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including modern time series forecasting
## and many, new Plotly interactive visualizations output. Most
## visualization functions are now reorganized to three functions:
## Chart(): type="bar", "pie", "radar", "bubble", "treemap", "icicle"
## X(): type="histogram", "density", "vbs" and more
## XY(): type="scatter" for a scatterplot, or "contour", "smooth"
## Most previous function calls still work, such as:
## BarChart(), Histogram, and Plot().
## Enter: news(package="lessR"), or ?Chart, ?X, or ?XY
## There is also Flows() for Sankey flow diagrams, see ?Flows
##
## Interactive data analysis for constructing visualizations.
## Enter: interact()
library(labelled)
## Warning: package 'labelled' was built under R version 4.5.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lessR':
##
## order_by, recode, rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(writexl)
## Warning: package 'writexl' was built under R version 4.5.3
# ==============================================================================
# STEP 1: RECODE EVERY CODED COLUMN INTO A LABELLED FACTOR IN ONE mutate() CALL
# ==============================================================================
# Each across() uses any_of()/matches() so that codes missing from this
# particular data file are silently skipped instead of raising an error.
# Values that are not listed in `levels` become NA in the resulting factor.

# Questionnaire items coded 0 = No, 1 = Yes. Kept as a named vector because the
# clinical selector in section 7 must exclude these columns (see there).
yes_no_vars <- c(
  "B1", "B2", "B3", "B4", "B5", "C3", "C4", "C5", "D3", "D14A", "D14B", "D14C", "D15A", "D15B", "D17", "E9A", "E9B", "E9C", "E9D",
  "FA6K_R", "FA6K_L", "FA6E_R", "FA6E_L", "FA6F_R", "FA6F_L", "FA6T_R", "FA6T_L", "HFSKs", "SCR1",
  "RLS1", "RLS2", "RLS3", "MB1", "MB2", "MB3", "MB4", "MB5", "MB6", "MB7", "MB8", "MB9", "MB10", "JH1", "JH2", "JH3", "JH4", "JH5",
  "TS1.1", "TS1.2", "TS1.3", "TS1.4", "TS1.5", "TS1.6", "TS1.7", "TS1.8", "TS2.1", "TS2.2", "TS2.3", "TS2.4", "TS2.5", "TS2.6", "TS2.7", "TS2.8", "TS3", "TS4",
  "PB7A", "PB7B", "PB7C", "PB7D", "PB7E", "PB7F", "PB8A", "PB8B", "PB8C", "PB8D", "BE8",
  "A1", "A41", "A42", "A43", "A44", "A5", "A71", "A72", "A73", "A74", "A8", "A81", "A82", "A83", "A9", "A91", "A92", "A93",
  "A10", "A101", "A102", "A103", "A11", "A111", "A112", "A113", "A12", "A121", "A122", "A123", "A13", "A131", "A132", "A133", "A14", "A141", "A142", "A143",
  "HV3", "HV6", "HV7", "HV8", "KT7", "TT31", "TT34"
)

df <- df %>%
  mutate(
    # --- 1. GENDER & DEMOGRAPHICS ---
    across(any_of("Gender"), ~ factor(., levels = c(0, 1), labels = c("Male", "Female"))),
    across(any_of("DS4"), ~ factor(., levels = c(1, 2), labels = c("Kinh", "Other"))),
    across(any_of("DS5"), ~ factor(., levels = c(0, 1, 2), labels = c("Hue City", "Urban", "Rural"))),
    across(any_of("DS6"), ~ factor(., levels = c(0, 1, 2, 3), labels = c("Parents house", "Relatives house", "Rented house", "Dormitory"))),
    across(any_of("DS7"), ~ factor(., levels = c(0:5), labels = c("Farmer", "Manual laborer", "Administrative staff", "Professional", "Business/trading", "Other"))),
    across(any_of("DS8"), ~ factor(., levels = c(0:6), labels = c("Farmer", "Manual laborer", "Administrative staff", "Professional", "Business/trading", "Housewife", "Other"))),
    across(any_of(c("DS9", "DS10")), ~ factor(., levels = c(0:4), labels = c("Primary", "Secondary", "High school", "Vocational diploma", "College/University/Postgraduate"))),

    # --- 2. LARGE YES/NO GROUP (0 = No, 1 = Yes) ---
    # Explicit item list plus every option column of the multiple-choice
    # questions, which are selected by their name prefix.
    across(
      c(
        any_of(yes_no_vars),
        matches("^DS11|^DS12|^TT2|^HV11|^HV12|^HV13|^KT6|^KT9|^KT14|^KT15|^KT16")
      ),
      ~ factor(., levels = c(0, 1), labels = c("No", "Yes"))
    ),

    # --- 3. YES/NO/DON'T KNOW GROUP (0 = No, 1 = Yes, 2 = Don't know) ---
    across(
      any_of(c("PSQ1", "PSQ2", "PSQ3", "PSQ4", "PSQ5", "PSQ6A", "PSQ6B", "PSQ7", "PSQ8", "PSQ9", "PSQ10", "PSQ11", "PSQ12", "PSQ13", "PSQ14", "PSQ15", "PSQ16", "PSQ17", "PSQ18", "PSQ19", "PSQ20", "PSQ21", "PSQ22",
               "BE1", "BE4", "BE10", "KT8", "KT11", "KT12")),
      ~ factor(., levels = c(0, 1, 2), labels = c("No", "Yes", "Don't know"))
    ),

    # --- 4. HEALTH AND HABITS GROUP (HV, TT, OBC/VM) ---
    across(any_of("TT1"), ~ factor(., levels = c(0, 1, 2), labels = c("No", "Every 1-2 years", "Only when sick"))),
    across(any_of("HV1"), ~ factor(., levels = c(0:3), labels = c("Irregularly", "Once", "Twice", "At least 3 times"))),
    across(any_of("HV2"), ~ factor(., levels = c(0:3), labels = c("Less than 1 minute", "1-3 minutes", "More than 3 minutes", "Until it feels clean"))),
    across(any_of("HV4"), ~ factor(., levels = c(0:2), labels = c("Soft bristles", "Hard bristles", "Any type"))),
    across(any_of("HV5"), ~ factor(., levels = c(0:3), labels = c("Every 3-6 months", "When bristles wear out", "When the handle breaks", "When new designs are available"))),
    across(any_of("HV9"), ~ factor(., levels = c(0:2), labels = c("No", "Yes", "Don't remember"))),
    across(any_of("HV10"), ~ factor(., levels = c(0:3), labels = c("Under 6 months ago", "6-12 months ago", "1-2 years ago", "Over 2 years ago"))),
    across(any_of("HV14"), ~ factor(., levels = c(0:3), labels = c("Once per day", "More than once per day", "1-2 times per week", "Never"))),
    across(any_of("HV15"), ~ factor(., levels = c(0:2), labels = c("Never", "Yes", "Used to, but quit"))),
    across(any_of("HV16"), ~ factor(., levels = c(0:2), labels = c("Occasionally", "1-5 cigarettes/day", "More than 5/day"))),
    across(any_of("HV17"), ~ factor(., levels = c(0:3), labels = c("Never", "Occasionally", "Weekly", "Daily"))),
    # VM1-VM2 (night-time frequency)
    across(any_of(c("VM1", "VM2")), ~ factor(., levels = c(0:4), labels = c("None", "<1 Night/month", "1-3 Nights/month", "1-3 Nights/week", "4-7 Nights/week"))),
    # VM3 (daytime frequency)
    across(any_of("VM3"), ~ factor(., levels = c(0:4), labels = c("None", "Rarely", "Occasionally", "Frequently", "Always"))),
    # VM4-VM21 (frequency of daytime behaviours)
    across(any_of(paste0("VM", 4:21)), ~ factor(., levels = c(0:4), labels = c("None of time", "A little of the time", "Some of the time", "Most of the time", "All of the time"))),

    # --- 5. KNOWLEDGE GROUP (KT) ---
    across(any_of("KT1"), ~ factor(., levels = c(0:4), labels = c("Healthy gums", "Dental infection", "Calcium deficiency", "Gingivitis", "Don't know"))),
    across(any_of("KT2"), ~ factor(., levels = c(0:4), labels = c("Regular brushing/flossing", "Occasionally", "Vitamin C supplements", "Eating soft food", "Don't know"))),
    across(any_of(c("KT3", "KT4")), ~ factor(., levels = c(0:3), labels = c("Soft deposits on teeth", "Tooth discoloration", "Hard deposits", "Don't know"))),
    # NOTE(review): KT5 is coded from 1 while the other KT items start at 0 -
    # confirm against the codebook.
    across(any_of("KT5"), ~ factor(., levels = c(1:4), labels = c("Gingivitis", "Tooth discoloration", "Cavities", "Don't know"))),
    across(any_of("KT10"), ~ factor(., levels = c(0:4), labels = c("Don't know", "1-3 months", "4-6 months", "7-12 months", "Over a year"))),
    across(any_of("KT13"), ~ factor(., levels = c(0:3), labels = c("Don't know", "Every 6 months", "Once a year", "Every 2 years"))),

    # --- 6. SLEEP AND PAIN GROUP (ESS, PB, BE, A3) ---
    across(any_of(paste0("ESS", 1:8)), ~ factor(., levels = c(0:3), labels = c("Would never doze", "Slight chance of dozing", "Moderate chance of dozing", "High chance of dozing"))),
    across(
      any_of(c("PB9A", "PB9B", "PB9C", "PB9D", "PB9E", "PB9F", "PB9G", "PB9H", "PB9I", "PB10", "PB11", "PB12")),
      ~ factor(., levels = c(0:3), labels = c("Not during the past month", "Less than one a week", "One or twice a week", "Three or more times a week"))
    ),
    across(any_of(c("BE3", "BE6", "BE7", "BE9")), ~ factor(., levels = c(0:4), labels = c("Nearly every day", "3-4 times a week", "1-2 times a week", "1-2 times a month", "Never or nearly never"))),
    across(any_of("PB13"), ~ factor(., levels = c(0:3), labels = c("No problem at all", "Only a very slight problem", "Somewhat of a problem", "A very big problem"))),
    across(any_of("PB14"), ~ factor(., levels = c(0:3), labels = c("Very good", "Fairly good", "Fairly bad", "Very bad"))),
    across(any_of("PB15"), ~ factor(., levels = c(0:3), labels = c("Never", "Occasionally", "Often", "Always"))),
    across(any_of("BE2"), ~ factor(., levels = c(0:3), labels = c("Slightly louder than breathing", "As loud as talking", "Louder than talking", "Very loud - can be heard in adjacent rooms"))),
    across(any_of("BE5"), ~ factor(., levels = c(0:4), labels = c("Almost every day", "3-4 times per week", "1-2 times per week", "1-2 times per month", "Rarely or never"))),
    across(any_of("A3"), ~ factor(., levels = c(0:2), labels = c("No pain", "Pain comes and goes", "Constant pain"))),

    # --- 7. DENTAL CLINICAL GROUP (teeth, plaque, gingiva, periodontal pockets) ---
    # Tooth status: per-tooth columns (A11-A48) or the 160 tooth-surface columns.
    # The per-tooth pattern ^A[1-4][1-8]$ also matches the questionnaire items
    # A11-A14 and A41-A44 that section 2 has already turned into No/Yes
    # factors; recoding those again with levels 0:9 would replace every value
    # with NA, so the yes/no items are excluded from this selector.
    # NOTE(review): assumes A11-A14/A41-A44 are questionnaire items in this
    # file, not tooth numbers - confirm against the codebook.
    across(
      matches("^A[1-4][1-8]$|.*_Occlusal_Incisal$|.*_Occlisal_Incisal$|.*_Buccal$|.*_Lingua$|.*_Mesia$|.*_Distal$") &
        !any_of(yes_no_vars),
      ~ factor(., levels = c(0:9), labels = c("Normal", "Tooth decay", "Filled, with decay", "Filled, no decay", "Missing due to decay", "Missing for another reason", "Fissure sealant", "Fix dental prosthesis/crown, abutment, veneer", "Unerupted", "Not recorded"))
    ),
    # Plaque Index (PI) & Calculus (CI); the suffix patterns cover both the
    # plaque and the gingival site columns. Codes 88/888 and 99/999 share a
    # label, so each pair collapses into a single factor level.
    across(
      starts_with("PI") | starts_with("CI") | matches(".*_Disto$|.*_Buc$|.*_Mesio$|.*_Lin$"),
      ~ factor(., levels = c(0:3, 88, 888, 99, 999), labels = c("Score 0", "Score 1", "Score 2", "Score 3", "Not recorded", "Not recorded", "Missing tooth", "Missing tooth"))
    ),
    # CPI
    across(
      matches("_Bleed.*|_Periop.*"),
      ~ factor(., levels = c(0:4, 88), labels = c("Healthy", "Bleeding observed", "Calculus detected", "Pocket 4-5mm", "Pocket 6mm+", "Not recorded"))
    )
  )
# ==============================================================================
# STEP 2: ATTACH STATIC DESCRIPTIVE LABELS TO THE FIXED VARIABLES
# ==============================================================================
# Variable name -> question text shown as the variable label.
label_list <- list(
  # Demographics
  Gender = "Gender",
  DS3 = "Age",
  DS4 = "Ethnicity",
  DS5 = "Where are you from",
  DS6 = "Where do you currently live?",
  DS7 = "Father's occupation",
  DS8 = "Mother's occupation",
  DS9 = "Father's education level",
  DS10 = "Mother's education level",
  # General health
  TT1 = "Do you regularly get health check-ups?",
  TT31 = "In the past 6 months, have you used any type of medication?",
  TT34 = "Did you buy the medicine yourself or take it as prescribed by a doctor?",
  TT2A = "Systemic diseases: None",
  TT2B = "Systemic diseases: Asthma",
  TT2C = "Systemic diseases: Allergy",
  TT2D = "Systemic diseases: Diabetes",
  TT2E = "Systemic diseases: Digestive disorders",
  TT2F = "Systemic diseases: Kidney disease",
  TT2G = "Systemic diseases: Liver disease",
  TT2H = "Systemic diseases: Cardiovascular disease",
  TT2I = "Systemic diseases: Hypertension",
  TT2K = "Systemic diseases: Blood disorders",
  TT2L = "Systemic diseases: Thyroid disease",
  TT2M = "Systemic diseases: Cancer",
  TT2O = "Systemic diseases: Other",
  # NOTE(review): TT2P and TT2Q carry the same text - confirm whether each
  # should name only one of the two conditions.
  TT2P = "Systemic diseases: Low blood pressure/Osteoporosis",
  TT2Q = "Systemic diseases: Low blood pressure/Osteoporosis",
  # Oral-hygiene habits
  HV1 = "How many times a day do you brush your teeth?",
  HV2 = "How long do you brush each time?",
  HV3 = "Do you use toothpaste when brushing?",
  HV4 = "What kind of toothbrush do you use?",
  HV5 = "When do you replace your toothbrush?",
  HV6 = "Do you use toothpicks?",
  HV7 = "Do you use dental floss?",
  HV8 = "Do you use mouthwash?",
  HV9 = "Have you ever visited a dentist?",
  HV10 = "When was your last dentist visit?",
  HV14 = "How often do you eat sweets?",
  HV15 = "Do you smoke?",
  HV16 = "If you currently smoke, how do you smoke?",
  HV17 = "Do you drink alcohol?",
  # Oral-health knowledge
  KT1 = "What does bleeding gums mean to you?",
  KT2 = "How can gingivitis be prevented?",
  KT3 = "What is dental plaque?",
  KT4 = "What is tartar (calculus)?",
  KT5 = "What can plaque lead to?",
  KT7 = "Do you think cavities affect a person's appearance?",
  KT8 = "Does oral health affect overall health?",
  KT10 = "How often should you replace your toothbrush?",
  KT11 = "Does fluoride help strengthen teeth?",
  KT12 = "Does flossing help prevent gum disease?",
  KT13 = "How often should you have dental check-ups?"
)
# Apply only the labels whose variable actually exists in the current data
# frame, so a missing column does not abort the assignment.
labelled_vars <- intersect(names(label_list), names(df))
var_label(df) <- label_list[labelled_vars]
# ==============================================================================
# STEP 3: LOOP THAT AUTOMATICALLY LABELS THE REMAINING VARIABLES
#         (MULTIPLE-CHOICE OPTIONS & CLINICAL MEASUREMENTS)
# ==============================================================================
# Question text for the multiple-choice items, keyed by column-name prefix:
# every column whose name starts with the prefix receives the same label.
multi_choice_labels <- c(
  DS11 = "Does your family have the following items?",
  DS12 = "In your family, does anyone have the following items?",
  HV11 = "Why did you visit the dentist?",
  HV12 = "Why haven't you seen a dentist in the past 3 years?",
  HV13 = "Which of the following foods do you often eat?",
  KT6 = "What causes cavities?",
  KT9 = "Why do we brush our teeth?",
  KT14 = "What is the best way to prevent cavities?",
  KT15 = "Which foods are beneficial for oral health?",
  KT16 = "What are the harms of smoking?"
)

# Column-name suffixes that identify a tooth surface / site measurement.
# "[ui]" accepts both the "Occlusal" and the misspelled "Occlisal" column names;
# a "|" inside a character class would be matched literally, so it is left out.
tooth_site_pattern <- paste0(
  "_Occl[ui]sal_Incisal$|_Buccal$|_Lingua$|_Mesia$|_Distal$|",
  "_Disto$|_Buc$|_Mesio$|_Lin$|_Bleed.*|_Periop.*"
)

for (col in names(df)) {
  # Multiple-choice options: label by question prefix.
  for (prefix in names(multi_choice_labels)) {
    if (startsWith(col, prefix)) {
      var_label(df[[col]]) <- multi_choice_labels[[prefix]]
    }
  }

  # Tooth surfaces (caries, plaque, gingiva) and CPI sites: build the label from
  # the column name by spelling out the suffix and prefixing "Tooth".
  if (grepl(tooth_site_pattern, col)) {
    new_label <- col %>%
      gsub("_Occl[ui]sal_Incisal", " Occlusal/Incisal Surface", .) %>%
      gsub("_Buccal|_Buc", " Buccal Surface", .) %>%
      gsub("_Lingua|_Lin", " Lingual Surface", .) %>%
      gsub("_Mesia|_Mesio", " Mesial Surface", .) %>%
      gsub("_Distal|_Disto", " Distal Surface", .) %>%
      gsub("_Bleed.*", " Bleeding (CPI)", .) %>%
      gsub("_Periop.*", " Periodontal Pocket (CPI)", .) %>%
      paste("Tooth", .)
    var_label(df[[col]]) <- new_label
  }
}
# Done: `df` is now recoded and labelled, ready for analysis.
# To export it with the original short variable names instead, use:
# library(writexl)
# write_xlsx(df, "Master_Cleaned_Data.xlsx")

# Build an export copy whose column headers are the descriptive variable labels.
# A column without a label keeps its variable name, and a label shared by
# several columns (e.g. every option of one multiple-choice question) gets the
# variable name appended, so no header in the workbook is blank or ambiguous.
export_names <- vapply(
  names(df),
  function(nm) {
    lbl <- var_label(df[[nm]])
    if (length(lbl) != 1L || is.na(lbl) || !nzchar(lbl)) nm else as.character(lbl)
  },
  character(1),
  USE.NAMES = FALSE
)
shared_label <- duplicated(export_names) | duplicated(export_names, fromLast = TRUE)
export_names[shared_label] <- paste0(
  export_names[shared_label], " [", names(df)[shared_label], "]"
)
df_export <- setNames(df, export_names)

# Write the relabelled copy to Excel.
write_xlsx(df_export, "D:\\TAM DAN - NON ORTHO (NEW)\\15\\15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE.xlsx")