This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.
# Load the raw survey responses into `df`. read.csv2() defaults to a semicolon
# field separator and a comma decimal mark.
df <- read.csv2(
  file = "D:\\TAM DAN NON-ORTHO\\15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE\\15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE.csv"
)
library(lessR)
## Warning: package 'lessR' was built under R version 4.5.2
##
## lessR 4.5 feedback: gerbing@pdx.edu
## --------------------------------------------------------------
## > d <- Read("") Read data file, many formats available, e.g., Excel
## d is the default data frame, data= in analysis routines optional
##
## Many examples of reading, writing, and manipulating data, graphics,
## testing means and proportions, regression, factor analysis,
## customization, forecasting, and aggregation to pivot tables.
## Enter: browseVignettes("lessR")
##
## View lessR updates, now including modern time series forecasting
## and many, new Plotly interactive visualizations output. Most
## visualization functions are now reorganized to three functions:
## Chart(): type="bar", "pie", "radar", "bubble", "treemap", "icicle"
## X(): type="histogram", "density", "vbs" and more
## XY(): type="scatter" for a scatterplot, or "contour", "smooth"
## Most previous function calls still work, such as:
## BarChart(), Histogram, and Plot().
## Enter: news(package="lessR"), or ?Chart, ?X, or ?XY
## There is also Flows() for Sankey flow diagrams, see ?Flows
##
## Interactive data analysis for constructing visualizations.
## Enter: interact()
library(labelled)
## Warning: package 'labelled' was built under R version 4.5.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:lessR':
##
## order_by, recode, rename
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(writexl)
## Warning: package 'writexl' was built under R version 4.5.3
# ==============================================================================
# STEP 0: LOAD THE REQUIRED LIBRARIES
# ==============================================================================
library(dplyr)
library(labelled)
# ==============================================================================
# STEP 1: RECODE ALL CODED COLUMNS INTO LABELLED FACTORS IN A SINGLE mutate()
# ==============================================================================
# Every across(any_of(...)) selection skips columns that are absent from `df`.
# Any code that is not listed in `levels` becomes NA in the resulting factor.
df <- df %>%
  mutate(
    # --- 1. DEMOGRAPHICS (DS) ---
    across(
      any_of("DS1"),
      \(x) factor(x, levels = 1:2, labels = c("Male", "Female"))
    ),
    across(
      any_of("DS4"),
      \(x) factor(x, levels = 1:2, labels = c("Kinh", "Other"))
    ),
    across(
      any_of("DS5"),
      \(x) factor(x, levels = 0:2, labels = c("Hue City", "Urban", "Rural"))
    ),
    across(
      any_of("DS6"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Parents house", "Relatives house", "Rented house", "Dormitory"
        )
      )
    ),
    across(
      any_of("DS7"),
      \(x) factor(
        x,
        levels = 0:5,
        labels = c(
          "Farmer", "Manual laborer", "Administrative staff", "Professional",
          "Business/trading", "Other"
        )
      )
    ),
    across(
      any_of("DS8"),
      \(x) factor(
        x,
        levels = 0:6,
        labels = c(
          "Farmer", "Manual laborer", "Administrative staff", "Professional",
          "Business/trading", "Housewife", "Other"
        )
      )
    ),
    across(
      any_of(c("DS9", "DS10")),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Primary", "Secondary", "High school", "Vocational diploma",
          "College/University/Postgraduate"
        )
      )
    ),

    # --- 2. STANDARD YES/NO ITEMS (0 = No, 1 = Yes) ---
    # Single yes/no questions are listed by name; the multiple-choice groups
    # are picked up by their column-name prefix.
    across(
      c(
        any_of(c("TT31", "HV3", "HV6", "HV7", "HV8", "KT7", "KT8")),
        matches("^DS11|^DS12|^TT2|^HV11|^HV12|^HV13|^KT6|^KT9|^KT14|^KT15|^KT16")
      ),
      \(x) factor(x, levels = 0:1, labels = c("No", "Yes"))
    ),

    # --- 3. HEALTH STATUS (TT) ---
    across(
      any_of("TT1"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("No", "Every 1-2 years", "Only when sick")
      )
    ),
    across(
      any_of("TT34"),
      \(x) factor(
        x,
        levels = 0:1,
        labels = c("By it yourself", "Doctor prescribed")
      )
    ),

    # --- 4. ORAL-CARE HABITS (HV) ---
    across(
      any_of("HV1"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Irregularly", "One", "Twice", "At least 3 times")
      )
    ),
    across(
      any_of("HV2"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Less than 1 minute", "1-3 minute", "More than 3 minute",
          "Until it feels clean"
        )
      )
    ),
    across(
      any_of("HV4"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Soft bristles", "Hard bristles", "Any type")
      )
    ),
    across(
      any_of("HV5"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Every 3-6 months", "When bristles wear out",
          "When the handle breaks", "When new designs are available"
        )
      )
    ),
    across(
      any_of("HV9"),
      \(x) factor(x, levels = 0:2, labels = c("No", "Yes", "Don't remember"))
    ),
    across(
      any_of("HV10"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Under 6 months ago", "6-12 months ago", "1-2 years ago",
          "Over 2 years ago"
        )
      )
    ),
    across(
      any_of("HV14"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Once per day", "More than once per day", "1-2 times per week",
          "Never"
        )
      )
    ),
    # Code mapping: 1 = Yes, 2 = Used to, 0 = Never.
    across(
      any_of("HV15"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Never", "Yes", "Used to, but quit")
      )
    ),
    across(
      any_of("HV16"),
      \(x) factor(
        x,
        levels = 0:2,
        labels = c("Occasionally", "1-5 cigarettes/day", "More than 5/day")
      )
    ),
    across(
      any_of("HV17"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Never", "Occasionally", "Weekly", "Daily")
      )
    ),

    # --- 5. KNOWLEDGE (KT) ---
    across(
      any_of("KT1"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Healthy gums", "Dental infection", "Calcium deficiency",
          "Gingivitis", "Don't know"
        )
      )
    ),
    across(
      any_of("KT2"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Regular brushing/flossing", "Occasionally", "Vitamin C supplements",
          "Eating soft food", "Don't know"
        )
      )
    ),
    across(
      any_of(c("KT3", "KT4")),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c(
          "Soft deposits on teeth", "Tooth discoloration", "Hard deposits",
          "Don't know"
        )
      )
    ),
    # NOTE(review): KT5 is the only item here whose codes start at 1 instead
    # of 0 -- confirm against the codebook that this is intended.
    across(
      any_of("KT5"),
      \(x) factor(
        x,
        levels = 1:4,
        labels = c("Gingivitis", "Tooth discoloration", "Cavities", "Don't know")
      )
    ),
    # Code mapping: 1, 2, 3, 4 = time ranges, 0 = Don't know.
    across(
      any_of("KT10"),
      \(x) factor(
        x,
        levels = 0:4,
        labels = c(
          "Don't know", "1-3 months", "4-6 months", "7-12 months", "Over a year"
        )
      )
    ),
    across(
      any_of(c("KT11", "KT12")),
      \(x) factor(x, levels = 0:2, labels = c("No", "Yes", "Don't know"))
    ),
    across(
      any_of("KT13"),
      \(x) factor(
        x,
        levels = 0:3,
        labels = c("Don't know", "Every 6 months", "Once a year", "Every 2 years")
      )
    )
  )
# ==============================================================================
# STEP 2: DESCRIPTIVE VARIABLE LABELS
# ==============================================================================
# Named list mapping a column name to the question text used as its label.
survey_labels <- list(
  # General information (DS)
  DS1 = "Gender",
  DS2 = "Date of birth",
  DS3 = "Age",
  DS4 = "Ethnicity",
  DS5 = "Where are you from",
  DS6 = "Where do you currently live?",
  DS7 = "Father's occupation",
  DS75 = "Father's occupation (Other text)",
  DS8 = "Mother's occupation",
  DS85 = "Mother's occupation (Other text)",
  DS9 = "Father's education level",
  DS10 = "Mother's education level",

  # Medical conditions (TT)
  TT1 = "Do you regularly get health check-ups?",
  TT31 = "In the past 6 months, have you used any type of medication?",
  TT34 = "Did you buy the medicine yourself or take it as prescribed by a doctor?",
  TT2A = "Systemic diseases: None",
  TT2B = "Systemic diseases: Asthma",
  TT2C = "Systemic diseases: Allergy",
  TT2D = "Systemic diseases: Diabetes",
  TT2E = "Systemic diseases: Digestive disorders",
  TT2F = "Systemic diseases: Kidney disease",
  TT2G = "Systemic diseases: Liver disease",
  TT2H = "Systemic diseases: Cardiovascular disease",
  TT2I = "Systemic diseases: Hypertension",
  TT2J = "Systemic diseases: Low blood pressure",
  TT2K = "Systemic diseases: Blood disorders",
  TT2L = "Systemic diseases: Thyroid disease",
  TT2M = "Systemic diseases: Cancer",
  TT2N = "Systemic diseases: Osteoporosis",
  TT2O = "Systemic diseases: Other",

  # Behaviour (HV)
  HV1 = "How many times a day do you brush your teeth?",
  HV2 = "How long do you brush each time?",
  HV3 = "Do you use toothpaste when brushing?",
  HV4 = "What kind of toothbrush do you use?",
  HV5 = "When do you replace your toothbrush?",
  HV6 = "Do you use toothpicks?",
  HV7 = "Do you use dental floss?",
  HV8 = "Do you use mouthwash?",
  HV9 = "Have you ever visited a dentist?",
  HV10 = "When was your last dentist visit?",
  HV14 = "How often do you eat sweets?",
  HV15 = "Do you smoke?",
  HV16 = "If you currently smoke, how do you smoke?",
  HV17 = "Do you drink alcohol?",

  # Knowledge (KT)
  KT1 = "What does bleeding gums mean to you?",
  KT2 = "How can gingivitis be prevented?",
  KT3 = "What is dental plaque?",
  KT4 = "What is tartar (calculus)?",
  KT5 = "What can plaque lead to?",
  KT7 = "Do you think cavities affect a person's appearance?",
  KT8 = "Does oral health affect overall health?",
  KT10 = "How often should you replace your toothbrush?",
  KT11 = "Does fluoride help strengthen teeth?",
  KT12 = "Does flossing help prevent gum disease?",
  KT13 = "How often should you have dental check-ups to prevent problems?"
)
# Apply the labels safely: only entries whose name is an existing column of
# `df` are passed to var_label<-, so labels for absent variables are dropped.
var_label(df) <- survey_labels[intersect(names(survey_labels), names(df))]
# ==============================================================================
# STEP 3: LABEL THE MULTIPLE-CHOICE VARIABLE GROUPS AUTOMATICALLY
# ==============================================================================
# Column-name prefix -> question text. Every column whose name starts with a
# prefix receives that question as its variable label.
multi_choice_questions <- c(
  DS11 = "Does your family have the following items?",
  DS12 = "In your family, does anyone have the following items?",
  HV11 = "Why did you visit the dentist?",
  HV12 = "Why haven't you seen a dentist in the past 2 years?",
  HV13 = "Which of the following foods do you often eat?",
  KT6 = "What causes cavities?",
  KT9 = "Why do we brush our teeth?",
  KT14 = "What is the best way to prevent cavities?",
  KT15 = "Which foods are beneficial for oral health?",
  KT16 = "What are the harms of smoking?"
)

for (col in names(df)) {
  for (prefix in names(multi_choice_questions)) {
    if (startsWith(col, prefix)) {
      var_label(df[[col]]) <- multi_choice_questions[[prefix]]
    }
  }
}
# Build a temporary copy of `df` whose column names are the descriptive
# variable labels, then write that copy to Excel.
#
# Header rules:
#   * a column without a usable variable label keeps its original column name,
#     so the sheet never gets a blank header (var_label(df, unlist = TRUE)
#     returns "" for unlabelled columns);
#   * when several columns carry the same label (each multiple-choice group
#     shares one question text), the original column name is appended in
#     square brackets so every header is unique and traceable to its variable.
export_headers <- vapply(
  names(df),
  function(col) {
    lbl <- var_label(df[[col]])
    has_label <- !is.null(lbl) && length(lbl) == 1L && !is.na(lbl) &&
      nzchar(lbl)
    if (has_label) as.character(lbl) else col
  },
  character(1)
)

shared_label <- duplicated(export_headers) |
  duplicated(export_headers, fromLast = TRUE)
export_headers[shared_label] <- paste0(
  export_headers[shared_label], " [", names(df)[shared_label], "]"
)

# Final guard in case a bracketed header still collides with another header.
export_headers <- make.unique(unname(export_headers))

df_export <- setNames(df, export_headers)

# Write the relabelled copy to Excel.
write_xlsx(df_export, "D:\\TAM DAN - NON ORTHO (NEW)\\15\\15. Non Ortho_SURVEY QUESTIONNAIRE OF ORAL HYGIENE.xlsx")