dự báo fage bằng ann

dữ liệu từ nghiên cứu tại Hà nội

PI: Le Nguyen

đọc dữ liệu vào R

fa3 <- read.csv("D:/fage3.csv", stringsAsFactors = TRUE)
attach(fa3)
names(fa3)

## [1] "age"      "level"    "mar"      "spending" "life"     "fage"     "inc"

Chuẩn hóa dữ liệu

maxs <- apply(fa3, 2, max)
mins <- apply(fa3, 2, min)
data_scaled <- as.data.frame(scale(fa3, center = mins, scale = maxs - mins))

Tách dữ liệu thành 70% huấn luyện và 30% kiểm tra

set.seed(123)
index <- sample(1:nrow(data_scaled), round(0.7 * nrow(data_scaled)))
train_data <- data_scaled[index, ]  # Tập huấn luyện
test_data <- data_scaled[-index, ]  # Tập kiểm tra

thiết lập mô hình ann dự báo fage

library(neuralnet)
ann_model <- neuralnet(fage ~ age+level+mar+spending+life+inc, data = train_data,hidden = c(10, 5),linear.output = TRUE)

Dự đoán với mô hình từ gói nnet

predicted_nnet <- predict(ann_model, test_data[, c("age", "level", "mar","spending","life","inc")])

chuyển đổi về giá trị ban đầu trên tập kiểm thử

predicted_original <- predicted_nnet * (maxs["fage"] - mins["fage"]) + mins["fage"]
actual_original <- test_data$fage * (maxs["fage"] - mins["fage"]) + mins["fage"]
mse <- mean((actual_original - predicted_original)^2)
# Chuyển đổi giá trị thực về khoảng ban đầu
actual_original <- test_data$fage * (maxs["fage"] - mins["fage"]) + mins["fage"]

Tính MAE

mae <- mean(abs(actual_original - predicted_original))
cat("MAE:", mae, "\n")

## MAE: 3.645421

Vẽ biểu đồ so sánh

plot(actual_original, type = "l", col = "blue", lwd = 2,ylab = "Giá trị fage", xlab = "mẫu kiểm thử",main = "So sánh Giá trị Thực và Dự đoán")
lines(predicted_original, col = "red", lwd = 2)
legend("topright", legend = c("Thực tế", "Dự đoán"),col = c("blue", "red"), lty = 1, lwd = 2)

# Vẽ biểu đồ tán xạ

plot(actual_original, predicted_original,xlab = "Giá trị thực của fage",ylab = "Giá trị dự đoán của fage",main = "Biểu đồ tán xạ: Thực tế vs. Dự đoán",pch = 16, col = "darkgreen")
# Thêm đường chéo y = x để tham chiếu
abline(0, 1, col = "red", lwd = 2, lty = 2)

# Tính sai số tuyệt đối

absolute_error <- abs(actual_original - predicted_original)
# Tạo bảng so sánh đầy đủ
comparison_table <- data.frame(Gia_tri_thuc = round(actual_original, 2),     Gia_tri_du_doan = round(predicted_original, 2),Sai_so_tuyet_doi = round(absolute_error, 2))
# Hiển thị 10 dòng đầu tiên
head(comparison_table, 10)

##    Gia_tri_thuc Gia_tri_du_doan Sai_so_tuyet_doi
## 1          25.2           25.43             0.23
## 2          22.8           20.45             2.35
## 3          24.7           29.08             4.38
## 10         22.8           24.49             1.69
## 11         23.9           27.59             3.69
## 19         24.5           24.76             0.26
## 20         68.4           71.71             3.31
## 24         25.1           22.86             2.24
## 28         69.9           67.44             2.46
## 35         34.0           36.03             2.03

# ghi lại kết quả vi file excel
write.csv(comparison_table, "so_sanh_thuc_du_doan_day_du.csv", row.names = FALSE)
tong_sai_so <- sum(absolute_error)
cat("Tổng sai số tuyệt đối:", round(tong_sai_so, 2), "\n")

## Tổng sai số tuyệt đối: 109.36

# vẽ phân bố sai số tuyệt đối
hist(absolute_error,breaks = 20,main = "Phân bố Sai số Tuyệt đối",xlab = "Sai số Tuyệt đối",col = "skyblue",border = "white")

# Xác định các điểm có sai số lớn hơn ngưỡng
nguong <- 5
mau_diem <- ifelse(absolute_error > nguong, "red", "darkgreen")

Vẽ biểu đồ tán xạ với màu khác nhau

plot(actual_original, predicted_original,xlab = "Giá trị thực của fage",ylab = "Giá trị dự đoán của fage",main = "Biểu đồ tán xạ: Sai số lớn được tô màu đỏ",pch = 16, col = mau_diem)
# Thêm đường tham chiếu y = x
abline(0, 1, col = "blue", lwd = 2, lty = 2)
# Thêm chú thích
legend("topleft", legend = c("Sai số > 5", "Sai số ≤ 5"),col = c("red", "darkgreen"), pch = 16)

# tính các chỉ số đánh giá mô hình trên tập kiểm thử

# Tính MAE trên tập kiểm thử
mae_test <- mean(abs(actual_original - predicted_original))
cat("MAE trên tập kiểm thử:", round(mae_test, 2), "\n")

## MAE trên tập kiểm thử: 3.65

# Tính RMSE trên tập kiểm thử
rmse_test <- sqrt(mean((actual_original - predicted_original)^2))
cat("RMSE trên tập kiểm thử:", round(rmse_test, 2), "\n")

## RMSE trên tập kiểm thử: 5.07

# Tính R² trên tập kiểm thử
ss_res <- sum((actual_original - predicted_original)^2)
ss_tot <- sum((actual_original - mean(actual_original))^2)
r_squared <- 1 - (ss_res / ss_tot)
cat("R² (R-squared):", round(r_squared, 4), "\n")

## R² (R-squared): 0.9265

ĐÁNH GIÁ ĐỘ CHÍNH XÁC TRÊN TẬP HỌC

# Chuyển giá trị thực và giá trị dự đoán về thang đo ban đầu
actual_train <- train_data$fage* (maxs["fage"] - mins["fage"]) + mins["fage"]
predicted_train <- predict(ann_model, train_data[, c("age", "level", "mar","spending","inc","life")]) * (maxs["fage"] - mins["fage"]) + mins["fage"]
# Tính MAE trên tập huấn luyện
mae_train <- mean(abs(actual_train - predicted_train))
cat("MAE trên tập huấn luyện:", round(mae_train, 2), "\n")

## MAE trên tập huấn luyện: 4.31

# Tính RMSE trên tập huấn luyện
rmse_train <- sqrt(mean((actual_train - predicted_train)^2))
cat("RMSE trên tập huấn luyện:", round(rmse_train, 2), "\n")

## RMSE trên tập huấn luyện: 5.97

# Tính R² trên tập huấn luyện
ss_res_train <- sum((actual_train - predicted_train)^2)
ss_tot_train <- sum((actual_train - mean(actual_train))^2)
r_squared_train <- 1 - (ss_res_train / ss_tot_train)
cat("R² trên tập huấn luyện:", round(r_squared_train, 4), "\n")

## R² trên tập huấn luyện: 0.8737

Hiển thị mô hình ANN đã huấn luyện

plot(ann_model, rep = "best")

du bao fage bang ann

Lenguyen

2025-07-08

dự báo fage bằng ann

dữ liệu từ nghiên cứu tại Hà nội

PI: Le Nguyen

đọc dữ liệu vào R

Chuẩn hóa dữ liệu

Tách dữ liệu thành 70% huấn luyện và 30% kiểm tra

thiết lập mô hình ann dự báo fage

Dự đoán với mô hình từ gói nnet

chuyển đổi về giá trị ban đầu trên tập kiểm thử

Tính MAE

Vẽ biểu đồ so sánh

Vẽ biểu đồ tán xạ với màu khác nhau

ĐÁNH GIÁ ĐỘ CHÍNH XÁC TRÊN TẬP HỌC

Hiển thị mô hình ANN đã huấn luyện