# Library yang dibutuhkan
library(e1071)
library(ggplot2)
# Simulate a harvest dataset whose response depends non-linearly on temperature.
# The seed is fixed so that every random draw below is reproducible.
set.seed(42)
n <- 50

# Predictors: four independent uniform samples of length n. The order of the
# draws is significant, because each call advances the random number stream.
tanah_kelembapan <- runif(n, min = 30, max = 80)
tanah_ph <- runif(n, min = 4.5, max = 8.5)
curah_hujan <- runif(n, min = 500, max = 1500)
suhu_rata2 <- runif(n, min = 15, max = 35)

# Individual contributions to the response. The first three are linear in
# their predictor; temperature enters through a sine term and a quadratic
# term centred at 25, which is what makes the relationship non-linear.
efek_kelembapan <- 0.1 * tanah_kelembapan
efek_ph <- 0.5 * tanah_ph
efek_hujan <- 0.002 * curah_hujan
efek_suhu_sinus <- 0.2 * sin(0.5 * suhu_rata2)
efek_suhu_kuadrat <- 0.1 * (suhu_rata2 - 25)^2
galat_acak <- rnorm(n, sd = 1.5)

# Intercept (2) plus all contributions plus Gaussian noise.
hasil_panen <- 2 + efek_kelembapan + efek_ph + efek_hujan +
  efek_suhu_sinus + efek_suhu_kuadrat + galat_acak

# Collect the predictors and the response into one data frame.
harvest_data <- data.frame(
  Kelembapan = tanah_kelembapan,
  PH = tanah_ph,
  CurahHujan = curah_hujan,
  Suhu = suhu_rata2,
  Panen = hasil_panen
)

# Drop any row that contains a missing value.
harvest_clean <- na.omit(harvest_data)

# Display up to the first 50 rows of the cleaned dataset.
head(harvest_clean, n = 50)
## Kelembapan PH CurahHujan Suhu Panen
## 1 75.74030 5.833709 1126.2453 29.38757 18.639711
## 2 76.85377 5.886993 717.1577 21.48172 16.674599
## 3 44.30698 6.093942 716.5673 30.57619 12.596908
## 4 71.52238 7.638771 888.9450 22.88882 17.787754
## 5 62.08728 4.655746 1442.4557 28.57186 13.894959
## 6 55.95480 7.495182 1462.6080 30.51650 17.556677
## 7 66.82942 7.209107 1239.8553 18.75738 18.040066
## 8 36.73333 5.185057 1233.2459 15.58172 19.618837
## 9 62.84961 5.544352 1035.7613 17.71428 18.826656
## 10 65.25324 6.557652 502.2730 28.60328 14.483103
## 11 52.88709 7.202429 1108.9375 33.69646 20.451251
## 12 65.95561 8.431269 1336.8016 26.00988 15.833823
## 13 76.73361 7.538177 1251.5226 27.03532 15.794430
## 14 42.77144 6.765954 952.7316 18.93989 14.472722
## 15 53.11464 7.898759 1035.7900 25.70473 10.946862
## 16 77.00073 5.257896 1037.3767 18.59111 17.963427
## 17 78.91132 5.585146 501.3808 24.03773 12.905976
## 18 35.87437 7.812634 855.6660 21.34107 16.407174
## 19 53.74985 7.272819 1112.1331 17.32349 17.223585
## 20 58.01664 5.462179 1328.9421 18.72204 17.350539
## 21 75.20157 4.671955 856.7220 29.59460 13.598160
## 22 36.93551 5.061916 910.6351 23.23744 7.988378
## 23 79.44459 5.365542 1073.4759 23.28099 15.096904
## 24 77.33341 6.417594 1089.6783 24.60620 13.589996
## 25 34.12188 5.289641 1219.6573 23.54989 10.561600
## 26 55.71059 7.377423 894.9730 17.72981 17.799115
## 27 49.51017 4.531539 1419.2039 31.49359 15.343592
## 28 75.28691 6.001960 1462.5703 26.84608 12.909731
## 29 52.34848 6.557631 733.5235 30.88794 13.662559
## 30 71.80021 4.506282 1224.4976 30.38065 17.145536
## 31 66.87978 6.826416 1403.6345 33.36113 22.585462
## 32 70.55276 5.131621 1103.4741 32.25260 18.267485
## 33 49.40541 5.936113 1131.5073 21.33950 13.322152
## 34 64.25849 7.082528 1437.3858 20.18521 18.720581
## 35 30.19742 7.603293 1350.4828 29.84533 16.171266
## 36 71.64580 6.754587 1079.8209 29.94722 15.637367
## 37 30.36671 5.434814 1321.4039 33.35808 17.041558
## 38 40.38295 4.859922 613.7186 30.86382 14.990896
## 39 75.33007 4.842448 1264.5078 17.66659 19.268057
## 40 60.58893 5.720873 1123.6135 20.75500 14.726860
## 41 48.97796 7.169706 648.4466 18.89352 15.374891
## 42 51.78858 4.500956 580.2645 30.68219 12.558811
## 43 31.87155 5.334280 964.0696 17.57744 14.743649
## 44 78.67700 8.232137 1279.3682 17.58179 22.119783
## 45 51.58756 8.202579 1233.5280 16.44506 20.611565
## 46 77.87883 7.436377 1317.2304 16.06259 25.995206
## 47 74.38775 5.832288 670.1625 25.63749 13.064334
## 48 61.99894 6.560253 1444.7203 17.24616 19.875611
## 49 78.54833 7.475899 793.6238 29.86375 18.731033
## 50 60.94191 6.976637 649.0721 29.62631 13.592393
# Split the cleaned data into a training part (70% of the rows) and a testing
# part (the remaining rows). The seed is reset so the split is reproducible.
set.seed(42)
n_obs <- nrow(harvest_clean)

# sample.int() draws row numbers from 1..n_obs directly, which avoids the
# `1:nrow(x)` pitfall (1:0 is c(1, 0)). floor() makes explicit the truncation
# that sample() would otherwise apply silently to a fractional size.
index <- sample.int(n_obs, size = floor(0.7 * n_obs))
train_data <- harvest_clean[index, ]

# Select the complement by positive row numbers instead of `-index`: negative
# indexing with an empty `index` would select zero rows rather than all rows.
test_data <- harvest_clean[setdiff(seq_len(n_obs), index), ]
# Baseline model: linear regression of Panen on Suhu only, fitted on the
# training split; predictions are made for the rows of the testing split.
lm_model <- lm(formula = Panen ~ Suhu, data = train_data)
lm_pred <- predict(object = lm_model, newdata = test_data)

# Support vector regression on the same single predictor, using a radial
# kernel. cost, gamma and epsilon are fixed values written into the call;
# no tuning step is performed here.
svr_model <- svm(
  Panen ~ Suhu,
  data = train_data,
  type = "eps-regression",
  kernel = "radial",
  cost = 10,
  gamma = 0.1,
  epsilon = 0.1
)
svr_pred <- predict(object = svr_model, newdata = test_data)
# Plot the test observations together with the fitted curve of each model.
#
# The rows are ordered by Suhu in a separate copy used only for plotting.
# `test_data` itself must keep its row order: `lm_pred` and `svr_pred` were
# computed from it earlier, so reordering it in place would pair every stored
# prediction with the wrong observation in any later comparison such as
# mean((lm_pred - test_data$Panen)^2).
plot_rows <- test_data[order(test_data$Suhu), ]
plot_data <- data.frame(
  Suhu = plot_rows$Suhu,
  Panen = plot_rows$Panen,
  lm_fit = unname(predict(lm_model, newdata = plot_rows)),
  svr_fit = unname(predict(svr_model, newdata = plot_rows))
)

# Columns are referenced by bare name through the `data` argument rather than
# with `test_data$...` inside aes().
# NOTE(review): the title states that SVR has the lower MSE, but it is a fixed
# string and is not derived from the MSE values computed in this script.
ggplot(plot_data, aes(x = Suhu)) +
  geom_point(aes(y = Panen), color = "black", size = 2) +
  geom_line(aes(y = lm_fit),
            color = "blue", linetype = "dashed", linewidth = 1) +
  geom_line(aes(y = svr_fit),
            color = "red", linetype = "solid", linewidth = 1) +
  labs(title = "Perbandingan SVR vs Regresi Linear (MSE SVR lebih rendah)",
       x = "Suhu Rata-rata (°C)",
       y = "Hasil Panen (ton/hektar)") +
  theme_minimal()

# Compute the test-set mean squared error (MSE) of both models.
#
# The predictions are regenerated from `test_data` right here so that each
# prediction is paired with the observation in the same row. Reusing
# predictions that were computed before `test_data` was reordered by Suhu
# would subtract values belonging to different rows and distort the MSE.
lm_pred <- predict(lm_model, newdata = test_data)
svr_pred <- predict(svr_model, newdata = test_data)
lm_mse <- mean((lm_pred - test_data$Panen)^2)
svr_mse <- mean((svr_pred - test_data$Panen)^2)

# Report both MSE values rounded to two decimals.
# NOTE(review): the `##` lines below are previously recorded output; they
# presumably predate the row-alignment fix above and should be regenerated
# by re-running the script.
print(paste("MSE Regresi Linear:", round(lm_mse, 2)))
## [1] "MSE Regresi Linear: 11.74"
print(paste("MSE SVR:", round(svr_mse, 2)))
## [1] "MSE SVR: 10.89"
# Print a summary of the SVR hyperparameters. The text is a fixed listing; it
# is not read back from the fitted model object.
# The leading and trailing empty strings produce the blank line before the
# heading and the final newline after the last entry.
svr_param_lines <- c(
  "",
  "Parameter SVR yang digunakan:",
  "- Kernel: radial (RBF)",
  "- Cost: 10",
  "- Gamma: 0.1",
  "- Epsilon: 0.1",
  ""
)
cat(paste(svr_param_lines, collapse = "\n"))
##
## Parameter SVR yang digunakan:
## - Kernel: radial (RBF)
## - Cost: 10
## - Gamma: 0.1
## - Epsilon: 0.1