# Demo R code
library(tidyverse)
library(e1071)
# Competition evaluation metric: R-squared, reported as a percentage string.
#
# `out` must be a data frame whose column names are exactly
# c("price", "predict"), in that order. The result is R^2 * 100 formatted
# with sprintf("%.5f"). The score is floored at 0. Input with any other
# column layout, or a score that is undefined (constant `price`, NA values,
# zero rows), also yields "0.00000".
Rsq <- function(out) {
  # A wrong column layout scores zero instead of raising an error.
  if (!identical(c("price", "predict"), names(out))) {
    return(sprintf("%.5f", 0))
  }
  ss_res <- sum((out$price - out$predict)^2)
  ss_tot <- sum((out$price - mean(out$price))^2)
  score <- (1 - ss_res / ss_tot) * 100
  # `score` is NA/NaN when ss_tot is 0 with zero residuals or when the inputs
  # contain NA; treat that like any other non-positive score.
  if (is.na(score) || score <= 0) {
    score <- 0
  }
  sprintf("%.5f", score)
}
# Load the training and test sets
train <- read_csv("trainset.csv")
test <- read_csv("testset.csv")
# Remember where the training rows end so the split further down does not
# depend on a hard-coded row count.
n_train <- nrow(train)
# Stack both sets (training rows first) so they are standardised with the same
# mean and range. bind_rows() keeps every row in input order and fills columns
# that are absent from one input with NA.
dat <- bind_rows(train, test)

# Centre a numeric vector on its mean and divide by its range (max - min).
center_by_range <- function(x) {
  (x - mean(x)) / diff(range(x))
}

dat <- dat %>%
  mutate(
    stpowerPS = center_by_range(powerPS),
    stkilometer = center_by_range(kilometer),
    stad_exist_time = center_by_range(ad_exist_time),
    styears = center_by_range(yearOfRegistration)
  ) %>%
  select(
    id, price, stpowerPS, stkilometer, stad_exist_time,
    vehicleType, gearbox, fuelType, styears, notRepairedDamage, brand
  ) %>%
  mutate(
    vehicleType = as.factor(vehicleType),
    gearbox = as.factor(gearbox),
    fuelType = as.factor(fuelType),
    notRepairedDamage = as.factor(notRepairedDamage),
    # Use "sonstige_autos" as the reference level of `brand`.
    brand = relevel(as.factor(brand), ref = "sonstige_autos")
  )
# Split the combined data back into trainset / testset
train <- filter(dat, row_number() <= n_train)
test <- filter(dat, row_number() > n_train)
# Build the SVR model on a random 2,000-row subsample of the training set
set.seed(123)
train_sub <- train %>% sample_n(2000)
# `id` is an identifier, not a predictor, so it is excluded from the formula.
svr_input <- select(train_sub, -id)
fit_svr <- svm(
  price ~ .,
  data = svr_input,
  type = "eps-regression",
  kernel = "radial",
  cost = 16,
  gamma = 0.25,
  epsilon = 0.1
)
# Check Rsq on the training subsample (fitted values vs. actual prices)
out_train <- data.frame(
  price = train_sub$price,
  predict = predict(fit_svr)
)
Rsq(out_train) # 72.40977
# Load submit.csv and store the test-set predictions in its `predict` column
submit <- read_csv("submit.csv")
test_pred <- predict(fit_svr, newdata = test)
submit[["predict"]] <- test_pred
# Write the result to submit01.csv, the file to upload
write_csv(submit, "submit01.csv") # 1.53