This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Example chunk: summary() is called on `cars`; the `##` lines that follow
# are the recorded output for its two columns, speed and dist.
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Load the packages used below: caTools for sample.split(), ROCR for the
# ROC / AUC analysis.
library(caTools)
library(ROCR)

# Read the ticket-price data; the path is relative to the working directory.
data <- read.csv("airline_ticket_prices_dataset.csv")

# Convert the categorical columns to factors.
data$Airline <- as.factor(data$Airline)
data$Origin <- as.factor(data$Origin)
data$Destination <- as.factor(data$Destination)
data$Class <- as.factor(data$Class)

# Binary target: 1 when the price is strictly above the median price, else 0.
median_price <- median(data$Price_USD)
data$HighPrice <- ifelse(data$Price_USD > median_price, 1, 0)

set.seed(923) # use your own last 2 digits here

# 65% / 35% train/test split on the target column.
split <- sample.split(data$HighPrice, SplitRatio = 0.65)
train <- subset(data, split == TRUE)
test <- subset(data, split == FALSE)

# Report the size of each partition, one per line.
cat("Train size:", nrow(train), "\n")
cat("Test size:", nrow(test), "\n")
# Logistic regression of the HighPrice indicator on the route, airline,
# class, distance and booking-lead-time columns, fitted on the training set.
model <- glm(
  HighPrice ~ Airline + Origin + Destination + Distance_km + Class +
    Days_Before_Departure,
  data = train,
  family = binomial
)
summary(model)

# type = "response" returns predicted probabilities rather than log-odds.
predictTest <- predict(model, newdata = test, type = "response")
head(predictTest)
# Turn the predicted probabilities into class labels with a 0.5 cutoff.
predictClass <- ifelse(predictTest > 0.5, 1, 0)

# Confusion matrix: rows are predictions, columns are actual labels.
# Both factors are pinned to levels 0/1 so the table is always 2 x 2, even
# when every prediction falls in a single class; otherwise the cell lookups
# below would fail with "subscript out of bounds".
cm <- table(
  Predicted = factor(predictClass, levels = c(0, 1)),
  Actual = factor(test$HighPrice, levels = c(0, 1))
)
print(cm)

# Cells are addressed as cm[predicted, actual].
accuracy <- (cm["0", "0"] + cm["1", "1"]) / sum(cm)
# True positives over all actual positives.
sensitivity <- cm["1", "1"] / (cm["1", "1"] + cm["0", "1"])
# True negatives over all actual negatives.
specificity <- cm["0", "0"] / (cm["0", "0"] + cm["1", "0"])

cat("Accuracy:", accuracy, "\n")
cat("Sensitivity:", sensitivity, "\n")
cat("Specificity:", specificity, "\n")

# Baseline: accuracy of always predicting the most frequent class in the
# test set.
baseline <- max(prop.table(table(test$HighPrice)))
cat("Baseline Accuracy:", baseline, "\n")
# Build the ROCR prediction object from the test-set probabilities and the
# actual labels, then plot the true-positive rate against the
# false-positive rate.
ROCRpred <- prediction(predictTest, test$HighPrice)
ROCRperf <- performance(ROCRpred, "tpr", "fpr")
plot(ROCRperf, colorize = TRUE, main = "ROC Curve")

# The "auc" measure is stored as the first element of the y.values slot.
auc <- performance(ROCRpred, "auc")
cat("AUC:", auc@y.values[[1]], "\n")
# Repeat the 65% / 35% split on a copy of the data with every row that
# contains a missing value removed.
data2 <- na.omit(data)
split2 <- sample.split(data2$HighPrice, SplitRatio = 0.65)
train2 <- subset(data2, split2 == TRUE)
test2 <- subset(data2, split2 == FALSE)

# Report the size of each partition, one per line.
cat("Train2 size:", nrow(train2), "\n")
cat("Test2 size:", nrow(test2), "\n")