# Required libraries
library(caTools)
library(ROCR)
# Read the transactions from "creditcard.csv" (resolved relative to the
# current working directory) into a data frame, then print a compact
# per-column overview of its structure.
data <- read.csv(file = "creditcard.csv")
str(object = data)
## 'data.frame': 284807 obs. of 31 variables:
## $ Time : num 0 0 1 1 2 2 4 7 7 9 ...
## $ V1 : num -1.36 1.192 -1.358 -0.966 -1.158 ...
## $ V2 : num -0.0728 0.2662 -1.3402 -0.1852 0.8777 ...
## $ V3 : num 2.536 0.166 1.773 1.793 1.549 ...
## $ V4 : num 1.378 0.448 0.38 -0.863 0.403 ...
## $ V5 : num -0.3383 0.06 -0.5032 -0.0103 -0.4072 ...
## $ V6 : num 0.4624 -0.0824 1.8005 1.2472 0.0959 ...
## $ V7 : num 0.2396 -0.0788 0.7915 0.2376 0.5929 ...
## $ V8 : num 0.0987 0.0851 0.2477 0.3774 -0.2705 ...
## $ V9 : num 0.364 -0.255 -1.515 -1.387 0.818 ...
## $ V10 : num 0.0908 -0.167 0.2076 -0.055 0.7531 ...
## $ V11 : num -0.552 1.613 0.625 -0.226 -0.823 ...
## $ V12 : num -0.6178 1.0652 0.0661 0.1782 0.5382 ...
## $ V13 : num -0.991 0.489 0.717 0.508 1.346 ...
## $ V14 : num -0.311 -0.144 -0.166 -0.288 -1.12 ...
## $ V15 : num 1.468 0.636 2.346 -0.631 0.175 ...
## $ V16 : num -0.47 0.464 -2.89 -1.06 -0.451 ...
## $ V17 : num 0.208 -0.115 1.11 -0.684 -0.237 ...
## $ V18 : num 0.0258 -0.1834 -0.1214 1.9658 -0.0382 ...
## $ V19 : num 0.404 -0.146 -2.262 -1.233 0.803 ...
## $ V20 : num 0.2514 -0.0691 0.525 -0.208 0.4085 ...
## $ V21 : num -0.01831 -0.22578 0.248 -0.1083 -0.00943 ...
## $ V22 : num 0.27784 -0.63867 0.77168 0.00527 0.79828 ...
## $ V23 : num -0.11 0.101 0.909 -0.19 -0.137 ...
## $ V24 : num 0.0669 -0.3398 -0.6893 -1.1756 0.1413 ...
## $ V25 : num 0.129 0.167 -0.328 0.647 -0.206 ...
## $ V26 : num -0.189 0.126 -0.139 -0.222 0.502 ...
## $ V27 : num 0.13356 -0.00898 -0.05535 0.06272 0.21942 ...
## $ V28 : num -0.0211 0.0147 -0.0598 0.0615 0.2152 ...
## $ Amount: num 149.62 2.69 378.66 123.5 69.99 ...
## $ Class : int 0 0 0 0 0 0 0 0 0 0 ...
# Fix the RNG state so the random split below is reproducible.
set.seed(924) # seed: last number of the e-mail
# Logical mask over the rows of `data`, built from the Class labels with a
# requested 65% training share; TRUE rows go to train, the rest to test.
split <- sample.split(Y = data$Class, SplitRatio = 0.65)
train <- subset(data, split)
test <- subset(data, !split)
# Report how many observations ended up in each partition.
cat("Train set έχει", nrow(train), "παρατηρήσεις\n", sep = " ")
## Train set έχει 185125 παρατηρήσεις
cat("Test set έχει", nrow(test), "παρατηρήσεις\n", sep = " ")
## Test set έχει 99682 παρατηρήσεις
# Fit a binomial GLM (logistic regression) of Class on every other column
# of the training set, then print the coefficient table and fit statistics.
model <- glm(
  formula = Class ~ .,
  family = binomial,
  data = train
)
summary(object = model)
##
## Call:
## glm(formula = Class ~ ., family = binomial, data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.223e+00 3.099e-01 -26.531 < 2e-16 ***
## Time -4.589e-06 2.693e-06 -1.704 0.08836 .
## V1 5.446e-02 5.209e-02 1.045 0.29584
## V2 6.410e-02 8.246e-02 0.777 0.43698
## V3 -2.466e-03 6.449e-02 -0.038 0.96950
## V4 7.360e-01 1.065e-01 6.914 4.73e-12 ***
## V5 1.522e-01 9.191e-02 1.656 0.09776 .
## V6 -9.357e-02 8.794e-02 -1.064 0.28732
## V7 -7.025e-02 8.971e-02 -0.783 0.43358
## V8 -1.613e-01 3.635e-02 -4.437 9.11e-06 ***
## V9 -2.204e-02 1.634e-01 -0.135 0.89267
## V10 -7.954e-01 1.410e-01 -5.641 1.69e-08 ***
## V11 3.916e-02 9.873e-02 0.397 0.69165
## V12 -4.638e-03 1.032e-01 -0.045 0.96414
## V13 -3.145e-01 9.909e-02 -3.174 0.00150 **
## V14 -5.056e-01 7.526e-02 -6.718 1.84e-11 ***
## V15 -2.501e-02 1.024e-01 -0.244 0.80699
## V16 2.894e-02 2.038e-01 0.142 0.88704
## V17 -5.994e-02 8.501e-02 -0.705 0.48076
## V18 -1.433e-01 1.959e-01 -0.731 0.46456
## V19 2.002e-01 1.359e-01 1.473 0.14081
## V20 -3.023e-01 1.081e-01 -2.797 0.00515 **
## V21 4.022e-01 7.977e-02 5.042 4.62e-07 ***
## V22 7.023e-01 1.701e-01 4.128 3.65e-05 ***
## V23 -8.175e-02 6.918e-02 -1.182 0.23734
## V24 5.913e-02 1.776e-01 0.333 0.73921
## V25 -1.039e-01 1.588e-01 -0.654 0.51299
## V26 -1.239e-01 2.336e-01 -0.531 0.59573
## V27 -7.789e-01 1.506e-01 -5.172 2.32e-07 ***
## V28 -3.537e-01 1.279e-01 -2.766 0.00567 **
## Amount 8.461e-04 4.873e-04 1.736 0.08254 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 4710.1 on 185124 degrees of freedom
## Residual deviance: 1532.2 on 185094 degrees of freedom
## AIC: 1594.2
##
## Number of Fisher Scoring iterations: 12
# Score the held-out test rows with the fitted model; type = "response"
# asks for predictions on the response scale rather than the link scale.
predictTest <- predict(
  object = model,
  newdata = test,
  type = "response"
)
# Five-number summary (plus mean) of the predicted values.
summary(object = predictTest)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000000 0.0000852 0.0002162 0.0018112 0.0004890 1.0000000
# Pair the test-set predictions with the true Class labels for ROCR.
predROCR <- prediction(
  predictions = predictTest,
  labels = test[["Class"]]
)
# True-positive rate (y axis) against false-positive rate (x axis).
perfROCR <- performance(
  prediction.obj = predROCR,
  measure = "tpr",
  x.measure = "fpr"
)
# Draw the resulting curve in blue under the title "ROC Curve".
plot(
  perfROCR,
  col = "blue",
  main = "ROC Curve"
)
