# This is a logistic regression algorithm for the machine.data dataset (UCI Computer Hardware) from the link https://archive.ics.uci.edu/ml/datasets/Computer+Hardware
# Download the UCI Computer Hardware data. The file has no header row, so the
# ten column names are supplied explicitly. They are written in their final,
# syntactic form (with dots) instead of relying on read.table() to rewrite
# names that contain spaces.
hurl <- url("https://archive.ics.uci.edu/ml/machine-learning-databases/cpu-performance/machine.data")
hdataset <- read.table(
  hurl,
  header = FALSE,
  sep = ",",
  col.names = c(
    "vendor.name", "Model.Name", "MYCT", "MMIN", "MMAX",
    "CACH", "CHMIN", "CHMAX", "PRP", "ERP"
  )
)
# Preview the first rows to confirm the columns were parsed as expected.
head(hdataset)
## vendor.name Model.Name MYCT MMIN MMAX CACH CHMIN CHMAX PRP ERP
## 1 adviser 32/60 125 256 6000 256 16 128 198 199
## 2 amdahl 470v/7 29 8000 32000 32 8 32 269 253
## 3 amdahl 470v/7a 29 8000 32000 32 8 32 220 253
## 4 amdahl 470v/7b 29 8000 32000 32 8 32 172 253
## 5 amdahl 470v/7c 29 8000 16000 32 8 16 132 132
## 6 amdahl 470v/b 26 8000 32000 64 8 32 318 290
# Keep only columns 3 to 10 (MYCT through ERP), dropping the vendor and model
# name columns, then preview the first rows of the reduced data frame.
hdataset <- hdataset[, 3:10, drop = FALSE]
head(hdataset, n = 6L)
## MYCT MMIN MMAX CACH CHMIN CHMAX PRP ERP
## 1 125 256 6000 256 16 128 198 199
## 2 29 8000 32000 32 8 32 269 253
## 3 29 8000 32000 32 8 32 220 253
## 4 29 8000 32000 32 8 32 172 253
## 5 29 8000 16000 32 8 16 132 132
## 6 26 8000 32000 64 8 32 318 290
# Show the structure of the data frame: its dimensions and, for each column,
# the storage type and the first few values.
str(hdataset)
## 'data.frame': 209 obs. of 8 variables:
## $ MYCT : int 125 29 29 29 29 26 23 23 23 23 ...
## $ MMIN : int 256 8000 8000 8000 8000 8000 16000 16000 16000 32000 ...
## $ MMAX : int 6000 32000 32000 32000 16000 32000 32000 32000 64000 64000 ...
## $ CACH : int 256 32 32 32 32 64 64 64 64 128 ...
## $ CHMIN: int 16 8 8 8 8 8 16 16 16 32 ...
## $ CHMAX: int 128 32 32 32 16 32 32 32 32 64 ...
## $ PRP : int 198 269 220 172 132 318 367 489 636 1144 ...
## $ ERP : int 199 253 253 253 132 290 381 381 749 1238 ...
# Per-column summary statistics (minimum, quartiles, median, mean, maximum).
summary(hdataset)
## MYCT MMIN MMAX CACH
## Min. : 17.0 Min. : 64 Min. : 64 Min. : 0.00
## 1st Qu.: 50.0 1st Qu.: 768 1st Qu.: 4000 1st Qu.: 0.00
## Median : 110.0 Median : 2000 Median : 8000 Median : 8.00
## Mean : 203.8 Mean : 2868 Mean :11796 Mean : 25.21
## 3rd Qu.: 225.0 3rd Qu.: 4000 3rd Qu.:16000 3rd Qu.: 32.00
## Max. :1500.0 Max. :32000 Max. :64000 Max. :256.00
## CHMIN CHMAX PRP ERP
## Min. : 0.000 Min. : 0.00 Min. : 6.0 Min. : 15.00
## 1st Qu.: 1.000 1st Qu.: 5.00 1st Qu.: 27.0 1st Qu.: 28.00
## Median : 2.000 Median : 8.00 Median : 50.0 Median : 45.00
## Mean : 4.699 Mean : 18.27 Mean : 105.6 Mean : 99.33
## 3rd Qu.: 6.000 3rd Qu.: 24.00 3rd Qu.: 113.0 3rd Qu.: 101.00
## Max. :52.000 Max. :176.00 Max. :1150.0 Max. :1238.00
# Plot the whole data frame to eyeball relationships between the columns.
plot(hdataset)
library(caTools)
# Fix the RNG seed so the random train/test partition (and every result that
# depends on it) can be reproduced from run to run.
set.seed(123)
# Randomly flag about 75% of the rows for training; the rest form the test set.
# NOTE(review): the split is keyed on MYCT, a predictor, rather than on the
# response PRP -- confirm this is intended.
split <- sample.split(hdataset$MYCT, SplitRatio = 0.75)
training_set <- subset(hdataset, split == TRUE)
test_set <- subset(hdataset, split == FALSE)
# Dimensions (rows, columns) of the training set.
dim(training_set)
## [1] 163 8
# Dimensions (rows, columns) of the held-out test set.
dim(test_set)
## [1] 46 8
# Standardise every column except column 8 (ERP). The centring and scaling
# constants are estimated on the training rows only and then reused for the
# test rows, so both sets are expressed on the same scale and no test-set
# statistics enter the preprocessing.
train_scaled <- scale(training_set[-8])
training_set[-8] <- train_scaled
test_set[-8] <- scale(
  test_set[-8],
  center = attr(train_scaled, "scaled:center"),
  scale = attr(train_scaled, "scaled:scale")
)
# NOTE(review): the response PRP (column 7) is standardised along with the
# predictors while ERP is left on its raw scale -- confirm that excluding
# column 8 rather than the response column is intended.

# Fit a GLM of PRP on all remaining columns. With the gaussian family this is
# a linear regression on the (standardised) PRP values, not a logistic model.
classifier <- glm(formula = PRP ~ ., data = training_set, family = gaussian())
classifier
##
## Call: glm(formula = PRP ~ ., family = gaussian(), data = training_set)
##
## Coefficients:
## (Intercept) MYCT MMIN MMAX CACH CHMIN
## -0.589340 -0.029046 0.127542 -0.059697 0.067984 0.037615
## CHMAX ERP
## 0.041677 0.006257
##
## Degrees of Freedom: 162 Total (i.e. Null); 155 Residual
## Null Deviance: 162
## Residual Deviance: 13.72 AIC: 77.21
# Plot the fitted model object, using "CLASSIFIER" as the plot title.
plot(classifier,main = "CLASSIFIER")
# Print the model summary: residual quantiles, coefficient table with standard
# errors and p-values, dispersion, deviances and AIC.
summary(classifier)
##
## Call:
## glm(formula = PRP ~ ., family = gaussian(), data = training_set)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.22305 -0.11790 -0.01718 0.05665 1.69013
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5893402 0.0647541 -9.101 4.17e-16 ***
## MYCT -0.0290465 0.0271164 -1.071 0.28575
## MMIN 0.1275424 0.0442669 2.881 0.00452 **
## MMAX -0.0596975 0.0636481 -0.938 0.34974
## CACH 0.0679840 0.0356138 1.909 0.05812 .
## CHMIN 0.0376149 0.0327483 1.149 0.25249
## CHMAX 0.0416765 0.0342107 1.218 0.22499
## ERP 0.0062569 0.0006414 9.755 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.08853978)
##
## Null deviance: 162.000 on 162 degrees of freedom
## Residual deviance: 13.724 on 155 degrees of freedom
## AIC: 77.21
##
## Number of Fisher Scoring iterations: 2
# Score the held-out rows with the fitted model, requesting predictions on the
# response scale, and print the resulting vector.
y_pred <- predict(classifier, newdata = test_set, type = "response")
y_pred
## 3 10 12 20 23 28
## 1.12429122 8.16702591 -0.57247340 0.60023604 -0.29457982 -0.58244737
## 29 33 41 54 58 62
## -0.54388585 -0.03590057 -0.40337148 -0.48888301 -0.60231899 -0.60231899
## 69 72 74 78 84 90
## -0.46466189 -0.21367688 -0.50516141 -0.40570801 -0.58207969 -0.18823199
## 91 98 108 113 119 127
## 0.48677977 0.84479913 -0.67061913 -0.44647102 -0.29289533 -0.35730751
## 130 133 137 139 143 144
## -0.13538910 -0.35327718 -0.05047970 -0.35592780 -0.40313021 -0.07765276
## 147 156 162 164 166 173
## 0.24132097 1.26170150 -0.40305098 -0.23667143 0.09127259 -0.14031462
## 175 177 184 192 195 200
## -0.53980960 -0.53982581 -0.42678653 1.32898425 0.48441230 5.94064812
## 203 204 207 208
## -0.56205734 -0.55558335 -0.39770018 -0.41931962
# Plot the raw predictions before they are turned into class labels.
plot(y_pred, main = "predictions")
# Convert each prediction to a 0/1 label: 1 when it exceeds 0.5, otherwise 0.
y_pred <- ifelse(y_pred > 0.5, 1, 0)
# Summarise the labels; the mean is the share of rows predicted as class 1.
summary(y_pred)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.0000 0.1522 0.0000 1.0000
# Confusion matrix of actual versus predicted class.
# PRP is continuous, so it is first reduced to the same two classes as y_pred
# by applying the same 0.5 cut-off. Tabulating the raw PRP values would give
# one row per distinct value instead of a 2 x 2 table, which makes the
# diagonal (and therefore the accuracy) meaningless.
# Both factors carry explicit levels 0 and 1 so the table stays 2 x 2, with
# correct predictions on the diagonal, even when one class is absent.
cm <- table(
  actual = factor(ifelse(test_set$PRP > 0.5, 1, 0), levels = c(0, 1)),
  predicted = factor(y_pred, levels = c(0, 1))
)
cm
# Accuracy: share of test rows whose predicted class equals the actual class.
acc <- sum(diag(cm)) / sum(cm)
acc