Linear Regression

# Load the built-in iris data set into the workspace.
data("iris")

# Represent the data with a straight line: Sepal.Length is the response (y)
# and Petal.Length is the predictor (x).
linear_model <- lm(formula = Sepal.Length ~ Petal.Length, data = iris)

# Print the coefficient table, residual quantiles and fit statistics.
summary(linear_model)

Call:
lm(formula = Sepal.Length ~ Petal.Length, data = iris)

Residuals:
     Min       1Q   Median 
-1.24675 -0.29657 -0.01515 
      3Q      Max 
 0.27676  1.00269 

Coefficients:
             Estimate
(Intercept)   4.30660
Petal.Length  0.40892
             Std. Error
(Intercept)     0.07839
Petal.Length    0.01889
             t value Pr(>|t|)
(Intercept)    54.94   <2e-16
Petal.Length   21.65   <2e-16
                
(Intercept)  ***
Petal.Length ***
---
Signif. codes:  
  0 ‘***’ 0.001 ‘**’ 0.01
  ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4071 on 148 degrees of freedom
Multiple R-squared:   0.76, Adjusted R-squared:  0.7583 
F-statistic: 468.6 on 1 and 148 DF,  p-value: < 2.2e-16

Linear Regression plot

# Pull the predictor (petal length) and the response (sepal length) out of
# iris as plain vectors; x and y are kept as globals for later chunks.
x <- iris[["Petal.Length"]]
y <- iris[["Sepal.Length"]]

# Scatter plot of the response against the predictor.
plot(x, y)

NA
NA

Linear Regression line plot

# Fitted values of the regression for the training observations (no newdata
# is supplied). Requires linear_model to exist in the session, i.e. the
# model-fitting chunk must have been run first.
pred <- predict(object = linear_model)
Error: object 'linear_model' not found

Showing the confidence interval using ggplot

# ggplot2 has to be attached before ggplot() can be called; without it this
# chunk stops with 'could not find function "ggplot"'.
library(ggplot2)

# Scatter plot of sepal length against petal length with a least-squares
# line; level = 0.95 draws the 95% confidence band around the fit.
# The formula is spelled out so geom_smooth() does not have to guess it.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, level = 0.95)
Error in ggplot(iris, aes(x = Petal.Length, y = Sepal.Length)) : 
  could not find function "ggplot"

Linear regression per species using ggplot

# ggplot2 has to be attached before ggplot() can be called; without it this
# chunk stops with 'could not find function "ggplot"'.
library(ggplot2)

# Mapping color to Species groups the data, so the points are coloured by
# species and a separate least-squares line with a 95% confidence band is
# drawn for each species.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, level = 0.95)
Error in ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) : 
  could not find function "ggplot"

Polynomial regression model

# ggplot2 has to be attached before ggplot() can be called; without it this
# chunk stops with 'could not find function "ggplot"'.
library(ggplot2)

# Same per-species plot, but the smoother fits a cubic polynomial
# (y ~ poly(x, 3)) instead of a straight line; the band is the 95%
# confidence interval of that fit.
ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ poly(x, 3), level = 0.95)
Error in ggplot(iris, aes(x = Petal.Length, y = Sepal.Length, color = Species)) : 
  could not find function "ggplot"

Clustering

# k-means starts from randomly chosen centres, so fix the RNG seed to make
# the cluster assignment (and the cluster numbering) reproducible.
set.seed(42)

# Split the four numeric measurements (columns 1-4, Species excluded) into
# 3 clusters via `centers`. nstart = 25 runs 25 random starts and keeps the
# best one, which guards against ending up in a poor local optimum.
kmeans_result <- kmeans(iris[, 1:4], centers = 3, nstart = 25)

# Print cluster sizes, centres, assignments and sums of squares.
kmeans_result
K-means clustering with 3 clusters of sizes 38, 62, 50

Cluster means:
  Sepal.Length Sepal.Width Petal.Length Petal.Width
1     6.850000    3.073684     5.742105    2.071053
2     5.901613    2.748387     4.393548    1.433871
3     5.006000    3.428000     1.462000    0.246000

Clustering vector:
  [1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2
 [57] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1 1
[113] 1 2 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2

Within cluster sum of squares by cluster:
[1] 23.87947 39.82097 15.15100
 (between_SS / total_SS =  88.4 %)

Available components:

[1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss" "betweenss"    "size"        
[8] "iter"         "ifault"      

Cluster plot

library(cluster)

# Draw the k-means clusters in a 2-D projection of the data.
# Only the four numeric columns that kmeans() was fitted on are passed:
# handing over the whole iris frame would also feed the Species factor
# (coerced to integer codes) into the projection, so the picture would no
# longer show the same feature space the clusters were computed in.
clusplot(iris[, 1:4], kmeans_result$cluster)

# Load the built-in iris data set into the workspace and display it in full.
data("iris")
iris

Classification

# e1071 supplies svm(); caret supplies createDataPartition() and the
# evaluation helpers (confusion matrix) used further down.
library(lattice)
library(e1071)
library(caret)

# The split below is random. Fixing the seed keeps the train/test rows, the
# fitted model and every metric derived from them identical between runs.
set.seed(123)

# Put 80% of the rows into the training set, sampled with respect to
# Species; list = FALSE returns the row indices as a matrix rather than a
# list, so they can be used directly for subsetting.
train_index <- createDataPartition(iris$Species, p = 0.8, list = FALSE)
train_data <- iris[train_index, ]
test_data <- iris[-train_index, ]

# Show both partitions.
train_data
test_data

# Linear-kernel SVM that predicts Species from three of the measurements
# (Petal.Width is not used as a predictor).
svm_model <- svm(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length,
  data = train_data,
  kernel = "linear"
)

## SVM model

# Fit the linear-kernel SVM on the training partition: Species is predicted
# from sepal length, sepal width and petal length.
svm_model <- svm(
  Species ~ Sepal.Length + Sepal.Width + Petal.Length,
  data = train_data,
  kernel = "linear"
)

# Show the 24th row of the test partition (the observation classified in
# the next step).
test_data[24, ]

## Predict data

# Classify a single held-out observation: the 24th row of the test set.
predict(object = svm_model, newdata = test_data[24, ])
      115 
virginica 
Levels: setosa versicolor virginica
# Predict the species of every row in the test partition.
predictions <- predict(svm_model, newdata = test_data)

# Compare the predictions with the true labels; the result reports how well
# the model classified the held-out data (confusion table plus statistics).
conf_max <- confusionMatrix(
  data = predictions,
  reference = test_data$Species
)
conf_max
Confusion Matrix and Statistics

            Reference
Prediction   setosa versicolor virginica
  setosa         10          0         0
  versicolor      0         10         1
  virginica       0          0         9

Overall Statistics
                                          
               Accuracy : 0.9667          
                 95% CI : (0.8278, 0.9992)
    No Information Rate : 0.3333          
    P-Value [Acc > NIR] : 2.963e-13       
                                          
                  Kappa : 0.95            
                                          
 Mcnemar's Test P-Value : NA              

Statistics by Class:

                     Class: setosa Class: versicolor Class: virginica
Sensitivity                 1.0000            1.0000           0.9000
Specificity                 1.0000            0.9500           1.0000
Pos Pred Value              1.0000            0.9091           1.0000
Neg Pred Value              1.0000            1.0000           0.9524
Prevalence                  0.3333            0.3333           0.3333
Detection Rate              0.3333            0.3333           0.3000
Detection Prevalence        0.3333            0.3667           0.3000
Balanced Accuracy           1.0000            0.9750           0.9500

Confusion matrix plot

# Flatten the confusion table into a data frame; the plot below uses its
# Prediction, Reference and Freq columns.
cm <- data.frame(conf_max$table)

# Heat map of the confusion matrix: one tile per (Prediction, Reference)
# pair, shaded from white to sky blue by count and labelled with the count.
ggplot(data = cm, mapping = aes(x = Prediction, y = Reference, fill = Freq)) +
  geom_tile() +
  geom_text(mapping = aes(label = Freq)) +
  scale_fill_gradient(low = "white", high = "skyblue")

NA
LS0tDQp0aXRsZTogIlJlZ3Jlc3Npb24iDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIExpbmVhciBSZWdyZXNzaW9uDQoNCmBgYHtyfQ0KZGF0YSgiaXJpcyIpDQojIERhdGEgayBsaW5lIGRpeWUgcmVwcmVzZW50IGtvcmJlDQpsaW5lYXJfbW9kZWwgPSBsbShTZXBhbC5MZW5ndGggfiBQZXRhbC5MZW5ndGgsIGRhdGEgPSBpcmlzKQ0KI1RoZSBmaXJzdCBvbmUgKFNlcGFsLkxlbmd0aCkgaXMgZm9yIHkgYW5kIHNlY29uZCBvbmUgKFBldGFsLkxlbmd0aCkgaXMgZm9yIHgNCnN1bW1hcnkobGluZWFyX21vZGVsKQ0KYGBgDQoNCiMjIExpbmVhciBSZWdyZXNzaW9uIHBsb3QNCg0KYGBge3J9DQp4PSBpcmlzJFBldGFsLkxlbmd0aA0KeT0gaXJpcyRTZXBhbC5MZW5ndGgNCnBsb3QoeCwgeSkNCg0KDQpgYGANCg0KDQojIyBMaW5lYXIgUmVncmVzc2lvbiBsaW5lIHBsb3QNCg0KDQpgYGB7cn0NCnByZWQgPSBwcmVkaWN0KGxpbmVhcl9tb2RlbCkNCml4ID0gc29ydCh4LCBpbmRleC5yZXR1cm4gPSBUKSRpeA0KaXgNCiMgdG8gc29ydCBhbmQgcmV0dXJuIGluZGV4IG5vdCB0aGUgYWN0dWFsIHZhbHVlDQpwbG90KHgseSkNCmxpbmVzKHhbaXhdLCBwcmVkW2l4XSkNCmBgYA0KDQojIHRvIHNob3cgY29uZmlkZW5jZSBpbnRlcnZhbCB1c2luZyBnZ3Bsb3QNCg0KYGBge3J9DQpnZ3Bsb3QoaXJpcywgYWVzKHg9IFBldGFsLkxlbmd0aCwgeT0gU2VwYWwuTGVuZ3RoKSkrDQogIGdlb21fcG9pbnQoKSsNCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgbGV2ZWwgPSAwLjk1KQ0KI3RvIHNob3cgYXQgOTUlIGNvbmZpZGVuY2UgaW50ZXJ2YWwNCmBgYA0KDQojIyBMaW5lYXIgcmVncmVzc2lvbiB1c2luZyBnZ3Bsb3QNCg0KYGBge3J9DQpnZ3Bsb3QoaXJpcywgYWVzKHg9IFBldGFsLkxlbmd0aCwgeT0gU2VwYWwuTGVuZ3RoLCBjb2xvcj0gU3BlY2llcykpKw0KICBnZW9tX3BvaW50KCkrDQogIGdlb21fc21vb3RoKG1ldGhvZCA9ICJsbSIsIGxldmVsID0gMC45NSkNCiN0byBzaG93IGF0IDk1JSBjb25maWRlbmNlIGludGVydmFsDQpgYGANCiMjIFBPbHlub21pYWwgcmVncmVzc2lvbiBtb2RlbA0KDQpgYGB7cn0NCmdncGxvdChpcmlzLCBhZXMoeD0gUGV0YWwuTGVuZ3RoLCB5PSBTZXBhbC5MZW5ndGgsIGNvbG9yPSBTcGVjaWVzKSkrDQogIGdlb21fcG9pbnQoKSsNCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIixmb3JtdWxhID0geX5wb2x5KHgsMyksIGxldmVsID0gMC45NSkNCmBgYA0KDQoNCg0KIyMgQ2x1c3RlcmluZw0KYGBge3J9DQprbWVhbnNfcmVzdWx0ID0ga21lYW5zKGlyaXNbICwxOjRdLGNlbnRlcnMgPSAzKQ0KI2NlbnRlcnMgZGl5ZSAzIHRhIGNsdXN0ZXIgZSB2YWcga29yZQ0Ka21lYW5zX3Jlc3VsdA0KYGBgDQoNCg0KIyMgY2x1c3RlciBwbG90DQpgYGB7cn0NCmxpYnJhcnkoY2x1c3RlcikNCmNsdXNwbG90KGlyaXMsIGttZWFuc19yZXN1bHQkY2x1c3RlcikNCmBgYA0KYGBge3J9DQpkYXRhKCJpcmlzIikNCmly
aXMNCmBgYA0KDQoNCiMgQ2xhc2lmaWNhdGlvbg0KYGBge3J9DQpsaWJyYXJ5KGxhdHRpY2UpDQpsaWJyYXJ5KGUxMDcxKQ0KbGlicmFyeShjYXJldCkNCiNTVk0tIGUxMDcxDQojY2FyZXQtZXZhbHVhdGlvbiBtYXRyaXgNCg0KdHJhaW5faW5kZXg9IGNyZWF0ZURhdGFQYXJ0aXRpb24oaXJpcyRTcGVjaWVzLCBwID0gMC44LCBsaXN0ID0gRkFMU0UpDQp0cmFpbl9kYXRhID0gaXJpc1t0cmFpbl9pbmRleCwgXQ0KdGVzdF9kYXRhID0gaXJpc1stdHJhaW5faW5kZXgsIF0NCnRyYWluX2RhdGEgDQp0ZXN0X2RhdGENCg0KDQpzdm1fbW9kZWwgPSBzdm0oU3BlY2llcyB+IFNlcGFsLkxlbmd0aCtTZXBhbC5XaWR0aCtQZXRhbC5MZW5ndGgsIGRhdGEgPSB0cmFpbl9kYXRhLCBrZXJuZWwgPSAibGluZWFyIikNCmBgYA0KIyNTVk0gbW9kZWwNCmBgYHtyfQ0Kc3ZtX21vZGVsID0gc3ZtKFNwZWNpZXMgfiAgU2VwYWwuTGVuZ3RoK1NlcGFsLldpZHRoDQorUGV0YWwuTGVuZ3RoLCBkYXRhID0gdHJhaW5fZGF0YSwga2VybmVsID0gImxpbmVhciIpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KdGVzdF9kYXRhWzI0LCBdDQpgYGANCiMjUHJlZGljdCBkYXRhDQpgYGB7cn0NCnByZWRpY3Qoc3ZtX21vZGVsLCBuZXdkYXRhID0gdGVzdF9kYXRhWzI0LCBdICkNCmBgYA0KYGBge3J9DQpwcmVkaWN0aW9ucyA9IHByZWRpY3Qoc3ZtX21vZGVsLCBuZXdkYXRhID0gdGVzdF9kYXRhKQ0KY29uZl9tYXggPSBjb25mdXNpb25NYXRyaXgocHJlZGljdGlvbnMsIHRlc3RfZGF0YSRTcGVjaWVzKQ0KY29uZl9tYXgNCiNzaG9iIGRhdGEgdGhpayB2YWJlIHByZWRpY3Qga29ydGUgcGFyc2Uga2luYSB0YXIgYWt0YSByZXBvcnQNCmBgYA0KIyMgQ29uZml1c2lvbiBtYXRyaXggcGxvdA0KDQpgYGB7cn0NCmNtID0gZGF0YS5mcmFtZShjb25mX21heCR0YWJsZSkNCg0KZ2dwbG90KGNtLCBhZXMoUHJlZGljdGlvbiwgUmVmZXJlbmNlLCBmaWxsID0gRnJlcSkpKyANCmdlb21fdGlsZSgpKw0KZ2VvbV90ZXh0KGFlcyhsYWJlbCA9IEZyZXEpKSsNCnNjYWxlX2ZpbGxfZ3JhZGllbnQobG93ID0gIndoaXRlIiwgaGlnaCA9ICJza3libHVlIikNCiAgDQpgYGANCg0K