# Read the iris measurements from a CSV file in the working directory and
# echo the resulting data frame.
# NOTE(review): this binding masks R's built-in `iris` dataset for the rest
# of the session.
iris <- read.csv(file = "iris.csv")
iris
library(ggplot2)
# Scatter plot of sepal width against sepal length, one colour per variety.
# The manual colour scale is keyed by the variety labels "Setosa",
# "Versicolor" and "Virginica".
scatter_plot <- ggplot(
  iris,
  aes(x = sepal.length, y = sepal.width, color = variety)
) +
  geom_point(size = 2) +
  scale_color_manual(
    values = c("Setosa" = "blue", "Versicolor" = "pink", "Virginica" = "purple")
  ) +
  labs(
    title = "This plot is created using ggplot",
    x = "Length",
    y = "Width",
    caption = "Source: Iris dataset"
  ) +
  theme(
    legend.position = "top", # one of: top, left, right, bottom
    text = element_text(colour = "black", size = 15),
    axis.text.x = element_text(color = "blue", size = 16),
    axis.text.y = element_text(color = "red", size = 12)
  )

# Assigning a ggplot object does not draw it; evaluate the object so the
# figure is actually rendered.
scatter_plot
# Box plot of sepal length for each variety, boxes filled by variety.
ggplot(iris, aes(variety, sepal.length, fill = variety)) +
  geom_boxplot() +
  labs(
    caption = "Source: Iris dataset",
    title = "This plot is created using ggplot",
    y = "Sepal Length",
    x = "Class"
  )

# Violin plot of sepal length for each variety, violins filled by variety.
ggplot(
  data = iris,
  mapping = aes(x = variety, y = sepal.length, fill = variety)
) +
  geom_violin() +
  labs(
    x = "Class",
    y = "Sepal Length",
    title = "This plot is created using ggplot",
    caption = "Source: Iris dataset"
  )

# Per-column summary of the data frame.
summary(object = iris)
  sepal.length    sepal.width     petal.length    petal.width   
 Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
 1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
 Median :5.800   Median :3.000   Median :4.350   Median :1.300  
 Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
 3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
 Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
   variety         
 Length:150        
 Class :character  
 Mode  :character  
                   
                   
                   
# Correlation between sepal length and sepal width.
cor(x = iris[["sepal.length"]], y = iris[["sepal.width"]])
[1] -0.1175698
# Correlation of sepal length with itself.
cor(x = iris[["sepal.length"]], y = iris[["sepal.length"]])
[1] 1
# Pairwise correlations between the first four columns of the data frame.
cor_matrix <- cor(x = iris[, 1:4])
cor_matrix
             sepal.length sepal.width petal.length petal.width
sepal.length    1.0000000  -0.1175698    0.8717538   0.8179411
sepal.width    -0.1175698   1.0000000   -0.4284401  -0.3661259
petal.length    0.8717538  -0.4284401    1.0000000   0.9628654
petal.width     0.8179411  -0.3661259    0.9628654   1.0000000
library(ggcorrplot)
# Correlation heatmaps: the full matrix first, then each triangle on its own.
ggcorrplot(corr = cor_matrix)

ggcorrplot(corr = cor_matrix, type = "lower")

ggcorrplot(corr = cor_matrix, type = "upper")

# Lower-triangle heatmap with a custom three-colour scale and the
# coefficient printed in each cell. The argument is spelled `colors` in full
# instead of relying on partial matching of `color`.
ggcorrplot(
  cor_matrix,
  type = "lower",
  colors = c("pink", "white", "purple"),
  lab = TRUE
)

library(GGally)

# Pairwise plot matrix of all columns, coloured by variety.
ggpairs(data = iris, mapping = aes(colour = variety))


library(plotly)

# Interactive violin plot of sepal length.
fig <- plot_ly(data = iris, y = ~sepal.length, type = "violin")
fig

# Interactive box plot of the same variable; `fig` is overwritten.
fig <- plot_ly(data = iris, y = ~sepal.length, type = "box")
fig
library(plotly)

# Interactive histogram of sepal length; `fig` is overwritten.
fig <- plot_ly(data = iris, x = ~sepal.length, type = "histogram")
fig
library("stats")

# Principal component analysis on every column except the fifth (the variety
# label), with each variable centred and scaled.
# `scale.` is written with its trailing dot because that is the argument's
# actual name in prcomp(); the bare `scale` spelling only works through
# partial argument matching.
iris_pca <- prcomp(iris[, -5], center = TRUE, scale. = TRUE)
iris_pca
Standard deviations (1, .., p=4):
[1] 1.7083611 0.9560494 0.3830886 0.1439265

Rotation (n x k) = (4 x 4):
                    PC1         PC2        PC3        PC4
sepal.length  0.5210659 -0.37741762  0.7195664  0.2612863
sepal.width  -0.2693474 -0.92329566 -0.2443818 -0.1235096
petal.length  0.5804131 -0.02449161 -0.1421264 -0.8014492
petal.width   0.5648565 -0.06694199 -0.6342727  0.5235971
# Scores of every observation on the first two principal components.
pc_12 <- data.frame(iris_pca[["x"]][, 1:2])
head(pc_12)

# Attach the variety label so the scores can be coloured by class.
pc_12_class <- cbind(pc_12, variety = iris[["variety"]])
pc_12_class

# PC1 against PC2, one colour per variety.
ggplot(
  data = pc_12_class,
  mapping = aes(x = PC1, y = PC2, colour = variety)
) +
  geom_point() +
  theme_minimal()

# corrplot supplies corrplot(); factoextra supplies get_pca_var() and
# fviz_pca_ind(), so it must be attached as well.
library("corrplot")
library("factoextra")

# Extract the per-variable PCA results and plot their cos2 matrix.
# NOTE(review): `var` shares its name with stats::var(); the name is kept
# unchanged here.
var <- get_pca_var(iris_pca)
corrplot(var$cos2)

# Individuals on the principal component plane, coloured by variety, with an
# ellipse drawn around each group.
fviz_pca_ind(
  iris_pca,
  geom.ind = "point",
  col.ind = iris$variety,
  addEllipses = TRUE
)

# Same plot without the group ellipses.
fviz_pca_ind(
  iris_pca,
  geom.ind = "point",
  col.ind = iris$variety,
  addEllipses = FALSE
)

# Simple linear regression of sepal length on petal length.
lm_model <- lm(formula = sepal.length ~ petal.length, data = iris)
summary(lm_model)

Call:
lm(formula = sepal.length ~ petal.length, data = iris)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.24675 -0.29657 -0.01515  0.27676  1.00269 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)   4.30660    0.07839   54.94   <2e-16 ***
petal.length  0.40892    0.01889   21.65   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.4071 on 148 degrees of freedom
Multiple R-squared:   0.76, Adjusted R-squared:  0.7583 
F-statistic: 468.6 on 1 and 148 DF,  p-value: < 2.2e-16
# Base-graphics scatter with sepal length on the horizontal axis and petal
# length on the vertical axis.
x <- iris[["sepal.length"]]
y <- iris[["petal.length"]]
plot(x, y)

# Fitted values of lm_model for the observations it was fitted on.
pred <- predict(lm_model)
# Index permutation that puts x in ascending order. TRUE is spelled out
# because T is an ordinary variable that can be reassigned.
ix <- sort(x, index.return = TRUE)$ix
ix
  [1]  14   9  39  43  42   4   7  23  48   3  30  12  13  25  31  46   2
 [18]  10  35  38  58 107   5   8  26  27  36  41  44  50  61  94   1  18
 [35]  20  22  24  40  45  47  99  28  29  33  60  49   6  11  17  21  32
 [52]  85  34  37  54  81  82  90  91  65  67  70  89  95 122  16  19  56
 [69]  80  96  97 100 114  15  68  83  93 102 115 143  62  71 150  63  79
 [86]  84  86 120 139  64  72  74  92 128 135  69  98 127 149  57  73  88
[103] 101 104 124 134 137 147  52  75 112 116 129 133 138  55 105 111 117
[120] 148  59  76  66  78  87 109 125 141 145 146  77 113 144  53 121 140
[137] 142  51 103 110 126 130 108 131 106 118 119 123 136 132
# lm_model regresses sepal.length on petal.length, so its fitted line belongs
# on a plot with petal length on the horizontal axis and sepal length on the
# vertical axis. Drawing the fitted sepal lengths against sepal length over a
# petal-length axis does not show the regression line.
plot(
  iris$petal.length, iris$sepal.length,
  xlab = "petal.length", ylab = "sepal.length"
)
# abline() reads the intercept and slope directly from the fitted model.
abline(lm_model)


# Redraw the plain scatter of y against x.
plot(x = x, y = y)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCmlyaXMgPSByZWFkLmNzdigiaXJpcy5jc3YiKQ0KaXJpcw0KYGBgDQoNCmBgYHtyfQ0KbGlicmFyeShnZ3Bsb3QyKQ0Kc2NhdHRlcl9wbG90ID0gZ2dwbG90KGlyaXMsIGFlcyh4ID0gc2VwYWwubGVuZ3RoLCB5ID0gc2VwYWwud2lkdGgsIGNvbG9yID0gdmFyaWV0eSkpICsNCmdlb21fcG9pbnQoc2l6ZSA9IDIpICsNCnNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXMgPSBjKCJTZXRvc2EiID0gImJsdWUiLCAiVmVyc2ljb2xvciIgPSAicGluayIsICJWaXJnaW5pY2EiID0gInB1cnBsZSIpKSArDQpsYWJzKHRpdGxlID0gIlRoaXMgcGxvdCBpcyBjcmVhdGVkIHVzaW5nIGdncGxvdCIsDQp4ID0gIkxlbmd0aCIsDQp5ID0gIldpZHRoIiwNCmNhcHRpb24gPSAiU291cmNlOiBJcmlzIGRhdGFzZXQiKSArDQp0aGVtZSgNCmxlZ2VuZC5wb3NpdGlvbiA9ICJ0b3AiLCAjIHRvcCwgbGVmdCwgcmlnaHQsIGJvdHRvbQ0KdGV4dCA9IGVsZW1lbnRfdGV4dChjb2xvdXIgPSAnYmxhY2snLCBzaXplID0gMTUpLA0KYXhpcy50ZXh0LnggPSBlbGVtZW50X3RleHQoY29sb3IgPSAiYmx1ZSIsIHNpemUgPSAxNiksDQpheGlzLnRleHQueSA9IGVsZW1lbnRfdGV4dChjb2xvciA9ICJyZWQiLCBzaXplID0gMTIpDQopDQpnZ3Bsb3QoZGF0YSA9IGlyaXMsIGFlcyh4PSB2YXJpZXR5LCB5ID0gc2VwYWwubGVuZ3RoLCBmaWxsID0gdmFyaWV0eSkpICsNCmdlb21fYm94cGxvdCgpICsNCmxhYnModGl0bGUgPSAiVGhpcyBwbG90IGlzIGNyZWF0ZWQgdXNpbmcgZ2dwbG90IiwNCnggPSAiQ2xhc3MiLA0KeSA9ICJTZXBhbCBMZW5ndGgiLA0KY2FwdGlvbiA9ICJTb3VyY2U6IElyaXMgZGF0YXNldCIpDQoNCg0KYGBgDQpgYGB7cn0NCmdncGxvdChkYXRhID0gaXJpcywgYWVzKHg9IHZhcmlldHksIHkgPSBzZXBhbC5sZW5ndGgsIGZpbGwgPSB2YXJpZXR5KSkgKw0KZ2VvbV92aW9saW4oKSArDQpsYWJzKHRpdGxlID0gIlRoaXMgcGxvdCBpcyBjcmVhdGVkIHVzaW5nIGdncGxvdCIsDQp4ID0gIkNsYXNzIiwNCnkgPSAiU2VwYWwgTGVuZ3RoIiwNCmNhcHRpb24gPSAiU291cmNlOiBJcmlzIGRhdGFzZXQiKQ0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0Kc3VtbWFyeShpcmlzKQ0KYGBgDQpgYGB7cn0NCmNvcihpcmlzJHNlcGFsLmxlbmd0aCwgaXJpcyRzZXBhbC53aWR0aCkNCmNvcihpcmlzJHNlcGFsLmxlbmd0aCwgaXJpcyRzZXBhbC5sZW5ndGgpDQpgYGANCmBgYHtyfQ0KY29yX21hdHJpeCA9IGNvciAoaXJpc1sgLDE6NF0pDQpjb3JfbWF0cml4DQpgYGANCmBgYHtyfQ0KbGlicmFyeShnZ2NvcnJwbG90KQ0KZ2djb3JycGxvdChjb3JfbWF0cml4KQ0KYGBgDQoNCg0KDQpgYGB7cn0NCmdnY29ycnBsb3QoY29yX21hdHJpeCwgdHlwZSA9ICJsb3dlciIpDQpnZ2NvcnJwbG90KGNvcl9tYXRyaXgsIHR5cGUgPSAidXBwZXIiKQ0KYGBgDQpgYGB7cn0NCmdnY29ycnBsb3Qo
Y29yX21hdHJpeCwNCiAgICAgICAgICAgdHlwZSA9ICJsb3dlciIsDQogICAgICAgICAgIGNvbG9yID0gYygicGluayIsIndoaXRlIiwicHVycGxlIiksDQogICAgICAgICAgIGxhYiA9IFRSVUUNCiAgICAgICAgICAgKQ0KDQpgYGANCg0KYGBge3J9DQpsaWJyYXJ5KEdHYWxseSkNCg0KZ2dwYWlycyhpcmlzLCBhZXMoY29sb3VyID0gdmFyaWV0eSkpDQoNCmBgYA0KDQpgYGB7cn0NCg0KbGlicmFyeShwbG90bHkpDQoNCmZpZyA9IGlyaXMgJT4lDQogIHBsb3RfbHkoeSA9IH5zZXBhbC5sZW5ndGgsIHR5cGUgPSAndmlvbGluJykNCg0KICANCmZpZw0KYGBgDQoNCmBgYHtyfQ0KDQpmaWcgPSBpcmlzICU+JQ0KICBwbG90X2x5KHkgPSB+c2VwYWwubGVuZ3RoLCB0eXBlID0gJ2JveCcpDQoNCiAgDQpmaWcNCmBgYA0KDQpgYGB7cn0NCmxpYnJhcnkocGxvdGx5KQ0KDQpmaWcgPSBpcmlzICU+JQ0KICBwbG90X2x5KHggPSB+c2VwYWwubGVuZ3RoLCB0eXBlID0gJ2hpc3RvZ3JhbScpDQoNCiAgDQpmaWcNCmBgYA0KDQoNCmBgYHtyfQ0KbGlicmFyeSgic3RhdHMiKQ0KDQppcmlzX3BjYSA9IHByY29tcChpcmlzWyAsIC01XSwgc2NhbGUgPSBUUlVFLCBjZW50ZXIgPSBUUlVFKQ0KaXJpc19wY2ENCmBgYA0KDQoNCmBgYHtyfQ0KcGNfMTIgPSBkYXRhLmZyYW1lKGlyaXNfcGNhJHhbICwgMToyXSkNCmhlYWQocGNfMTIpDQoNCnBjXzEyX2NsYXNzID0gY2JpbmQocGNfMTIsIHZhcmlldHkgPSBpcmlzJHZhcmlldHkpDQpwY18xMl9jbGFzcw0KZ2dwbG90KHBjXzEyX2NsYXNzLCBhZXMoUEMxLCBQQzIsIGNvbG9yPXZhcmlldHkpKSArIA0KICBnZW9tX3BvaW50KCkgKyANCiAgdGhlbWVfbWluaW1hbCgpDQpgYGANCg0KDQpgYGB7cn0NCmxpYnJhcnkoImNvcnJwbG90IikNCg0KYGBgDQpgYGB7cn0NCmxpYnJhcnkoImNvcnJwbG90IikNCnZhciA9IGdldF9wY2FfdmFyKGlyaXNfcGNhKQ0KY29ycnBsb3QodmFyJGNvczIpDQpgYGANCmBgYHtyfQ0KZnZpel9wY2FfaW5kKGlyaXNfcGNhLA0KICAgICAgICAgICAgIGdlb20uaW5kID0gInBvaW50IiwNCiAgICAgICAgICAgICBjb2wuaW5kID0gaXJpcyR2YXJpZXR5LA0KICAgICAgICAgICAgIGFkZEVsbGlwc2VzID0gVFJVRSkNCmBgYA0KYGBge3J9DQpmdml6X3BjYV9pbmQoaXJpc19wY2EsDQogICAgICAgICAgICAgZ2VvbS5pbmQgPSAicG9pbnQiLA0KICAgICAgICAgICAgIGNvbC5pbmQgPSBpcmlzJHZhcmlldHksDQogICAgICAgICAgICAgYWRkRWxsaXBzZXM9IEZBTFNFKQ0KYGBgDQpgYGB7cn0NCmxtX21vZGVsID0gbG0oc2VwYWwubGVuZ3RofnBldGFsLmxlbmd0aCwgZGF0YSA9IGlyaXMpDQpzdW1tYXJ5KGxtX21vZGVsKQ0KYGBgDQpgYGB7cn0NCnggPSBpcmlzJHNlcGFsLmxlbmd0aA0KeSA9IGlyaXMkcGV0YWwubGVuZ3RoDQpwbG90KHgseSkNCmBgYA0KYGBge3J9DQpwcmVkID0gcHJlZGljdChsbV9tb2RlbCkNCml4ID0gc29ydCh4LCBpbmRleC5yZXR1cm4gPSBUKSRpeCAjIHNvdHINCml4DQpw
bG90KHgseSkNCmxpbmVzKHhbaXhdLHByZWRbaXhdKQ0KYGBgDQpgYGB7cn0NCg0KcGxvdCh4LHkpDQpgYGANCg0K