data

import the iris data

data <- iris

# data's number of dimensions
dim <- dim(data)

head(data)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
x = data$Sepal.Length
y = data$Petal.Length

data2 <- data.frame(x, y)

basic analysis

# variance-covariance matrix
cov(data2)
##           x        y
## x 0.6856935 1.274315
## y 1.2743154 3.116278
# correlation matrix
cor(data2)
##           x         y
## x 1.0000000 0.8717538
## y 0.8717538 1.0000000

regression

# linear regression
model <- lm(y ~ x, data = data)

print(model)
## 
## Call:
## lm(formula = y ~ x, data = data)
## 
## Coefficients:
## (Intercept)            x  
##      -7.101        1.858
summary(model)
## 
## Call:
## lm(formula = y ~ x, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.47747 -0.59072 -0.00668  0.60484  2.49512 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -7.10144    0.50666  -14.02   <2e-16 ***
## x            1.85843    0.08586   21.65   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8678 on 148 degrees of freedom
## Multiple R-squared:   0.76,  Adjusted R-squared:  0.7583 
## F-statistic: 468.6 on 1 and 148 DF,  p-value: < 2.2e-16
cat("coefficient of determination:", summary(model)$r.squared)
## coefficient of determination: 0.7599546
# drawing
plot(y~x, data = data)

# quadratic regression
model_quad <- lm(y ~ x + I(x^2), data = data) 
# model_quad <- lm(y ~ poly(x, degree = 2, raw = TRUE), data = data)
# cubic regression
model_cubed <- lm(y ~ x + I(x^2) + I(x^3), data = data) 
# model_cubed <- lm(y ~ poly(x, degree = 3, raw = TRUE), data = data)

PCA

# PCA processing
pca_data <- prcomp(data2)
# obtain limitation of axis
lim <- range(pca_data$x)
# draw plot
plot(pca_data$x, xlim = lim, ylim = lim)

summary(pca_data)
## Importance of components:
##                           PC1     PC2
## Standard deviation     1.9136 0.37426
## Proportion of Variance 0.9632 0.03684
## Cumulative Proportion  0.9632 1.00000
pca_data
## Standard deviations (1, .., p=2):
## [1] 1.9136088 0.3742627
## 
## Rotation (n x k) = (2 x 2):
##         PC1        PC2
## x 0.3936059 -0.9192793
## y 0.9192793  0.3936059
# get the deviation
pca_data$sdev
## [1] 1.9136088 0.3742627
# plot the variance
plot(pca_data)

# plot the biplot
biplot(pca_data)