import the iris data
data <- iris
# data's number of dimensions
dim <- dim(data)
head(data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
x = data$Sepal.Length
y = data$Petal.Length
data2 <- data.frame(x, y)
# variance-covariance matrix
cov(data2)
## x y
## x 0.6856935 1.274315
## y 1.2743154 3.116278
# correlation matrix
cor(data2)
## x y
## x 1.0000000 0.8717538
## y 0.8717538 1.0000000
# linear regression
model <- lm(y ~ x, data = data)
print(model)
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Coefficients:
## (Intercept) x
## -7.101 1.858
summary(model)
##
## Call:
## lm(formula = y ~ x, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.47747 -0.59072 -0.00668 0.60484 2.49512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -7.10144 0.50666 -14.02 <2e-16 ***
## x 1.85843 0.08586 21.65 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8678 on 148 degrees of freedom
## Multiple R-squared: 0.76, Adjusted R-squared: 0.7583
## F-statistic: 468.6 on 1 and 148 DF, p-value: < 2.2e-16
cat("coefficient of determination:", summary(model)$r.squared)
## coefficient of determination: 0.7599546
# drawing
plot(y~x, data = data)
# quadratic regression
model_quad <- lm(y ~ x + I(x^2), data = data)
# model_quad <- lm(y ~ poly(x, degree = 2, raw = TRUE), data = data)
# cubic regression
model_cubed <- lm(y ~ x + I(x^2) + I(x^3), data = data)
# model_cubed <- lm(y ~ poly(x, degree = 3, raw = TRUE), data = data)
# PCA processing
pca_data <- prcomp(data2)
# obtain limitation of axis
lim <- range(pca_data$x)
# draw plot
plot(pca_data$x, xlim = lim, ylim = lim)
summary(pca_data)
## Importance of components:
## PC1 PC2
## Standard deviation 1.9136 0.37426
## Proportion of Variance 0.9632 0.03684
## Cumulative Proportion 0.9632 1.00000
pca_data
## Standard deviations (1, .., p=2):
## [1] 1.9136088 0.3742627
##
## Rotation (n x k) = (2 x 2):
## PC1 PC2
## x 0.3936059 -0.9192793
## y 0.9192793 0.3936059
# get the deviation
pca_data$sdev
## [1] 1.9136088 0.3742627
# plot the variance
plot(pca_data)
# plot the biplot
biplot(pca_data)