1 Load packages

library('lattice')
library('ggplot2')
library('pastecs')
## Warning: package 'pastecs' was built under R version 4.0.4

2 Input data

# Read the data file into a data frame; the first line of the file holds
# the column names. `header = TRUE` is spelled out in full: the abbreviated
# `h = T` relied on partial argument matching and on `T`, which is an
# ordinary (reassignable) variable rather than a reserved word.
# NOTE(review): the absolute path is machine-specific — confirm whether a
# project-relative path should be used instead.
dta <- read.table("C:/Users/pc/Desktop/stat.txt", header = TRUE)

3 Descriptive Statistics

# Preview the first six rows of the data.
head(dta, n = 6L)
##    X  Y
## 1 25 16
## 2 15 11
## 3 13 12
## 4  8 10
## 5  6  7
## 6  4  5
# Per-column quartiles, extremes and mean.
summary(object = dta)
##        X               Y        
##  Min.   : 1.00   Min.   : 1.00  
##  1st Qu.: 2.50   1st Qu.: 3.50  
##  Median : 5.00   Median : 7.50  
##  Mean   : 8.00   Mean   : 7.50  
##  3rd Qu.:11.75   3rd Qu.:10.75  
##  Max.   :25.00   Max.   :16.00
# Descriptive table from pastecs: basic counts and descriptive statistics,
# no normality tests, with a 95% confidence interval on the mean.
stat.desc(
  dta,
  basic = TRUE,
  desc = TRUE,
  norm = FALSE,
  p = 0.95
)
##                       X          Y
## nbr.val      10.0000000 10.0000000
## nbr.null      0.0000000  0.0000000
## nbr.na        0.0000000  0.0000000
## min           1.0000000  1.0000000
## max          25.0000000 16.0000000
## range        24.0000000 15.0000000
## sum          80.0000000 75.0000000
## median        5.0000000  7.5000000
## mean          8.0000000  7.5000000
## SE.mean       2.4037009  1.5293426
## CI.mean.0.95  5.4375491  3.4596134
## var          57.7777778 23.3888889
## std.dev       7.6011695  4.8362060
## coef.var      0.9501462  0.6448275
# Pearson correlation matrix of all columns.
cor(x = dta, method = "pearson")
##           X         Y
## X 1.0000000 0.8946723
## Y 0.8946723 1.0000000

4 Scatter diagram

# Lattice scatter plot of Y against X: points ("p") drawn over a
# background grid ("g") together with a fitted regression line ("r").
xyplot(
  Y ~ X,
  data = dta,
  xlab = "X",
  ylab = "Y",
  type = c("p", "g", "r")
)

# Linear regression of Y on X.
# NOTE(review): this is the same fit as `model` in section 5, and
# `regressor` is not referenced anywhere else in the visible file —
# candidate for removal once confirmed unused.
regressor <- lm(formula = Y ~ X, data = dta)

5 Linear model

# Fit the simple linear regression of Y on X.
model <- lm(formula = Y ~ X, data = dta)

# Print residual quantiles, the coefficient table and overall fit statistics.
summary(model)
## 
## Call:
## lm(formula = Y ~ X, data = dta)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5154 -1.8577 -0.3538  1.4000  3.9154 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.9462     1.0823   2.722 0.026162 *  
## X             0.5692     0.1005   5.665 0.000473 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.291 on 8 degrees of freedom
## Multiple R-squared:  0.8004, Adjusted R-squared:  0.7755 
## F-statistic: 32.09 on 1 and 8 DF,  p-value: 0.0004733

6 Data visualisation

# Scatter plot of Y against X with a linear-model smoother drawn beneath
# the points. Both layers inherit the x/y mapping set on the plot itself.
ggplot(data = dta, mapping = aes(x = X, y = Y)) +
  geom_smooth(method = "lm") +
  geom_point()
## `geom_smooth()` using formula 'y ~ x'

7 The end