# Remote location of the solar-radiation dataset (CSV hosted on GitHub).
url <- "https://raw.githubusercontent.com/underckit/machine_learning/main/SolarPrediction.csv"

# Download the CSV and parse it into a data frame.
SolarPrediction <- read.csv(file = url)

##Разделение датасета

# Randomly assign each row to the training set (1) or the test set (0).
# Each row is an independent Bernoulli(0.9) draw, so the split is roughly
# 90% / 10% rather than exactly so.
# Fix the RNG seed so the split -- and every figure derived from it -- is
# reproducible between runs. The outputs recorded in this report were
# produced without a seed, so re-running will give slightly different numbers.
set.seed(42)
dummy_sep <- rbinom(nrow(SolarPrediction), size = 1, prob = 0.9)
SolarPrediction.train <- SolarPrediction[dummy_sep == 1, ]
SolarPrediction.test <- SolarPrediction[dummy_sep == 0, ]

##Линейная регрессия: построим модель

# Ordinary least squares: regress Radiation on the three weather predictors,
# fitted on the training rows only.
model_ols <- lm(
  formula = Radiation ~ Temperature + Pressure + Humidity,
  data = SolarPrediction.train
)
# Print coefficient estimates, residual spread and goodness-of-fit statistics.
summary(model_ols)
## 
## Call:
## lm(formula = Radiation ~ Temperature + Pressure + Humidity, data = SolarPrediction.train)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -752.48 -138.02  -20.78  103.57 1188.71 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.070e+04  7.263e+02  28.506   <2e-16 ***
## Temperature  3.889e+01  2.149e-01 180.975   <2e-16 ***
## Pressure    -7.380e+02  2.395e+01 -30.816   <2e-16 ***
## Humidity    -4.442e-01  4.994e-02  -8.895   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 211.2 on 29469 degrees of freedom
## Multiple R-squared:  0.5523, Adjusted R-squared:  0.5523 
## F-statistic: 1.212e+04 on 3 and 29469 DF,  p-value: < 2.2e-16

##Прогноз значений для тестового набора данных. Вычислим разницу между фактическими и предсказанными значениями

# Held-out residuals of the OLS model: observed minus predicted Radiation
# on the test rows.
ost.lr <- SolarPrediction.test$Radiation -
  predict(model_ols, newdata = SolarPrediction.test)

Проверим модель с помощью T-test

# One-sample t-test of H0: the mean held-out OLS residual equals zero.
t.test(x = ost.lr, mu = 0)
## 
##  One Sample t-test
## 
## data:  ost.lr
## t = 0.61863, df = 3212, p-value = 0.5362
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -4.916401  9.448825
## sample estimates:
## mean of x 
##  2.266212

p-value больше 5%, т.е. нет оснований отвергнуть гипотезу о том, что среднее значение остатков на тестовой выборке равно 0 (систематического смещения прогноза не обнаружено).

##Partial Least Squares Regression

Построим модель

library(pls)
## 
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
## 
##     loadings
# Partial least squares regression on the training rows, with the same
# response and predictors as the OLS model. Predictors are scaled, the fit
# uses the orthogonal-scores algorithm, and cross-validation is requested.
model_rq <- plsr(
  formula = Radiation ~ Temperature + Pressure + Humidity,
  data = SolarPrediction.train,
  scale = TRUE,
  validation = "CV",
  method = "oscorespls"
)
# Print the cross-validated RMSEP and explained variance per component count.
summary(model_rq)
## Data:    X dimension: 29473 3 
##  Y dimension: 29473 1
## Fit method: oscorespls
## Number of components considered: 3
## 
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
##        (Intercept)  1 comps  2 comps  3 comps
## CV           315.6    228.5    211.2    211.2
## adjCV        315.6    228.5    211.2    211.2
## 
## TRAINING: % variance explained
##            1 comps  2 comps  3 comps
## X            46.55    74.12   100.00
## Radiation    47.60    55.23    55.23

##Прогноз значений для тестового набора данных

# Held-out residuals of the PLS model: observed minus predicted Radiation
# on the test rows.
#
# predict() on a plsr fit returns an (n x 1 x k) array with one slice per
# requested component count; without `ncomp` it returns all three. Subtracting
# that array from the length-n response recycles the response and pools the
# residuals of the 1-, 2- and 3-component models into one vector. This is why
# the t-test output recorded below shows df = 9638 (= 3 * 3213 - 1) instead of
# 3212; re-run the test to refresh that output.
#
# Request a single model instead. Two components is the smallest count that
# reaches the minimum cross-validated RMSEP in the summary above.
# drop() collapses the n x 1 x 1 array to a plain vector.
ost.rq <- SolarPrediction.test$Radiation -
  drop(predict(model_rq, newdata = SolarPrediction.test, ncomp = 2))

T-test

# One-sample t-test of H0: the mean held-out PLS residual equals zero.
t.test(x = ost.rq, mu = 0)
## 
##  One Sample t-test
## 
## data:  ost.rq
## t = 1.2083, df = 9638, p-value = 0.227
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  -1.633604  6.883849
## sample estimates:
## mean of x 
##  2.625123

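p-value больше 5%, т.е. и для PLS-модели нет оснований отвергнуть гипотезу о том, что среднее значение остатков на тестовой выборке равно 0.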