# Read the SolarPrediction CSV straight from its raw GitHub URL into a
# data frame. The arguments below are the read.csv() defaults, spelled out.
url <- "https://raw.githubusercontent.com/underckit/machine_learning/main/SolarPrediction.csv"
SolarPrediction <- read.csv(file = url, header = TRUE, sep = ",")
## Split the dataset into training and test subsets.
# Each row is assigned to the training set with probability 0.9 by an
# independent Bernoulli draw; the remaining rows form the test set.
# Fix the RNG seed first so that the split (and everything computed from it)
# is the same on every run.
set.seed(42)
dummy_sep <- rbinom(nrow(SolarPrediction), 1, 0.9)
SolarPrediction.train <- SolarPrediction[dummy_sep == 1, ]
SolarPrediction.test <- SolarPrediction[dummy_sep == 0, ]
## Linear regression: build the model.
# Ordinary least squares fit of Radiation on Temperature, Pressure and
# Humidity, estimated on the training subset only.
model_ols <- lm(
  formula = Radiation ~ Temperature + Pressure + Humidity,
  data = SolarPrediction.train
)
summary(model_ols)
##
## Call:
## lm(formula = Radiation ~ Temperature + Pressure + Humidity, data = SolarPrediction.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -752.48 -138.02 -20.78 103.57 1188.71
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.070e+04 7.263e+02 28.506 <2e-16 ***
## Temperature 3.889e+01 2.149e-01 180.975 <2e-16 ***
## Pressure -7.380e+02 2.395e+01 -30.816 <2e-16 ***
## Humidity -4.442e-01 4.994e-02 -8.895 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 211.2 on 29469 degrees of freedom
## Multiple R-squared: 0.5523, Adjusted R-squared: 0.5523
## F-statistic: 1.212e+04 on 3 and 29469 DF, p-value: < 2.2e-16
## Predict values for the test set and compute the difference.
# Hold-out residuals of the OLS model: observed minus predicted Radiation.
ost.lr <- SolarPrediction.test$Radiation -
  predict(model_ols, newdata = SolarPrediction.test)
# Проверим модель с помощью t-теста
# One-sample t-test of H0: the mean hold-out residual of the OLS model is 0.
t.test(x = ost.lr, mu = 0)
##
## One Sample t-test
##
## data: ost.lr
## t = 0.61863, df = 3212, p-value = 0.5362
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -4.916401 9.448825
## sample estimates:
## mean of x
## 2.266212
# p-value больше 5%, т. е. нулевая гипотеза не отвергается: среднее значение остатков на тестовой выборке статистически не отличается от 0.
##Partial Least Squares Regression
# Построим модель
library(pls)
##
## Attaching package: 'pls'
## The following object is masked from 'package:stats':
##
## loadings
# Partial least squares regression of Radiation on the same three predictors,
# fitted on the training subset with the orthogonal scores algorithm,
# scaled predictors and cross-validation.
model_rq <- plsr(
  formula = Radiation ~ Temperature + Pressure + Humidity,
  data = SolarPrediction.train,
  scale = TRUE,
  validation = "CV",
  method = "oscorespls"
)
summary(model_rq)
## Data: X dimension: 29473 3
## Y dimension: 29473 1
## Fit method: oscorespls
## Number of components considered: 3
##
## VALIDATION: RMSEP
## Cross-validated using 10 random segments.
## (Intercept) 1 comps 2 comps 3 comps
## CV 315.6 228.5 211.2 211.2
## adjCV 315.6 228.5 211.2 211.2
##
## TRAINING: % variance explained
## 1 comps 2 comps 3 comps
## X 46.55 74.12 100.00
## Radiation 47.60 55.23 55.23
## Predict values for the test set.
# Hold-out residuals of the PLS model: observed minus predicted Radiation.
#
# predict() on an mvr fit returns an observations x responses x components
# array when ncomp is omitted. Subtracting that array recycled the test
# responses across the 1-, 2- and 3-component models, so the t-test pooled
# three times as many "residuals" as there are test rows.
# Pin a single model size and drop() the result to a plain vector.
# Two components are used because the cross-validated RMSEP reported by
# summary(model_rq) does not improve with a third one.
ost.rq <- SolarPrediction.test$Radiation -
  drop(predict(model_rq, newdata = SolarPrediction.test, ncomp = 2))
# Проверим модель с помощью t-теста
# One-sample t-test of H0: the mean hold-out residual of the PLS model is 0.
t.test(x = ost.rq, mu = 0)
##
## One Sample t-test
##
## data: ost.rq
## t = 1.2083, df = 9638, p-value = 0.227
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -1.633604 6.883849
## sample estimates:
## mean of x
## 2.625123
# p-value больше 5%, т. е. нулевая гипотеза о нулевом среднем остатков не отвергается.