Linear regression is a commonly used type of predictive analysis. It is a statistical approach for modelling the relationship between a dependent variable and a given set of independent variables. It assumes that a straight line can be used to approximate the relationship. The goal of linear regression is to identify the line that minimizes the discrepancies between the observed data points and the line’s predicted values.
In machine learning, linear regression is one of the easiest and most popular algorithms.
Below are some important assumptions of linear regression. These are formal checks to carry out while building a linear regression model; they help ensure the best possible result from the given dataset.
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.3.3
## corrplot 0.92 loaded
# Load the bike-rental data set; readr guesses the column types and reports them.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
bikes <- readr::read_csv(file = "C:/Users/ooxmw/Downloads/bikes.csv")
## Rows: 731 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): date
## dbl (9): season, holiday, weekday, weather, temperature, realfeel, humidity,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the tibble: shows the first 10 of the 731 rows plus column types.
bikes
## # A tibble: 731 × 10
## date season holiday weekday weather temperature realfeel humidity windspeed
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 01/01… 1 0 6 2 46.7 46.4 0.806 6.68
## 2 02/01… 1 0 0 2 48.4 45.2 0.696 10.3
## 3 03/01… 1 0 1 1 34.2 25.7 0.437 10.3
## 4 04/01… 1 0 2 1 34.5 28.4 0.590 6.67
## 5 05/01… 1 0 3 1 36.8 30.4 0.437 7.78
## 6 06/01… 1 0 4 1 34.9 30.9 0.518 3.73
## 7 07/01… 1 0 5 2 34.2 28.0 0.499 7.02
## 8 08/01… 1 0 6 2 31.6 22.5 0.536 11.1
## 9 09/01… 1 0 0 1 29.3 17.0 0.434 15.1
## 10 10/01… 1 0 1 1 30.4 21.1 0.483 9.30
## # ℹ 721 more rows
## # ℹ 1 more variable: rentals <dbl>
# Covariance between humidity and rentals, plus each variable's spread.
with(bikes, cov(humidity, rentals))
## [1] -27.77323
with(bikes, sd(humidity))
## [1] 0.1424291
with(bikes, sd(rentals))
## [1] 1937.211
# Pearson's r by hand: covariance scaled by the product of the two
# standard deviations.
pearson <- with(bikes, cov(humidity, rentals) / (sd(humidity) * sd(rentals)))
pearson
## [1] -0.1006586
# cor() yields the same coefficient directly.
with(bikes, cor(humidity, rentals))
## [1] -0.1006586
with(bikes, cor(windspeed, rentals))
## [1] -0.234545
with(bikes, cor(temperature, rentals))
## [1] 0.627494
# Drop the character `date` column so that only numeric columns reach cor().
bikenumeric <- select(bikes, -date)
# Pairwise correlation matrix of the remaining columns.
bike_correlations <- cor(x = bikenumeric)
# Draw the full correlation plot, then only its upper triangle.
corrplot(corr = bike_correlations)
corrplot(corr = bike_correlations, type = "upper")
# Simple linear regression: rentals explained by temperature alone.
model <- lm(formula = rentals ~ temperature, data = bikes)
# Auto-print the fitted call and its two coefficients.
model
##
## Call:
## lm(formula = rentals ~ temperature, data = bikes)
##
## Coefficients:
## (Intercept) temperature
## -166.9 78.5
# Full fit report: residual quantiles, coefficient table with standard errors
# and p-values, residual standard error, R-squared and the F-statistic.
summary(model)
##
## Call:
## lm(formula = rentals ~ temperature, data = bikes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4615.3 -1134.9 -104.4 1044.3 3737.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -166.877 221.816 -0.752 0.452
## temperature 78.495 3.607 21.759 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1509 on 729 degrees of freedom
## Multiple R-squared: 0.3937, Adjusted R-squared: 0.3929
## F-statistic: 473.5 on 1 and 729 DF, p-value: < 2.2e-16
# Called without `newdata`, so this prints one prediction per row of the data
# the model was fitted on (731 values); wrap in head() if only a preview is
# wanted.
predict(model)
## 1 2 3 4 5 6 7 8
## 3500.155 3628.394 2518.638 2542.784 2721.798 2571.658 2519.688 2310.359
## 9 10 11 12 13 14 15 16
## 2133.271 2216.280 2337.526 2361.672 2310.359 2282.933 2764.139 2753.075
## 17 18 19 20 21 22 23 24
## 2382.298 2653.465 3154.885 2952.297 2393.368 1607.310 1855.615 1861.389
## 25 26 27 28 29 30 31 32
## 2698.695 2658.997 2509.581 2565.881 2519.688 2652.502 2415.502 2490.814
## 33 34 35 36 37 38 39 40
## 2941.227 2456.169 2617.851 2764.139 3112.776 3018.704 2681.130 2109.697
## 41 42 43 44 45 46 47 48
## 2173.215 2470.341 2692.200 3316.573 3970.537 2981.649 3328.121 4108.883
## 49 50 51 52 53 54 55 56
## 4678.881 3865.394 3108.686 3228.989 2424.726 2687.147 3177.981 3634.172
## 57 58 59 60 61 62 63 64
## 3090.643 3495.580 3919.224 2985.500 3439.280 2531.714 2952.297 3765.784
## 65 66 67 68 69 70 71 72
## 3715.016 2952.775 3157.050 3179.183 3798.483 3316.573 3400.545 3766.986
## 73 74 75 76 77 78 79 80
## 3374.314 3322.344 3639.942 3970.537 4800.626 4352.378 3422.678 4073.036
## 81 82 83 84 85 86 87 88
## 4147.625 3518.683 3107.244 2968.899 2979.962 2895.027 2970.101 3223.457
## 89 90 91 92 93 94 95 96
## 3206.855 2996.564 3206.855 3306.466 3727.042 5021.980 3965.005 3810.051
## 97 98 99 100 101 102 103 104
## 4119.953 3444.812 3489.085 4048.014 5170.194 4551.599 3953.935 4319.174
## 105 106 107 108 109 110 111 112
## 4180.828 4075.679 4247.235 4618.006 4573.732 5165.865 4263.837 3450.350
## 113 114 115 116 117 118 119 120
## 4269.369 5077.324 5243.342 5409.359 5331.882 5315.281 4601.404 4352.378
## 121 122 123 124 125 126 127 128
## 4214.032 4861.501 5309.749 3965.005 4263.837 4396.651 4667.811 4723.148
## 129 130 131 132 133 134 135 136
## 4750.820 4750.820 4817.227 4767.422 4618.006 4673.343 4950.041 5049.652
## 137 138 139 140 141 142 143 144
## 4944.510 4867.033 4739.750 4778.492 5215.670 5226.740 5409.359 5597.511
## 145 146 147 148 149 150 151 152
## 5603.042 5918.476 5741.395 5569.839 5647.316 6084.494 6361.192 6289.254
## 153 154 155 156 157 158 159 160
## 5962.750 5331.882 5431.493 5520.034 5719.255 5912.944 6366.724 6582.547
## 161 162 163 164 165 166 167 168
## 6228.378 6029.157 5813.334 5431.493 5226.740 5376.156 5387.219 5525.572
## 169 170 171 172 173 174 175 176
## 5841.006 5857.607 5431.493 5735.857 6084.494 6051.290 6023.625 5829.936
## 177 178 179 180 181 182 183 184
## 5730.325 5746.927 6156.439 6051.290 5841.006 6012.555 6117.697 5973.820
## 185 186 187 188 189 190 191 192
## 6040.227 6173.041 5995.953 6195.175 5924.015 6084.494 6178.573 6278.183
## 193 194 195 196 197 198 199 200
## 6488.475 6173.041 5735.857 5619.644 5774.599 5990.422 6173.041 6372.262
## 201 202 203 204 205 206 207 208
## 6316.919 6626.821 6848.176 6853.714 6726.431 6150.901 6339.059 6361.192
## 209 210 211 212 213 214 215 216
## 6388.864 6781.768 6554.882 6565.945 6339.059 6416.529 6073.430 5929.546
## 217 218 219 220 221 222 223 224
## 5935.078 5973.820 6145.369 6294.785 6361.192 6305.855 5979.352 5918.476
## 225 226 227 228 229 230 231 232
## 5769.060 5708.191 5636.246 5868.671 6018.087 5940.616 5763.528 5846.537
## 233 234 235 236 237 238 239 240
## 5935.078 5807.802 5470.228 5686.051 5757.997 5863.139 5730.325 5910.016
## 241 242 243 244 245 246 247 248
## 5442.563 5459.165 5575.377 5564.307 5486.830 5658.386 5924.015 5686.051
## 249 250 251 252 253 254 255 256
## 4800.626 5193.536 5424.275 5531.104 5597.511 5553.237 5493.570 5536.635
## 257 258 259 260 261 262 263 264
## 5686.051 5049.652 4330.244 4479.660 4584.802 4861.501 4944.510 5165.865
## 265 266 267 268 269 270 271 272
## 5387.219 5259.944 5243.342 5425.961 5525.572 5442.563 5431.493 5309.749
## 273 274 275 276 277 278 279 280
## 4961.112 3937.333 3583.164 3765.784 4429.855 4789.555 4496.262 4606.936
## 281 282 283 284 285 286 287 288
## 4678.881 4806.157 5005.379 4977.713 4822.759 5127.129 4872.564 4579.271
## 289 290 291 292 293 294 295 296
## 4612.474 4761.890 4750.820 4812.174 4374.511 4053.546 4020.342 4014.810
## 297 298 299 300 301 302 303 304
## 4291.502 4346.846 4429.855 4335.776 3411.608 2902.491 3334.138 3472.484
## 305 306 307 308 309 310 311 312
## 3876.458 3721.510 3926.263 3893.060 3383.943 3527.821 3837.723 3926.263
## 313 314 315 316 317 318 319 320
## 3870.926 3738.112 3367.341 3583.164 4142.086 4734.218 4734.218 4247.235
## 321 322 323 324 325 326 327 328
## 3483.554 3035.306 3400.545 4291.502 4186.360 3981.607 4142.086 3693.838
## 329 330 331 332 333 334 335 336
## 3704.908 3710.440 4263.837 4558.094 4258.299 3372.873 3289.864 3300.934
## 337 338 339 340 341 342 343 344
## 3201.323 3411.608 3776.847 4285.970 3937.333 2979.962 3145.980 3040.837
## 345 346 347 348 349 350 351 352
## 2681.130 2797.342 3090.643 3323.068 4020.342 3704.908 2930.157 2797.342
## 353 354 355 356 357 358 359 360
## 3051.907 3776.847 4059.077 4025.874 3693.838 3223.457 3039.396 3351.218
## 361 362 363 364 365 366 367 368
## 3372.873 3201.078 2863.750 3284.332 3937.333 3671.705 3027.841 2210.749
## 369 370 371 372 373 374 375 376
## 1928.518 2979.962 3433.748 3826.653 3455.882 2703.270 3264.603 3035.306
## 377 378 379 380 381 382 383 384
## 3754.714 3035.306 2409.970 2321.429 2476.377 3691.912 3228.989 2476.377
## 385 386 387 388 389 390 391 392
## 2658.997 2365.696 2293.757 2664.528 3489.085 3168.120 3483.554 4036.944
## 393 394 395 396 397 398 399 400
## 3311.997 3090.643 3002.102 3804.519 4330.244 3865.394 3295.396 2968.899
## 401 402 403 404 405 406 407 408
## 2979.962 3091.367 3566.562 2919.093 2974.430 3079.573 2703.270 2061.333
## 409 410 411 412 413 414 415 416
## 2692.200 3334.138 3527.821 3317.536 3494.617 3516.757 3074.041 3074.041
## 417 418 419 420 421 422 423 424
## 3126.011 3843.254 4230.633 3920.731 3145.980 3068.509 3649.571 3599.766
## 425 426 427 428 429 430 431 432
## 3501.357 4440.918 3561.024 3965.005 3378.405 2830.546 2930.157 3898.598
## 433 434 435 436 437 438 439 440
## 4717.617 3942.865 3123.846 3616.846 4313.642 4966.643 5016.449 4916.838
## 441 442 443 444 445 446 447 448
## 4108.883 4629.076 4352.378 4833.829 4938.971 4745.288 4894.704 5210.138
## 449 450 451 452 453 454 455 456
## 4551.599 4119.953 4175.290 3361.803 4429.855 4496.262 3671.705 4031.412
## 457 458 459 460 461 462 463 464
## 4042.476 4096.133 4313.642 4811.696 4103.351 3893.060 4119.953 4534.997
## 465 466 467 468 469 470 471 472
## 4463.058 4180.828 3530.231 3854.324 4153.156 4501.794 5243.342 5625.183
## 473 474 475 476 477 478 479 480
## 5254.405 4291.502 4523.927 4712.085 4999.847 3848.793 3350.739 3959.467
## 481 482 483 484 485 486 487 488
## 4380.049 4523.927 4252.767 3715.978 4258.299 4297.041 5287.609 4961.112
## 489 490 491 492 493 494 495 496
## 4933.440 5381.688 5342.952 4950.041 4784.024 5077.324 5033.050 4573.732
## 497 498 499 500 501 502 503 504
## 4756.352 4961.112 5282.077 5021.980 5276.545 5442.563 5154.795 4961.112
## 505 506 507 508 509 510 511 512
## 5199.068 5337.414 5187.998 5298.679 5342.952 5564.307 5730.325 5813.334
## 513 514 515 516 517 518 519 520
## 5796.732 5946.148 6012.555 5575.377 5730.325 5558.775 5088.387 5215.670
## 521 522 523 524 525 526 527 528
## 5182.466 4806.157 4894.704 5215.670 5525.572 5935.078 6040.227 6001.485
## 529 530 531 532 533 534 535 536
## 5553.237 5569.839 5520.034 5459.165 5409.359 5149.263 4988.777 5785.662
## 537 538 539 540 541 542 543 544
## 6410.998 6565.945 6377.794 6073.430 6150.901 5968.281 5403.821 5846.537
## 545 546 547 548 549 550 551 552
## 6189.643 6754.103 6294.785 6632.352 6405.466 6399.928 6455.271 6709.830
## 553 554 555 556 557 558 559 560
## 6715.361 6936.723 6676.626 5935.078 6001.485 5973.820 5968.281 6073.430
## 561 562 563 564 565 566 567 568
## 5885.273 6167.503 6283.715 6648.954 6482.937 6327.989 5636.246 5171.396
## 569 570 571 572 573 574 575 576
## 5647.316 6139.838 6200.706 6023.625 6372.262 6405.466 6233.910 6007.023
## 577 578 579 580 581 582 583 584
## 6067.892 5951.680 5979.352 6211.776 6300.317 6482.937 6322.457 6211.776
## 585 586 587 588 589 590 591 592
## 6101.096 6195.175 6233.910 5968.281 5813.334 5868.671 6001.485 6040.227
## 593 594 595 596 597 598 599 600
## 5907.413 5990.422 6018.087 5719.255 5437.025 5437.025 5525.572 5647.316
## 601 602 603 604 605 606 607 608
## 5835.467 5879.741 5608.581 5553.237 5885.273 6051.290 5763.528 5907.413
## 609 610 611 612 613 614 615 616
## 6289.254 6217.308 5841.006 5912.944 6034.689 6106.634 5841.006 5885.273
## 617 618 619 620 621 622 623 624
## 5591.979 5265.475 5088.387 5049.652 5193.536 5282.077 5420.423 5254.405
## 625 626 627 628 629 630 631 632
## 5066.254 5071.786 5354.016 4883.634 4844.899 5193.536 5531.104 4728.687
## 633 634 635 636 637 638 639 640
## 4629.076 4867.033 5431.493 5531.104 5326.351 4817.227 4712.085 4673.343
## 641 642 643 644 645 646 647 648
## 5138.193 5580.909 5580.909 5298.679 4894.704 3976.068 3760.245 4180.828
## 649 650 651 652 653 654 655 656
## 4629.076 4103.351 4119.953 3826.653 4678.881 4944.510 4324.706 4241.697
## 657 658 659 660 661 662 663 664
## 4684.413 4955.573 4429.855 4297.041 4451.988 4828.297 5116.059 4867.033
## 665 666 667 668 669 670 671 672
## 4839.361 4734.218 4385.581 4136.555 3327.597 3588.696 3644.033 3572.094
## 673 674 675 676 677 678 679 680
## 3494.617 3378.405 3334.138 3079.573 3179.183 3553.328 3616.368 3798.987
## 681 682 683 684 685 686 687 688
## 4009.272 4435.386 3494.617 3134.916 3350.739 3505.687 3372.873 3489.085
## 689 690 691 692 693 694 695 696
## 3743.644 3699.377 3561.024 3472.484 3660.635 3062.971 2847.148 3295.396
## 697 698 699 700 701 702 703 704
## 3151.518 3184.722 3079.818 3195.785 3195.785 3522.289 4219.563 4374.511
## 705 706 707 708 709 710 711 712
## 4125.484 2913.555 3345.201 3749.182 3765.784 4108.883 3561.024 3190.253
## 713 714 715 716 717 718 719 720
## 3179.183 3085.111 3367.341 3621.899 3826.653 3942.865 3422.678 3406.076
## 721 722 723 724 725 726 727 728
## 3383.943 2979.962 2847.148 2750.665 3149.108 2830.546 2902.491 2896.953
## 729 730 731
## 2896.953 2913.555 2647.926
# Temperatures for which rental counts should be predicted; the column name
# must match the predictor used in the model formula.
new_data <- data.frame(
  temperature = c(35.403, 32.44, 22)
)
# Apply the fitted regression line to the new temperatures.
predicted_model <- predict(object = model, newdata = new_data)
print(predicted_model)
## 1 2 3
## 2612.096 2379.514 1560.022
Logistic regression is also known as binomial logistic regression. It is based on the sigmoid function, whose input can range from -infinity to +infinity and whose output is a probability between 0 and 1.
Logistic regression in R is a classification algorithm used to find the probability of event success and event failure. It is used when the dependent variable is binary (0/1, True/False, Yes/No) in nature. The logit function is used as the link function for the binomial distribution.
A binary outcome variable’s probability can be predicted using the statistical modeling technique known as logistic regression. It is widely employed in many different industries, including marketing, finance, social sciences, and medical research.
The logistic function, commonly referred to as the sigmoid function, is the basic idea underpinning logistic regression. It is used to describe the relationship between the predictor variables and the probability of the binary outcome.
library(tidyverse)
library(dplyr)
library(caTools)
## Warning: package 'caTools' was built under R version 4.3.3
# Load the built-in mtcars data set and display every row of it.
data("mtcars")
mtcars
## mpg cyl disp hp drat wt qsec vs am gear carb
## Mazda RX4 21.0 6 160.0 110 3.90 2.620 16.46 0 1 4 4
## Mazda RX4 Wag 21.0 6 160.0 110 3.90 2.875 17.02 0 1 4 4
## Datsun 710 22.8 4 108.0 93 3.85 2.320 18.61 1 1 4 1
## Hornet 4 Drive 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## Hornet Sportabout 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## Valiant 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## Duster 360 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## Merc 240D 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## Merc 230 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## Merc 280 19.2 6 167.6 123 3.92 3.440 18.30 1 0 4 4
## Merc 280C 17.8 6 167.6 123 3.92 3.440 18.90 1 0 4 4
## Merc 450SE 16.4 8 275.8 180 3.07 4.070 17.40 0 0 3 3
## Merc 450SL 17.3 8 275.8 180 3.07 3.730 17.60 0 0 3 3
## Merc 450SLC 15.2 8 275.8 180 3.07 3.780 18.00 0 0 3 3
## Cadillac Fleetwood 10.4 8 472.0 205 2.93 5.250 17.98 0 0 3 4
## Lincoln Continental 10.4 8 460.0 215 3.00 5.424 17.82 0 0 3 4
## Chrysler Imperial 14.7 8 440.0 230 3.23 5.345 17.42 0 0 3 4
## Fiat 128 32.4 4 78.7 66 4.08 2.200 19.47 1 1 4 1
## Honda Civic 30.4 4 75.7 52 4.93 1.615 18.52 1 1 4 2
## Toyota Corolla 33.9 4 71.1 65 4.22 1.835 19.90 1 1 4 1
## Toyota Corona 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
## Dodge Challenger 15.5 8 318.0 150 2.76 3.520 16.87 0 0 3 2
## AMC Javelin 15.2 8 304.0 150 3.15 3.435 17.30 0 0 3 2
## Camaro Z28 13.3 8 350.0 245 3.73 3.840 15.41 0 0 3 4
## Pontiac Firebird 19.2 8 400.0 175 3.08 3.845 17.05 0 0 3 2
## Fiat X1-9 27.3 4 79.0 66 4.08 1.935 18.90 1 1 4 1
## Porsche 914-2 26.0 4 120.3 91 4.43 2.140 16.70 0 1 5 2
## Lotus Europa 30.4 4 95.1 113 3.77 1.513 16.90 1 1 5 2
## Ford Pantera L 15.8 8 351.0 264 4.22 3.170 14.50 0 1 5 4
## Ferrari Dino 19.7 6 145.0 175 3.62 2.770 15.50 0 1 5 6
## Maserati Bora 15.0 8 301.0 335 3.54 3.570 14.60 0 1 5 8
## Volvo 142E 21.4 4 121.0 109 4.11 2.780 18.60 1 1 4 2
# Fix the RNG seed so the train/test partition is reproducible between runs.
set.seed(123)
# sample.split() expects the vector of outcome labels, not the whole data
# frame: passing `mtcars` produced one flag per *column* (11 values) that was
# then recycled across the 32 rows. Splitting on `mtcars$vs` yields one flag
# per row and keeps the 0/1 class ratio similar in both subsets.
# NOTE: the printed results recorded further down were generated with the
# earlier split and will change once the document is re-run.
split <- sample.split(mtcars$vs, SplitRatio = 0.8)
split
## [1] FALSE TRUE TRUE TRUE FALSE TRUE FALSE TRUE TRUE TRUE TRUE
# Rows flagged TRUE form the training set; all remaining rows form the test set.
train_reg <- subset(mtcars, subset = split)
test_reg <- subset(mtcars, subset = !split)
# Logistic regression of the binary column `vs` on weight and displacement,
# fitted on the training rows only.
logistic_model <- glm(
  formula = vs ~ wt + disp,
  family = "binomial",
  data = train_reg
)
# Auto-print the call, coefficients, deviances and AIC.
logistic_model
##
## Call: glm(formula = vs ~ wt + disp, family = "binomial", data = train_reg)
##
## Coefficients:
## (Intercept) wt disp
## 4.19333 0.52950 -0.02703
##
## Degrees of Freedom: 22 Total (i.e. Null); 20 Residual
## Null Deviance: 31.49
## Residual Deviance: 15.01 AIC: 21.01
# Full fit report: coefficient table with z-tests, null and residual deviance,
# AIC and the number of Fisher scoring iterations.
summary(logistic_model)
##
## Call:
## glm(formula = vs ~ wt + disp, family = "binomial", data = train_reg)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.19333 3.95116 1.061 0.2886
## wt 0.52950 1.90004 0.279 0.7805
## disp -0.02703 0.01548 -1.746 0.0809 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 31.492 on 22 degrees of freedom
## Residual deviance: 15.013 on 20 degrees of freedom
## AIC: 21.013
##
## Number of Fisher Scoring iterations: 6
# Predicted probabilities (response scale, i.e. between 0 and 1) for the
# held-out rows.
predict_reg <- predict(
  object = logistic_model,
  newdata = test_reg,
  type = "response"
)
predict_reg
## Mazda RX4 Hornet Sportabout Duster 360 Merc 450SE
## 0.778382747 0.023776512 0.025427735 0.248671051
## Lincoln Continental Fiat 128 AMC Javelin Porsche 914-2
## 0.004645554 0.961991355 0.099381608 0.888458175
## Ford Pantera L
## 0.026218711