# Load the Powerball draw history from the CSV in the working directory.
powerball_data <- read.csv("Powerball csv.csv")

# Preview the first rows to check that the file was parsed as expected.
head(powerball_data)

# Extract the five number columns into standalone vectors so the later
# plots and regressions can refer to them by short names.
first_number <- powerball_data[["First.number"]]
second_number <- powerball_data[["Second.Number"]]
third_number <- powerball_data[["Third.Number"]]
fourth_number <- powerball_data[["Fourth.Number"]]
fifth_number <- powerball_data[["Fifth.Number"]]

# Per-column summary (min / quartiles / mean / max, or level counts).
summary(powerball_data)
Draw.Date Order First.number Second.Number
Sat, Apr 01, 2017: 1 Min. : 1.00 Min. : 1.00 Min. : 2.00
Sat, Apr 07, 2018: 1 1st Qu.: 54.75 1st Qu.: 5.00 1st Qu.:14.75
Sat, Apr 08, 2017: 1 Median :108.50 Median :10.00 Median :22.00
Sat, Apr 14, 2018: 1 Mean :108.50 Mean :12.41 Mean :24.07
Sat, Apr 15, 2017: 1 3rd Qu.:162.25 3rd Qu.:18.25 3rd Qu.:32.00
Sat, Apr 21, 2018: 1 Max. :216.00 Max. :50.00 Max. :61.00
(Other) :210
Third.Number Fourth.Number Fifth.Number PB
Min. : 7.00 Min. :15.00 Min. :23.00 Min. : 1.00
1st Qu.:27.00 1st Qu.:40.00 1st Qu.:53.00 1st Qu.: 8.00
Median :36.00 Median :48.00 Median :61.00 Median :14.00
Mean :36.12 Mean :47.04 Mean :58.24 Mean :13.98
3rd Qu.:45.00 3rd Qu.:57.00 3rd Qu.:66.00 3rd Qu.:21.00
Max. :64.00 Max. :68.00 Max. :69.00 Max. :26.00
Power.Play Jackpot
Min. : 2.000 $40.00 Million : 15
1st Qu.: 2.000 $50.00 Million : 8
Median : 2.000 $60.00 Million : 7
Mean : 2.644 $80.00 Million : 6
3rd Qu.: 3.000 $70.00 Million : 5
Max. :10.000 $100.00 Million: 4
(Other) :171
FIRST NUMBER POSSIBILITIES = 1-50; SECOND NUMBER POSSIBILITIES = 2-61; THIRD NUMBER POSSIBILITIES = 7-64; FOURTH NUMBER POSSIBILITIES = 15-68; FIFTH NUMBER POSSIBILITIES = 23-69
# Plot each number vector against its index with a loess smooth overlaid.
# The calls are kept separate so each plot's axis label is derived from
# the variable name passed in.
scatter.smooth(first_number)
scatter.smooth(second_number)
scatter.smooth(third_number)
scatter.smooth(fourth_number)
scatter.smooth(fifth_number)

# Pairwise correlation matrix over columns 2 through 9 of the data frame.
cor(powerball_data[, 2:9])
Order First.number Second.Number Third.Number
Order 1.000000000 0.00749623 -0.048474909 -0.04517491
First.number 0.007496230 1.00000000 0.617934431 0.43950464
Second.Number -0.048474909 0.61793443 1.000000000 0.75508139
Third.Number -0.045174912 0.43950464 0.755081390 1.00000000
Fourth.Number -0.075205377 0.37593956 0.585785958 0.78539574
Fifth.Number -0.002441859 0.26564158 0.388036157 0.54681435
PB -0.077599905 -0.10191896 -0.099358907 -0.09597954
Power.Play -0.085907116 0.05171796 0.006389309 -0.03959939
Fourth.Number Fifth.Number PB Power.Play
Order -0.07520538 -0.0024418593 -0.07759990 -0.0859071163
First.number 0.37593956 0.2656415832 -0.10191896 0.0517179581
Second.Number 0.58578596 0.3880361574 -0.09935891 0.0063893095
Third.Number 0.78539574 0.5468143474 -0.09597954 -0.0395993926
Fourth.Number 1.00000000 0.6667634848 -0.09822327 0.0454489285
Fifth.Number 0.66676348 1.0000000000 -0.02900352 -0.0007800508
PB -0.09822327 -0.0290035166 1.00000000 -0.0433355184
Power.Play 0.04544893 -0.0007800508 -0.04333552 1.0000000000
HIGHEST TO LOWEST CORRELATIONS: THIRD NUMBER - FOURTH NUMBER 0.785; SECOND NUMBER - THIRD NUMBER 0.755; FOURTH NUMBER - FIFTH NUMBER 0.667; FIRST NUMBER - SECOND NUMBER 0.618; SECOND NUMBER - FOURTH NUMBER 0.586
# Simple linear regression: third number as the response, fourth number
# as the single predictor.
linear_regression_34 <- lm(formula = third_number ~ fourth_number)

# Coefficients, residual spread, R-squared and F-test for the fit.
summary(linear_regression_34)
Call:
lm(formula = third_number ~ fourth_number)
Residuals:
Min 1Q Median 3Q Max
-27.810 -5.201 2.406 6.341 12.330
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.2077 2.2477 -1.872 0.0626 .
fourth_number 0.8575 0.0462 18.561 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8.447 on 214 degrees of freedom
Multiple R-squared: 0.6168, Adjusted R-squared: 0.6151
F-statistic: 344.5 on 1 and 214 DF, p-value: < 2.2e-16
THIRD NUMBER PREDICTED = -4.2077 + 0.8575(FOURTH NUMBER)
# Simple linear regression: second number as the response, third number
# as the single predictor.
linear_regression_23 <- lm(formula = second_number ~ third_number)

# Coefficients, residual spread, R-squared and F-test for the fit.
summary(linear_regression_23)
Call:
lm(formula = second_number ~ third_number)
Residuals:
Min 1Q Median 3Q Max
-27.8911 -5.4014 0.8112 5.8854 17.6760
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.81231 1.64123 -1.104 0.271
third_number 0.71645 0.04253 16.848 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 8.489 on 214 degrees of freedom
Multiple R-squared: 0.5701, Adjusted R-squared: 0.5681
F-statistic: 283.8 on 1 and 214 DF, p-value: < 2.2e-16
SECOND NUMBER PREDICTED = -1.81231 + 0.71645(THIRD NUMBER)
# Simple linear regression: fourth number as the response, fifth number
# as the single predictor.
linear_regression_45 <- lm(formula = fourth_number ~ fifth_number)

# Coefficients, residual spread, R-squared and F-test for the fit.
summary(linear_regression_45)
Call:
lm(formula = fourth_number ~ fifth_number)
Residuals:
Min 1Q Median 3Q Max
-37.031 -6.069 2.811 7.479 11.969
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.62521 3.77179 -0.431 0.667
fifth_number 0.83560 0.06385 13.088 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.316 on 214 degrees of freedom
Multiple R-squared: 0.4446, Adjusted R-squared: 0.442
F-statistic: 171.3 on 1 and 214 DF, p-value: < 2.2e-16
FOURTH NUMBER PREDICTED = -1.62521 + 0.83560(FIFTH NUMBER)
# Simple linear regression: first number as the response, second number
# as the single predictor.
linear_regression_12 <- lm(formula = first_number ~ second_number)

# Coefficients, residual spread, R-squared and F-test for the fit.
summary(linear_regression_12)
Call:
lm(formula = first_number ~ second_number)
Residuals:
Min 1Q Median 3Q Max
-24.8391 -4.7736 -0.3344 5.0214 24.9717
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.13613 1.11249 1.021 0.308
second_number 0.46847 0.04075 11.497 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 7.718 on 214 degrees of freedom
Multiple R-squared: 0.3818, Adjusted R-squared: 0.379
F-statistic: 132.2 on 1 and 214 DF, p-value: < 2.2e-16
FIRST NUMBER PREDICTED = 1.13613 + 0.46847(SECOND NUMBER)
# Simple linear regression: second number as the response, fourth number
# as the single predictor.
linear_regression_24 <- lm(formula = second_number ~ fourth_number)

# Coefficients, residual spread, R-squared and F-test for the fit.
summary(linear_regression_24)
Call:
lm(formula = second_number ~ fourth_number)
Residuals:
Min 1Q Median 3Q Max
-27.115 -7.507 -1.132 7.363 26.637
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -4.47356 2.79237 -1.602 0.111
fourth_number 0.60682 0.05739 10.573 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.49 on 214 degrees of freedom
Multiple R-squared: 0.3431, Adjusted R-squared: 0.3401
F-statistic: 111.8 on 1 and 214 DF, p-value: < 2.2e-16