COURSE 10 - MODULE 1

REGRESSION ALGORITHM FOR TESTING AND PREDICTING DATA

Data

library(readr)

## Warning: package 'readr' was built under R version 4.2.3

# Read the semicolon-delimited day.csv file, trimming whitespace around fields.
df <- read_delim(
  file = "C:/Users/hp/Downloads/day.csv",
  delim = ";",
  escape_double = FALSE,
  trim_ws = TRUE
)

## Rows: 731 Columns: 17
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ";"
## dbl  (16): instant, season, yr, mnth, holiday, weekday, workingday, weathers...
## date  (1): dteday
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Make sure 'dteday' is a Date; the explicit format is only relevant when the
# column arrives as character text in year-month-day form.
df$dteday <- as.Date(df$dteday, format = "%Y-%m-%d")

# Derive the English month name from the numeric month. format(x, "%B") is
# locale-dependent, so on a non-English system it would produce labels that do
# not match the "January"/"June" values passed to predict() later on.
# "%m" is the zero-padded month number, which indexes month.name directly.
df$name_month <- month.name[as.integer(format(df$dteday, "%m"))]

# Keep only the columns used by the regression models
df_selected <- df[, c("name_month", "temp", "cnt")]

# Save the reduced dataset to a new CSV file (written to the working directory)
write.csv(df_selected, "day_processed.csv", row.names = FALSE)

# Show the first rows of the reduced dataset
head(df_selected)

# Show the full dataset, including the new 'name_month' column
print(df)

## # A tibble: 731 × 18
##    instant dteday     season    yr  mnth holiday weekday working…¹ weath…²  temp
##      <dbl> <date>      <dbl> <dbl> <dbl>   <dbl>   <dbl>     <dbl>   <dbl> <dbl>
##  1       1 2011-01-01      1     0     1       0       6         0       2 0.344
##  2       2 2011-01-02      1     0     1       0       0         0       2 0.363
##  3       3 2011-01-03      1     0     1       0       1         1       1 0.196
##  4       4 2011-01-04      1     0     1       0       2         1       1 0.2  
##  5       5 2011-01-05      1     0     1       0       3         1       1 0.227
##  6       6 2011-01-06      1     0     1       0       4         1       1 0.204
##  7       7 2011-01-07      1     0     1       0       5         1       2 0.197
##  8       8 2011-01-08      1     0     1       0       6         0       2 0.165
##  9       9 2011-01-09      1     0     1       0       0         0       1 0.138
## 10      10 2011-01-10      1     0     1       0       1         1       1 0.151
## # … with 721 more rows, 8 more variables: atemp <dbl>, hum <dbl>,
## #   windspeed <dbl>, casual <dbl>, registered <dbl>, cnt <dbl>,
## #   nama_bulan <dbl>, name_month <chr>, and abbreviated variable names
## #   ¹workingday, ²weathersit

Model 1

A. Simple Linear Regression Model

# Model 1: regress cnt on the month name only. name_month is a character
# column, so lm() expands it into one indicator term per non-reference month.
Model1 <- lm(formula = cnt ~ name_month, data = df)
summary(Model1)

## 
## Call:
## lm(formula = cnt ~ name_month, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5177.2 -1095.2  -249.3  1290.0  4669.7 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4484.9      196.7  22.799  < 2e-16 ***
## name_monthAugust      1179.5      275.9   4.275 2.17e-05 ***
## name_monthDecember   -1081.1      275.9  -3.918 9.79e-05 ***
## name_monthFebruary   -1829.6      281.8  -6.492 1.58e-10 ***
## name_monthJanuary    -2308.6      275.9  -8.366 3.09e-16 ***
## name_monthJuly        1078.8      275.9   3.909 0.000101 ***
## name_monthJune        1287.5      278.2   4.628 4.38e-06 ***
## name_monthMarch       -792.6      275.9  -2.873 0.004192 ** 
## name_monthMay          864.9      275.9   3.134 0.001793 ** 
## name_monthNovember    -237.7      278.2  -0.854 0.393113    
## name_monthOctober      714.3      275.9   2.589 0.009829 ** 
## name_monthSeptember   1281.6      278.2   4.607 4.83e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1524 on 719 degrees of freedom
## Multiple R-squared:  0.3906, Adjusted R-squared:  0.3813 
## F-statistic:  41.9 on 11 and 719 DF,  p-value: < 2.2e-16

B. R-Squared Interpretation

Based on the regression output, the R-squared (R²) value of 0.3906 indicates that this model explains about 39.06% of the variation in the dependent variable (cnt), while the remaining 60.94% is due to other factors not included in the model. The Adjusted R-squared of 0.3813 corrects the R-squared for the number of estimated coefficients; it is only slightly lower than the R-squared, which shows that the penalty for the eleven month terms is small, although some individual terms (for example November, p = 0.39) do not differ significantly from the reference month. Overall, this model has moderate explanatory ability, and it can still be improved by considering additional variables or more appropriate modeling methods to improve prediction accuracy.

C. Reference Month

Since April does not appear in the coefficient table of the regression summary, it is the reference month in the model (R orders the month names alphabetically, and April comes first). This means that the intercept is the predicted cnt for April, and the coefficient of every other month is the estimated difference in cnt between that month and April.

# With April as the reference month, the intercept of Model1 is the fitted
# cnt for April.
intercept <- coef(object = Model1)["(Intercept)"]
cat("Cnt prediction for April:", intercept, "\n", sep = " ")

## Cnt prediction for April: 4484.9

Based on the results above, the predicted total number of bicycle rentals (casual plus registered users) for a day in April is 4484.9.

D. January and June Month Predictions

# Build the new observations (one row per month of interest), then attach the
# fitted cnt from the month-only model as a second column.
predicted <- data.frame(name_month = c("January", "June"))
predicted[["predicted"]] <- predict(object = Model1, newdata = predicted)
print(predicted)

##   name_month predicted
## 1    January  2176.339
## 2       June  5772.367

So the predicted cnt is 2176.339 for January and 5772.367 for June.

Model 2

A. Multiple Linear Regression Model

# Model 2: extend the month-only model with the numeric predictor temp.
Model2 <- lm(formula = cnt ~ temp + name_month, data = df)
summary(Model2)

## 
## Call:
## lm(formula = cnt ~ temp + name_month, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4896.6 -1080.0  -228.4  1245.2  3372.9 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1554.39     390.76   3.978 7.66e-05 ***
## temp                 6235.14     729.40   8.548  < 2e-16 ***
## name_monthAugust     -308.08     315.42  -0.977   0.3290    
## name_monthDecember   -170.96     283.80  -0.602   0.5471    
## name_monthFebruary   -764.81     296.15  -2.582   0.0100 *  
## name_monthJanuary    -852.31     313.41  -2.719   0.0067 ** 
## name_monthJuly       -701.18     335.50  -2.090   0.0370 *  
## name_monthJune        -47.47     307.78  -0.154   0.8775    
## name_monthMarch      -297.20     269.38  -1.103   0.2703    
## name_monthMay          86.73     278.37   0.312   0.7555    
## name_monthNovember    390.66     275.22   1.419   0.1562    
## name_monthOctober     620.72     263.30   2.357   0.0187 *  
## name_monthSeptember   368.25     285.93   1.288   0.1982    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1453 on 718 degrees of freedom
## Multiple R-squared:  0.4469, Adjusted R-squared:  0.4377 
## F-statistic: 48.35 on 12 and 718 DF,  p-value: < 2.2e-16

B. R-Square Difference

Based on the comparison of R-squared and Adjusted R-squared values, Model 2 explains more of the variation in the data than Model 1. The R-squared value increased from 39.06% in Model 1 to 44.69% in Model 2, and the Adjusted R-squared value also increased from 38.13% to 43.77%. Because the Adjusted R-squared rises even after penalizing for the extra coefficient, the addition of the temperature variable in Model 2 genuinely improves the fit of the model. Therefore, Model 2 is preferred because it better explains the relationship between the variables in the data.

C. Comparison of Coefficient Estimates

# January coefficient in Model1 (month-only model), relative to the reference month
coef(Model1)["name_monthJanuary"]

## name_monthJanuary 
##         -2308.561

# January coefficient in Model2 (model that also includes temp)
coef(Model2)["name_monthJanuary"]

## name_monthJanuary 
##         -852.3127

The difference in the coefficient estimates of name_monthJanuary between Model 1 and Model 2 shows that the addition of the temperature variable affects the regression results. In Model 1, the effect of January appears larger (-2308.56) because there is no additional variable explaining the variation in the data. However, when temperature is entered into Model 2, some of the variation previously attributed to the month is now explained by temperature, so that the January coefficient becomes smaller in magnitude (-852.31). This shows that temperature contributes to explaining changes in the data, so that Model 2 captures the factors that influence the response variable more accurately.

D. January Month Prediction When Temperature is 0.25

# Predict cnt for January at temp = 0.25 directly from Model2 instead of
# re-typing rounded coefficients from the printed summary. This uses the
# full-precision estimates and stays correct if the data or the model changes;
# the result can differ in the last decimals from the hand computation with
# rounded coefficients.
# unname() drops the row label that predict() attaches, leaving a plain number.
Prediction_januari_temp_025 <- unname(
  predict(Model2, newdata = data.frame(name_month = "January", temp = 0.25))
)

cat(
  "Prediction cnt for January with temp 0.25 = ",
  Prediction_januari_temp_025, "\n"
)

## Prediction cnt for January with temp 0.25 =  2260.865

So the predicted total number of bicycle rentals (casual plus registered users) for a day in January with a temperature of 0.25 is approximately 2260.865.

LS0tDQp0aXRsZTogIkNPVVJTRSAxMCAtIE1PRFVMIDEiDQpkYXRlOiAiMjAyNS0wMy0yOSINCm91dHB1dDoNCiAgcm1kZm9ybWF0czo6cmVhZHRoZWRvd246DQogICAgc2VsZl9jb250YWluZWQ6IHRydWUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQogICAgdG9jX2RlcHRoOiA0DQogICAgZGZfcHJpbnQ6IHBhZ2VkDQogICAgY29kZV9mb2xkaW5nOiBoaWRlDQotLS0NCg0KDQo8Y2VudGVyPjxoMj4gKipSRUdSRVNTSU9OIEFMR09SSVRITSBGT1IgVEVTVElORyBBTkQgUFJFRElDVElORyBEQVRBKio8L2gyPjwvY2VudGVyPg0KDQpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0NCmtuaXRyOjpvcHRzX2NodW5rJHNldChlY2hvID0gVFJVRSkNCmBgYA0KDQojIERhdGENCmBgYHtyIGNhcnMxfQ0KbGlicmFyeShyZWFkcikNCmRmIDwtIHJlYWRfZGVsaW0oIkM6L1VzZXJzL2hwL0Rvd25sb2Fkcy9kYXkuY3N2IiwgDQogICAgICAgICAgICAgICAgICBkZWxpbSA9ICI7IiwgZXNjYXBlX2RvdWJsZSA9IEZBTFNFLCB0cmltX3dzID0gVFJVRSkNCg0KIyBDb252ZXJ0ICdkdGVkYXknIGNvbHVtbiB0byBkYXRlIGZvcm1hdA0KZGYkZHRlZGF5IDwtIGFzLkRhdGUoZGYkZHRlZGF5LCBmb3JtYXQ9IiVZLSVtLSVkIikgIA0KDQojIEV4dHJhY3QgbW9udGggbmFtZSBhcyBzdHJpbmcNCmRmJG5hbWVfbW9udGggPC0gZm9ybWF0KGRmJGR0ZWRheSwgIiVCIikgIA0KDQojIFNlbGVjdCBvbmx5IHRoZSByZXF1aXJlZCBjb2x1bW5zDQpkZl9zZWxlY3RlZCA8LSBkZlssIGMoIm5hbWVfbW9udGgiLCAidGVtcCIsICJjbnQiKV0NCg0KIyBTYXZlIHRoZSByZXN1bHRpbmcgZGF0YXNldCB0byBhIG5ldyBDU1YgZmlsZS4NCndyaXRlLmNzdihkZl9zZWxlY3RlZCwgImRheV9wcm9jZXNzZWQuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpDQoNCiMgU2hvdyBtdWx0aXBsZSByb3dzIG9mIHJlc3VsdHMNCmhlYWQoZGZfc2VsZWN0ZWQpDQpwcmludChkZikNCmBgYA0KDQojIE1vZGVsIDENCiMjIEEuIFNpbXBsZSBMaW5lYXIgUmVncmVzc2lvbiBNb2RlbA0KYGBge3IgY2FyczJ9DQpNb2RlbDEgPC0gbG0oY250IH4gbmFtZV9tb250aCwgZGF0YSA9IGRmKQ0Kc3VtbWFyeShNb2RlbDEpDQpgYGANCg0KIyMgQi4gSW50ZXJwcmV0YXNpIFItU3F1YXJlDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KQmFzZWQgb24gdGhlIHJlZ3Jlc3Npb24gb3V0cHV0LCB0aGUgUi1zcXVhcmVkIChSwrIpIHZhbHVlIG9mIDAuMzkwNiBpbmRpY2F0ZXMgdGhhdCB0aGlzIG1vZGVsIGlzIGFibGUgdG8gZXhwbGFpbiBhYm91dCAzOS4wNiUgb2YgdGhlIHZhcmlhdGlvbiBpbiB0aGUgZGVwZW5kZW50IHZhcmlhYmxlLCB3aGlsZSB0aGUgcmVtYWluaW5nIDYwLjk0JSBpcyBpbmZsdWVuY2VkIGJ5IG90aGVyIGZhY3RvcnMgbm90IGluY2x1ZGVkIGluIHRoZSBtb2RlbC4gTWVhbndoaWxlLCB0aGUgQWRqdXN0ZWQgUi1zcXVhcmVkIG9mIDAuMzgxMyBpbmRpY2F0ZXMgdGhhdCBhZnRl
ciBhZGp1c3RpbmcgdGhlIG51bWJlciBvZiB2YXJpYWJsZXMgaW4gdGhlIG1vZGVsLCB0aGUgbW9kZWwncyBwcmVkaWN0aXZlIGFiaWxpdHkgZGVjcmVhc2VkIHNsaWdodGx5LCBpbmRpY2F0aW5nIHRoYXQgc29tZSB2YXJpYWJsZXMgbWF5IG5vdCBjb250cmlidXRlIHNpZ25pZmljYW50bHkuIE92ZXJhbGwsIHRoaXMgbW9kZWwgaGFzIG1vZGVyYXRlIHByZWRpY3RpdmUgYWJpbGl0eSwgYnV0IGNhbiBzdGlsbCBiZSBpbXByb3ZlZCBieSBjb25zaWRlcmluZyBhZGRpdGlvbmFsIHZhcmlhYmxlcyBvciBtb3JlIGFwcHJvcHJpYXRlIG1vZGVsaW5nIG1ldGhvZHMgdG8gaW1wcm92ZSBwcmVkaWN0aW9uIGFjY3VyYWN5Lg0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NCg0KIyMgQy4gTW9vbiBSZWZlcmVuY2UNCjxwIHN0eWxlPSJ0ZXh0LWFsaWduOiBqdXN0aWZ5OyI+DQpTaW5jZSBBcHJpbCBkb2VzIG5vdCBhcHBlYXIgaW4gdGhlIHN1bW1hcnkgb2YgdGhlIHJlZ3Jlc3Npb24gcmVzdWx0cywgaXQgaXMgdXNlZCBhcyB0aGUgcmVmZXJlbmNlIG1vbnRoIGluIHRoZSBtb2RlbC4gVGhpcyBtZWFucyB0aGF0IHRoZSBjb2VmZmljaWVudHMgZm9yIHRoZSBvdGhlciBtb250aHMgc2hvdyBkaWZmZXJlbnQgdmFsdWVzIGNvbXBhcmVkIHRvIEFwcmlsIGFzIHRoZSByZWZlcmVuY2UgbW9udGguDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KYGBge3IgY2FyczR9DQppbnRlcmNlcHQgPC0gY29lZihNb2RlbDEpWyIoSW50ZXJjZXB0KSJdDQpjYXQoIkNudCBwcmVkaWN0aW9uIGZvciBBcHJpbDoiLCBpbnRlcmNlcHQsIlxuIikNCmBgYA0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NCkJhc2VkIG9uIHRoZSByZXN1bHRzIGFib3ZlLCBpdCB3YXMgZm91bmQgdGhhdCB0aGUgcHJlZGljdGVkIHRvdGFsIG51bWJlciBvZiByZW50YWwgYmljeWNsZXMgaW5jbHVkaW5nIHJlZ3VsYXIgYmljeWNsZXMgYW5kIHJlZ2lzdGVyZWQgb25lcyBmb3IgQXByaWwgd2FzIDQ0ODQsOSB1bml0cy4NCjxwIHN0eWxlPSJ0ZXh0LWFsaWduOiBqdXN0aWZ5OyI+DQoNCiMjIEQuIEphbnVhcnkgYW5kIEp1bmUgTW9udGggUHJlZGljdGlvbnMNCmBgYHtyIGNhcnM1fQ0KcHJlZGljdGVkIDwtIGRhdGEuZnJhbWUobmFtZV9tb250aCA9IGMoIkphbnVhcnkiLCAiSnVuZSIpKQ0KcHJlZGljdGVkJHByZWRpY3RlZCA8LSBwcmVkaWN0KE1vZGVsMSwgbmV3ZGF0YSA9IHByZWRpY3RlZCkNCnByaW50KHByZWRpY3RlZCkNCmBgYA0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NClNvIHRoZSBwcmVkaWN0ZWQgcmVzdWx0cyBmb3IgSmFudWFyeSBhcmUgMjE3NiwzMzkgYW5kIEp1bmUgYXJlIDU3NzIsMzY3Lg0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NCg0KIyBNb2RlbCAyDQojIyBBLiBTaW1wbGUgTGluZWFyIFJlZ3Jlc3Npb24gTW9kZWwNCmBgYHtyIGNhcnM2fQ0KTW9kZWwyIDwtIGxtKGNudCB+IHRlbXAgKyBuYW1l
X21vbnRoLCBkYXRhID0gZGYpDQpzdW1tYXJ5KE1vZGVsMikNCmBgYA0KDQojIyBCLiBSLVNxdWFyZSBEaWZmZXJlbmNlDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KQmFzZWQgb24gdGhlIGNvbXBhcmlzb24gb2YgUi1zcXVhcmVkIGFuZCBBZGp1c3RlZCBSLXNxdWFyZWQgdmFsdWVzLCBNb2RlbCAyIGhhcyBhIGJldHRlciBhYmlsaXR5IHRvIGV4cGxhaW4gZGF0YSB2YXJpYXRpb25zIGNvbXBhcmVkIHRvIE1vZGVsIDEuIFRoZSBSLXNxdWFyZWQgdmFsdWUgaW4gTW9kZWwgMiBpbmNyZWFzZWQgZnJvbSAzOS4wNiUgdG8gNDQuNjklLCBhbmQgdGhlIEFkanVzdGVkIFItc3F1YXJlZCB2YWx1ZSBhbHNvIGluY3JlYXNlZCBmcm9tIDM4LjEzJSB0byA0My43NyUuIFRoaXMgaW5jcmVhc2UgaW5kaWNhdGVzIHRoYXQgdGhlIGFkZGl0aW9uIG9mIHRoZSB0ZW1wZXJhdHVyZSB2YXJpYWJsZSBpbiBNb2RlbCAyIGNvbnRyaWJ1dGVzIHRvIGluY3JlYXNpbmcgdGhlIGFjY3VyYWN5IG9mIHRoZSBtb2RlbC4gVGhlcmVmb3JlLCBNb2RlbCAyIGlzIG1vcmUgcmVjb21tZW5kZWQgYmVjYXVzZSBpdCBwcm92aWRlcyBiZXR0ZXIgcmVzdWx0cyBpbiBleHBsYWluaW5nIHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiB2YXJpYWJsZXMgaW4gdGhlIGRhdGEuDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KDQojIyBDLiBDb21wYXJpc29uIG9mIENvZWZmaWNpZW50IEVzdGltYXRlcw0KYGBge3IgY2Fyczh9DQpjb2VmKE1vZGVsMSlbIm5hbWVfbW9udGhKYW51YXJ5Il0NCmNvZWYoTW9kZWwyKVsibmFtZV9tb250aEphbnVhcnkiXQ0KYGBgDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KVGhlIGRpZmZlcmVuY2UgaW4gdGhlIGNvZWZmaWNpZW50IGVzdGltYXRlcyBvZiBuYW1lX29mX21vbnRoSmFudWFyeSBiZXR3ZWVuIE1vZGVsIDEgYW5kIE1vZGVsIDIgc2hvd3MgdGhhdCB0aGUgYWRkaXRpb24gb2YgdGhlIHRlbXBlcmF0dXJlIHZhcmlhYmxlIGFmZmVjdHMgdGhlIHJlZ3Jlc3Npb24gcmVzdWx0cy4gSW4gTW9kZWwgMSwgdGhlIGluZmx1ZW5jZSBvZiBKYW51YXJ5IGFwcGVhcnMgbGFyZ2VyIGJlY2F1c2UgdGhlcmUgaXMgbm8gYWRkaXRpb25hbCB2YXJpYWJsZSBleHBsYWluaW5nIHRoZSB2YXJpYXRpb24gaW4gdGhlIGRhdGEuIEhvd2V2ZXIsIHdoZW4gdGVtcGVyYXR1cmUgaXMgZW50ZXJlZCBpbnRvIE1vZGVsIDIsIHNvbWUgb2YgdGhlIHZhcmlhdGlvbiBwcmV2aW91c2x5IGFzc29jaWF0ZWQgd2l0aCB0aGUgbW9udGggaXMgbm93IGV4cGxhaW5lZCBieSB0ZW1wZXJhdHVyZSwgc28gdGhhdCB0aGUgY29lZmZpY2llbnQgdmFsdWUgb2YgSmFudWFyeSBiZWNvbWVzIHNtYWxsZXIuIFRoaXMgc2hvd3MgdGhhdCB0ZW1wZXJhdHVyZSBoYXMgYSBjb250cmlidXRpb24gaW4gZXhwbGFpbmluZyBjaGFuZ2VzIGluIHRoZSBkYXRhLCBzbyB0aGF0IE1vZGVsIDIgaXMgbW9yZSBhYmxlIHRvIGNhcHR1cmUgdGhlIGZhY3Rv
cnMgdGhhdCBpbmZsdWVuY2UgdGhlIHJlc3BvbnNlIG9mIHRoZSB2YXJpYWJsZSBtb3JlIGFjY3VyYXRlbHkuDQo8cCBzdHlsZT0idGV4dC1hbGlnbjoganVzdGlmeTsiPg0KDQojIyBELiBKYW51YXJ5IE1vbnRoIFByZWRpY3Rpb24gV2hlbiBUZW1wZXJhdHVyZSBpcyAwLjI1DQpgYGB7ciBjYXJzOX0NClByZWRpY3Rpb25famFudWFyaV90ZW1wXzAyNSA8LSAxNTU0LjM5ICsgKC04NTIuMzEpICsgKDYyMzUuMTQqMC4yNSkNCg0KY2F0KCJQcmVkaWN0aW9uIGNudCBmb3IgSmFudWFyeSB3aXRoIHRlbXAgMC4yNSA9ICIsIFByZWRpY3Rpb25famFudWFyaV90ZW1wXzAyNSwgIlxuIikNCmBgYA0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NClNvIHRoZSB0b3RhbCBudW1iZXIgb2YgcmVudGFsIGJpY3ljbGVzIGluY2x1ZGluZyByZWd1bGFyIGFuZCByZWdpc3RlcmVkIGJpY3ljbGVzIGlzIDIyNjAsODY1IHVuaXRzLg0KPHAgc3R5bGU9InRleHQtYWxpZ246IGp1c3RpZnk7Ij4NCg==