Part 1: Linear Regression

Understanding regression

Predicting Medical Expenses

Step 4: Evaluating model performance

Step 5: Improving model performance

Part 2: Regression Trees and Model Trees

Understanding regression trees and model trees

Example: Calculating SDR

# Example data: `tee` holds all 15 observations; each candidate split cuts
# the (already sorted) vector into a left and a right partition.
tee <- c(1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 7, 7)
# split A: first 9 observations vs. the remaining 6
at1 <- tee[1:9]
at2 <- tee[10:15]
# split B: first 7 observations vs. the remaining 8
bt1 <- tee[1:7]
bt2 <- tee[8:15]

# Standard deviation reduction (SDR) of a split: sd of the full data minus
# the sum of each partition's sd weighted by its share of the observations.
sdr_a <- sd(tee) - (
  length(at1) / length(tee) * sd(at1) +
    length(at2) / length(tee) * sd(at2)
)
sdr_b <- sd(tee) - (
  length(bt1) / length(tee) * sd(bt1) +
    length(bt2) / length(tee) * sd(bt2)
)
# compare the SDR for each split; each value is sd(tee) minus the
# size-weighted sd of that split's two partitions, so the larger value marks
# the split that reduces the spread more (split B in the output shown here)
sdr_a
[1] 1.202815
sdr_b
[1] 1.392751
LS0tDQp0aXRsZTogIkNoYXB0ZXIgNjogUmVncmVzc2lvbiBNZXRob2RzIg0Kb3V0cHV0OiBodG1sX25vdGVib29rDQotLS0NCg0KDQojIyMjIFBhcnQgMTogTGluZWFyIFJlZ3Jlc3Npb24NCg0KDQojIyBVbmRlcnN0YW5kaW5nIHJlZ3Jlc3Npb24NCg0KDQoNCmBgYHtyfQ0KIyMgRXhhbXBsZTogU3BhY2UgU2h1dHRsZSBMYXVuY2ggRGF0YQ0KbGF1bmNoIDwtIHJlYWQuY3N2KCJjaGFsbGVuZ2VyLmNzdiIpDQpgYGANCg0KDQpgYGB7cn0NCiMgZXN0aW1hdGUgYmV0YSBtYW51YWxseQ0KYiA8LSBjb3YobGF1bmNoJHRlbXBlcmF0dXJlLCBsYXVuY2gkZGlzdHJlc3NfY3QpIC8gdmFyKGxhdW5jaCR0ZW1wZXJhdHVyZSkNCmINCmBgYA0KDQoNCmBgYHtyfQ0KIyBlc3RpbWF0ZSBhbHBoYSBtYW51YWxseQ0KYSA8LSBtZWFuKGxhdW5jaCRkaXN0cmVzc19jdCkgLSBiICogbWVhbihsYXVuY2gkdGVtcGVyYXR1cmUpDQphDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBjYWxjdWxhdGUgdGhlIGNvcnJlbGF0aW9uIG9mIGxhdW5jaCBkYXRhDQpyIDwtIGNvdihsYXVuY2gkdGVtcGVyYXR1cmUsIGxhdW5jaCRkaXN0cmVzc19jdCkgLw0KICAgICAgIChzZChsYXVuY2gkdGVtcGVyYXR1cmUpICogc2QobGF1bmNoJGRpc3RyZXNzX2N0KSkNCnINCmBgYA0KDQoNCmBgYHtyfQ0KY29yKGxhdW5jaCR0ZW1wZXJhdHVyZSwgbGF1bmNoJGRpc3RyZXNzX2N0KQ0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KIyBjb21wdXRpbmcgdGhlIHNsb3BlIHVzaW5nIGNvcnJlbGF0aW9uDQpyICogKHNkKGxhdW5jaCRkaXN0cmVzc19jdCkgLyBzZChsYXVuY2gkdGVtcGVyYXR1cmUpKQ0KYGBgDQoNCg0KYGBge3J9DQojIGNvbmZpcm1pbmcgdGhlIHJlZ3Jlc3Npb24gbGluZSB1c2luZyB0aGUgbG0gZnVuY3Rpb24gKG5vdCBpbiB0ZXh0KQ0KbW9kZWwgPC0gbG0oZGlzdHJlc3NfY3QgfiB0ZW1wZXJhdHVyZSwgZGF0YSA9IGxhdW5jaCkNCm1vZGVsDQpgYGANCg0KDQoNCmBgYHtyfQ0Kc3VtbWFyeShtb2RlbCkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBjcmVhdGluZyBhIHNpbXBsZSBtdWx0aXBsZSByZWdyZXNzaW9uIGZ1bmN0aW9uDQpyZWcgPC0gZnVuY3Rpb24oeSwgeCkgew0KICB4IDwtIGFzLm1hdHJpeCh4KQ0KICB4IDwtIGNiaW5kKEludGVyY2VwdCA9IDEsIHgpDQogIGIgPC0gc29sdmUodCh4KSAlKiUgeCkgJSolIHQoeCkgJSolIHkNCiAgY29sbmFtZXMoYikgPC0gImVzdGltYXRlIg0KICBwcmludChiKQ0KfQ0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KIyBleGFtaW5lIHRoZSBsYXVuY2ggZGF0YQ0Kc3RyKGxhdW5jaCkNCmBgYA0KDQoNCg0KYGBge3J9DQojIHRlc3QgcmVncmVzc2lvbiBtb2RlbCB3aXRoIHNpbXBsZSBsaW5lYXIgcmVncmVzc2lvbg0KcmVnKHkgPSBsYXVuY2gkZGlzdHJlc3NfY3QsIHggPSBsYXVuY2hbMl0pDQpgYGANCg0KDQpgYGB7cn0NCiMgdXNlIHJlZ3Jlc3Npb24gbW9kZWwgd2l0aCBtdWx0aXBsZSByZWdyZXNzaW9uDQpyZWcoeSA9IGxhdW5jaCRkaXN0cmVz
c19jdCwgeCA9IGxhdW5jaFsyOjRdKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgY29uZmlybWluZyB0aGUgbXVsdGlwbGUgcmVncmVzc2lvbiByZXN1bHQgdXNpbmcgdGhlIGxtIGZ1bmN0aW9uIChub3QgaW4gdGV4dCkNCm1vZGVsIDwtIGxtKGRpc3RyZXNzX2N0IH4gdGVtcGVyYXR1cmUgKyBmaWVsZF9jaGVja19wcmVzc3VyZSArIGZsaWdodF9udW0sIGRhdGEgPSBsYXVuY2gpDQptb2RlbA0KYGBgDQoNCg0KIyMgUHJlZGljdGluZyBNZWRpY2FsIEV4cGVuc2VzDQoNCmBgYHtyfQ0KIyMgU3RlcCAyOiBFeHBsb3JpbmcgYW5kIHByZXBhcmluZyB0aGUgZGF0YSAtLS0tDQppbnN1cmFuY2UgPC0gcmVhZC5jc3YoImluc3VyYW5jZS5jc3YiLCBzdHJpbmdzQXNGYWN0b3JzID0gVFJVRSkNCnN0cihpbnN1cmFuY2UpDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBzdW1tYXJpemUgdGhlIGNoYXJnZXMgdmFyaWFibGUNCnN1bW1hcnkoaW5zdXJhbmNlJGV4cGVuc2VzKQ0KYGBgDQoNCg0KYGBge3J9DQojIGhpc3RvZ3JhbSBvZiBpbnN1cmFuY2UgY2hhcmdlcw0KaGlzdChpbnN1cmFuY2UkZXhwZW5zZXMpDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyB0YWJsZSBvZiByZWdpb24NCnRhYmxlKGluc3VyYW5jZSRyZWdpb24pDQpgYGANCg0KDQoNCmBgYHtyfQ0KIyBleHBsb3JpbmcgcmVsYXRpb25zaGlwcyBhbW9uZyBmZWF0dXJlczogY29ycmVsYXRpb24gbWF0cml4DQpjb3IoaW5zdXJhbmNlW2MoImFnZSIsICJibWkiLCAiY2hpbGRyZW4iLCAiZXhwZW5zZXMiKV0pDQpgYGANCg0KDQpgYGB7cn0NCiMgdmlzdWFsaW5nIHJlbGF0aW9uc2hpcHMgYW1vbmcgZmVhdHVyZXM6IHNjYXR0ZXJwbG90IG1hdHJpeA0KcGFpcnMoaW5zdXJhbmNlW2MoImFnZSIsICJibWkiLCAiY2hpbGRyZW4iLCAiZXhwZW5zZXMiKV0pDQpgYGANCg0KDQoNCg0KYGBge3J9DQojIyBTdGVwIDM6IFRyYWluaW5nIGEgbW9kZWwgb24gdGhlIGRhdGEgLS0tLQ0KaW5zX21vZGVsIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgY2hpbGRyZW4gKyBibWkgKyBzZXggKyBzbW9rZXIgKyByZWdpb24sDQogICAgICAgICAgICAgICAgZGF0YSA9IGluc3VyYW5jZSkNCmluc19tb2RlbCA8LSBsbShleHBlbnNlcyB+IC4sIGRhdGEgPSBpbnN1cmFuY2UpICMgdGhpcyBpcyBlcXVpdmFsZW50IHRvIGFib3ZlDQoNCiMgc2VlIHRoZSBlc3RpbWF0ZWQgYmV0YSBjb2VmZmljaWVudHMNCmluc19tb2RlbA0KYGBgDQoNCg0KIyMgU3RlcCA0OiBFdmFsdWF0aW5nIG1vZGVsIHBlcmZvcm1hbmNlDQoNCmBgYHtyfQ0KIyBzZWUgbW9yZSBkZXRhaWwgYWJvdXQgdGhlIGVzdGltYXRlZCBiZXRhIGNvZWZmaWNpZW50cw0Kc3VtbWFyeShpbnNfbW9kZWwpDQpgYGANCg0KDQojIyBTdGVwIDU6IEltcHJvdmluZyBtb2RlbCBwZXJmb3JtYW5jZQ0KDQoNCg0KYGBge3J9DQojIGFkZCBhIGhpZ2hlci1vcmRlciAiYWdlIiB0ZXJtDQppbnN1cmFuY2UkYWdlMiA8LSBpbnN1cmFuY2UkYWdlXjINCmBgYA0KDQoNCg0KYGBge3J9DQojIGFk
ZCBhbiBpbmRpY2F0b3IgZm9yIEJNSSA+PSAzMA0KaW5zdXJhbmNlJGJtaTMwIDwtIGlmZWxzZShpbnN1cmFuY2UkYm1pID49IDMwLCAxLCAwKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgY3JlYXRlIGZpbmFsIG1vZGVsDQppbnNfbW9kZWwyIDwtIGxtKGV4cGVuc2VzIH4gYWdlICsgYWdlMiArIGNoaWxkcmVuICsgYm1pICsgc2V4ICsNCiAgICAgICAgICAgICAgICAgICBibWkzMCpzbW9rZXIgKyByZWdpb24sIGRhdGEgPSBpbnN1cmFuY2UpDQpgYGANCg0KDQpgYGB7cn0NCnN1bW1hcnkoaW5zX21vZGVsMikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBtYWtpbmcgcHJlZGljdGlvbnMgd2l0aCB0aGUgcmVncmVzc2lvbiBtb2RlbA0KaW5zdXJhbmNlJHByZWQgPC0gcHJlZGljdChpbnNfbW9kZWwyLCBpbnN1cmFuY2UpDQpjb3IoaW5zdXJhbmNlJHByZWQsIGluc3VyYW5jZSRleHBlbnNlcykNCmBgYA0KDQoNCg0KYGBge3J9DQpwbG90KGluc3VyYW5jZSRwcmVkLCBpbnN1cmFuY2UkZXhwZW5zZXMpDQphYmxpbmUoYSA9IDAsIGIgPSAxLCBjb2wgPSAicmVkIiwgbHdkID0gMywgbHR5ID0gMikNCmBgYA0KDQoNCg0KDQpgYGB7cn0gDQpwcmVkaWN0KGluc19tb2RlbDIsDQogICAgICAgIGRhdGEuZnJhbWUoYWdlID0gMjIsIGFnZTIgPSAyMl4yLCBjaGlsZHJlbiA9IDMsDQogICAgICAgICAgICAgICAgICAgYm1pID0gMjQsIHNleCA9ICJmZW1hbGUiLCBibWkzMCA9IDAsDQogICAgICAgICAgICAgICAgICAgc21va2VyID0gIm5vIiwgcmVnaW9uID0gIm5vcnRod2VzdCIpKQ0KDQojQ2FzZSAxOiAgQWdlPTIyLCBDaGlsZHJlbj0zLGJtaT0yNCxzZXg9ZmVtYWxlLGJtaTMwPTAsc21va2VyPW5vLCByZWdpb249Tm9ydGh3ZXN0Lg0KI2NoYW5nZSB0aGUgZGF0YQ0KI1RoZSBpbnN1cmFuY2UgcXVvdGUgaXMgaW4gQToNCmBgYA0KYGBge3J9DQpwcmVkaWN0KGluc19tb2RlbDIsDQogICAgICAgIGRhdGEuZnJhbWUoYWdlID0gMzAsIGFnZTIgPSAzMF4yLCBjaGlsZHJlbiA9IDIsDQogICAgICAgICAgICAgICAgICAgYm1pID0gMzAsIHNleCA9ICJmZW1hbGUiLCBibWkzMCA9IDEsDQogICAgICAgICAgICAgICAgICAgc21va2VyID0gIm5vIiwgcmVnaW9uID0gIm5vcnRoZWFzdCIpKQ0KYGBgDQoNCg0KYGBge3J9DQpwcmVkaWN0KGluc19tb2RlbDIsDQogICAgICAgIGRhdGEuZnJhbWUoYWdlID0gMjIsIGFnZTIgPSAyMl4yLCBjaGlsZHJlbiA9IDEsDQogICAgICAgICAgICAgICAgICAgYm1pID0gMjcsIHNleCA9ICJtYWxlIiwgYm1pMzAgPSAwLA0KICAgICAgICAgICAgICAgICAgIHNtb2tlciA9ICJ5ZXMiLCByZWdpb24gPSAic291dGh3ZXN0IikpDQojQ2hhbmdlIHRoZSBkYXRhDQojQ2FzZSAxOiAgQWdlPTIyLCBDaGlsZHJlbj0xLGJtaT0yNyxzZXg9bWFsZSxibWkzMD0wLHNtb2tlcj15ZXMsIHJlZ2lvbj1Tb3V0aGVhc3QuDQojVGhlIGluc3VyYW5jZSBxdW90ZSBpcyBpbiBCOg0KYGBgDQoNCg0KYGBge3J9DQpwcmVkaWN0KGluc19tb2RlbDIsDQogICAg
ICAgIGRhdGEuZnJhbWUoYWdlID0gMzAsIGFnZTIgPSAzMF4yLCBjaGlsZHJlbiA9IDAsDQogICAgICAgICAgICAgICAgICAgYm1pID0gMzAsIHNleCA9ICJmZW1hbGUiLCBibWkzMCA9IDEsDQogICAgICAgICAgICAgICAgICAgc21va2VyID0gIm5vIiwgcmVnaW9uID0gIm5vcnRoZWFzdCIpKQ0KYGBgDQoNCg0KDQojIyMjIFBhcnQgMjogUmVncmVzc2lvbiBUcmVlcyBhbmQgTW9kZWwgVHJlZXMNCg0KIyMgVW5kZXJzdGFuZGluZyByZWdyZXNzaW9uIHRyZWVzIGFuZCBtb2RlbCB0cmVlcw0KDQojIyBFeGFtcGxlOiBDYWxjdWxhdGluZyBTRFINCg0KYGBge3J9DQojIHNldCB1cCB0aGUgZGF0YQ0KdGVlIDwtIGMoMSwgMSwgMSwgMiwgMiwgMywgNCwgNSwgNSwgNiwgNiwgNywgNywgNywgNykNCmF0MSA8LSBjKDEsIDEsIDEsIDIsIDIsIDMsIDQsIDUsIDUpDQphdDIgPC0gYyg2LCA2LCA3LCA3LCA3LCA3KQ0KYnQxIDwtIGMoMSwgMSwgMSwgMiwgMiwgMywgNCkNCmJ0MiA8LSBjKDUsIDUsIDYsIDYsIDcsIDcsIDcsIDcpDQpgYGANCg0KDQoNCg0KYGBge3J9DQojIGNvbXB1dGUgdGhlIFNEUg0Kc2RyX2EgPC0gc2QodGVlKSAtIChsZW5ndGgoYXQxKSAvIGxlbmd0aCh0ZWUpICogc2QoYXQxKSArIGxlbmd0aChhdDIpIC8gbGVuZ3RoKHRlZSkgKiBzZChhdDIpKQ0Kc2RyX2IgPC0gc2QodGVlKSAtIChsZW5ndGgoYnQxKSAvIGxlbmd0aCh0ZWUpICogc2QoYnQxKSArIGxlbmd0aChidDIpIC8gbGVuZ3RoKHRlZSkgKiBzZChidDIpKQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgY29tcGFyZSB0aGUgU0RSIGZvciBlYWNoIHNwbGl0DQpzZHJfYQ0Kc2RyX2INCmBgYA0KDQoNCg0KDQo=