# The diamonds dataset ships with ggplot2
library(ggplot2)
Stackoverflow is a great place to get help:
http://stackoverflow.com/tags/ggplot2.
library(dplyr)

Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union
# Compact column-by-column overview of the diamonds data
glimpse(diamonds)
Observations: 53,940
Variables: 10
$ carat   (dbl) 0.23, 0.21, 0.23, 0.29, 0.31, 0.24, 0.24, 0.26, ...
$ cut     (fctr) Ideal, Premium, Good, Premium, Good, Very Good,...
$ color   (fctr) E, E, E, I, J, J, I, H, E, H, J, J, F, J, E, E,...
$ clarity (fctr) SI2, SI1, VS1, VS2, SI2, VVS2, VVS1, SI1, VS2, ...
$ depth   (dbl) 61.5, 59.8, 56.9, 62.4, 63.3, 62.8, 62.3, 61.9, ...
$ table   (dbl) 55, 61, 65, 58, 58, 57, 57, 55, 61, 61, 55, 56, ...
$ price   (int) 326, 326, 327, 334, 335, 336, 336, 337, 337, 338...
$ x       (dbl) 3.95, 3.89, 4.05, 4.20, 4.34, 3.94, 3.95, 4.07, ...
$ y       (dbl) 3.98, 3.84, 4.07, 4.23, 4.35, 3.96, 3.98, 4.11, ...
$ z       (dbl) 2.43, 2.31, 2.31, 2.63, 2.75, 2.48, 2.47, 2.53, ...
# Linear regression of price on every other column (`.` expands to all of them)
model <- lm(formula = price ~ ., data = diamonds)
# The .L/.Q/.C/^4 coefficient names printed below indicate polynomial
# contrasts (R's default coding for ordered factors), not 0/1 dummy columns
print(model)

Call:
lm(formula = price ~ ., data = diamonds)

Coefficients:
(Intercept)        carat        cut.L        cut.Q        cut.C  
   5753.762    11256.978      584.457     -301.908      148.035  
      cut^4      color.L      color.Q      color.C      color^4  
    -20.794    -1952.160     -672.054     -165.283       38.195  
    color^5      color^6    clarity.L    clarity.Q    clarity.C  
    -95.793      -48.466     4097.431    -1925.004      982.205  
  clarity^4    clarity^5    clarity^6    clarity^7        depth  
   -364.918      233.563        6.883       90.640      -63.806  
      table            x            y            z  
    -26.474    -1008.261        9.609      -50.119  
# Predicted prices for the same rows the model was fitted on: p
p <- predict(model, newdata = diamonds)
# Show the first six predictions
print(head(p))
         1          2          3          4          5          6 
-1346.3643  -664.5954   211.1071  -830.7372 -3459.2242 -1380.4876 
# Prediction minus observed price, one value per row: error
error <- p - diamonds[["price"]]
# Root-mean-squared error; in-sample, since p was predicted on the training rows
sqrt(mean(error^2))
[1] 1129.843

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Cmd+Option+I.

When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Cmd+Shift+K to preview the HTML file).

LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKCmBgYHtyfQojIGRpYW1vbmRzIGluc2lkZSBnZ3Bsb3QyIApsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoZHBseXIpCgpkaWFtb25kcyAlPiUgZ2xpbXBzZSgpCgojIEZpdCBsbSBtb2RlbCBiZXR3ZWVuIHByaWNlIGFuZCBhbGwgb3RoZXIgYXR0cmlidXRlcwptb2RlbCA8LSBsbShwcmljZSB+IC4sIGRpYW1vbmRzKQoKIyBsbSBpbiBSIGF1dG9tYXRpY2FsbHkgc3BsaXQgY2F0ZWdvcmljYWwgdmFyaWFibGVzIGFzIG1hbnkgZHVtbXkgdmFyaWFibGVzCnByaW50KG1vZGVsKQoKIyBQcmVkaWN0IG9uIGZ1bGwgZGF0YTogcApwIDwtIHByZWRpY3QobW9kZWwsIGRpYW1vbmRzKQpwcmludChwICU+JSBoZWFkKCkpCgojIENvbXB1dGUgZXJyb3JzOiBlcnJvcgplcnJvciA8LSBwIC0gZGlhbW9uZHMkcHJpY2UgCgojIENhbGN1bGF0ZSBSTVNFCnNxcnQobWVhbihlcnJvcl4yKSkKCmBgYAoKCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpBZGQgYSBuZXcgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpJbnNlcnQgQ2h1bmsqIGJ1dHRvbiBvbiB0aGUgdG9vbGJhciBvciBieSBwcmVzc2luZyAqQ21kK09wdGlvbitJKi4KCldoZW4geW91IHNhdmUgdGhlIG5vdGVib29rLCBhbiBIVE1MIGZpbGUgY29udGFpbmluZyB0aGUgY29kZSBhbmQgb3V0cHV0IHdpbGwgYmUgc2F2ZWQgYWxvbmdzaWRlIGl0IChjbGljayB0aGUgKlByZXZpZXcqIGJ1dHRvbiBvciBwcmVzcyAqQ21kK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuCg==