# List the column names of the diamonds data frame.
colnames(diamonds)
## [1] "weight" "clarity" "color" "value"
And what kind of data is available in the data set?
# Compact structural overview: number of observations and variables, plus
# the type and leading values of every column.
str(object = diamonds)
## 'data.frame': 150 obs. of 4 variables:
## $ weight : num 9.35 11.1 8.65 10.43 10.62 ...
## $ clarity: num 0.88 1.05 0.85 1.15 0.92 0.44 1.09 1.43 0.95 1.05 ...
## $ color : num 4 5 6 5 5 4 6 4 6 5 ...
## $ value : num 182 191 176 195 182 ...

# Show the first six rows of the data frame.
head(x = diamonds, n = 6L)
## weight clarity color value
## 1 9.35 0.88 4 182.5
## 2 11.10 1.05 5 191.2
## 3 8.65 0.85 6 175.7
## 4 10.43 1.15 5 195.2
## 5 10.62 0.92 5 181.6
## 6 12.35 0.44 4 182.9
# Fit a linear regression of value on weight, clarity and color.
# The formula is written inline (not stored in a variable) so that the
# "Call:" line printed below shows the full model specification.
diamonds.lm <- lm(
  formula = value ~ weight + clarity + color,
  data = diamonds
)

# Print the fitted model: the call and the estimated coefficients.
diamonds.lm
##
## Call:
## lm(formula = value ~ weight + clarity + color, data = diamonds)
##
## Coefficients:
## (Intercept) weight clarity color
## 148.3354 2.1894 21.6922 -0.4549

# Full summary: residual quantiles, coefficient table with standard errors
# and p-values, residual standard error, R-squared and the overall F-test.
# In the table below only color has a p-value above 0.05 (0.214).
summary(object = diamonds.lm)
##
## Call:
## lm(formula = value ~ weight + clarity + color, data = diamonds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.4046 -3.5473 -0.1134 3.2552 11.0464
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 148.3354 3.6253 40.917 <2e-16 ***
## weight 2.1894 0.2000 10.948 <2e-16 ***
## clarity 21.6922 2.1429 10.123 <2e-16 ***
## color -0.4549 0.3646 -1.248 0.214
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.672 on 146 degrees of freedom
## Multiple R-squared: 0.6373, Adjusted R-squared: 0.6298
## F-statistic: 85.49 on 3 and 146 DF, p-value: < 2.2e-16
# Model-predicted value for each of the 150 observations, returned as a
# numeric vector named by row index (fitted.values() is an alias of fitted()).
fitted.values(diamonds.lm)
## 1 2 3 4 5 6 7 8
## 186.0758 193.1401 182.9826 193.8424 189.2692 183.0995 193.8593 198.9261
## 9 10 11 12 13 14 15 16
## 192.1142 189.7465 192.1629 188.3562 190.3111 175.8357 183.5867 193.1738
## 17 18 19 20 21 22 23 24
## 182.4543 183.3600 186.0564 172.1894 190.8769 191.2587 191.9566 187.3536
## 25 26 27 28 29 30 31 32
## 195.0996 189.1340 191.6832 188.9047 198.0162 187.7150 197.4707 186.7516
## 33 34 35 36 37 38 39 40
## 194.0486 182.9471 197.6990 181.8749 189.0689 192.3530 181.4667 199.4478
## 41 42 43 44 45 46 47 48
## 192.6166 195.5992 186.2338 188.6963 180.4489 191.4205 188.6512 195.4749
## 49 50 51 52 53 54 55 56
## 186.4460 182.0177 183.9607 182.3691 194.3771 179.6129 198.1373 189.8638
## 57 58 59 60 61 62 63 64
## 189.4808 195.4418 195.2050 187.5684 195.2332 174.1453 188.9927 190.8006
## 65 66 67 68 69 70 71 72
## 197.1481 193.9149 187.2343 188.9602 191.7916 196.3235 202.1371 197.8254
## 73 74 75 76 77 78 79 80
## 194.2708 192.7675 179.0534 182.9247 196.1669 185.3859 193.3181 187.2589
## 81 82 83 84 85 86 87 88
## 201.0018 198.4333 184.3770 192.0118 183.3099 189.2979 190.9494 193.6273
## 89 90 91 92 93 94 95 96
## 196.9222 196.0434 198.0521 186.6527 178.9600 187.5789 190.2013 183.8228
## 97 98 99 100 101 102 103 104
## 181.8627 196.5341 194.6557 182.6076 189.7448 186.4550 203.3765 193.2738
## 105 106 107 108 109 110 111 112
## 187.7032 184.5395 190.0623 183.7670 182.1457 196.8296 186.3046 183.5932
## 113 114 115 116 117 118 119 120
## 196.1479 193.8122 201.6535 189.7461 187.3012 186.4676 189.2750 189.6210
## 121 122 123 124 125 126 127 128
## 190.4658 186.7303 176.4423 188.1299 187.0176 187.1431 187.2087 183.3231
## 129 130 131 132 133 134 135 136
## 196.9590 177.9258 181.6754 180.9373 190.5306 186.5017 198.1243 175.8418
## 137 138 139 140 141 142 143 144
## 195.7068 202.0438 190.5316 186.6816 183.6889 194.2321 182.3883 192.3458
## 145 146 147 148 149 150
## 194.7501 190.5253 189.2768 190.8429 187.7496 186.6139
# Store the model's fitted values next to the observed ones as a new
# column, value.lm, so both can be compared row by row.
diamonds$value.lm <- fitted(diamonds.lm)

# Confirm the new column by showing the first six rows again.
head(x = diamonds, n = 6L)
## weight clarity color value value.lm
## 1 9.35 0.88 4 182.5 186.0758
## 2 11.10 1.05 5 191.2 193.1401
## 3 8.65 0.85 6 175.7 182.9826
## 4 10.43 1.15 5 195.2 193.8424
## 5 10.62 0.92 5 181.6 189.2692
## 6 12.35 0.44 4 182.9 183.0995
# Step 1: bare scatterplot of observed values (x) against fitted values (y).
# With no labels supplied, the axis titles default to the x/y expressions.
plot(x = diamonds$value, y = diamonds.lm$fitted.values)

# Step 2: the same scatterplot with a title and readable axis labels.
plot(
  x = diamonds$value,
  y = diamonds.lm$fitted.values,
  main = "Regression fits of diamond values",
  xlab = "True Diamond Values",
  ylab = "Linear Model Fitted Values"
)

# Step 3: draw the labelled plot once more and overlay the line with
# intercept 0 and slope 1 (y = x); points on this line are observations
# whose fitted value equals the observed value.
plot(
  x = diamonds$value,
  y = diamonds.lm$fitted.values,
  main = "Regression fits of diamond values",
  xlab = "True Diamond Values",
  ylab = "Linear Model Fitted Values"
)
abline(a = 0, b = 1)