This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Read the red-wine CSV (with a header row) straight from the home
# directory. Addressing the file by path avoids setwd(), so the working
# directory of the session is left as the caller had it.
winequality <- read.csv(
  path.expand(file.path("~", "winequality-red.csv")),
  header = TRUE
)
# List the column names that were read.
names(winequality)
## [1] "fixed.acidity" "volatile.acidity" "citric.acid"
## [4] "residual.sugar" "chlorides" "free.sulfur.dioxide"
## [7] "total.sulfur.dioxide" "density" "pH"
## [10] "sulphates" "alcohol" "quality"
# Preview the first six rows of the data.
head(winequality, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# Per-column summary statistics (extremes, quartiles, mean).
summary(object = winequality)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## Min. :0.01200 Min. : 1.00 Min. : 6.00
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00
## Median :0.07900 Median :14.00 Median : 38.00
## Mean :0.08747 Mean :15.87 Mean : 46.47
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00
## Max. :0.61100 Max. :72.00 Max. :289.00
## density pH sulphates alcohol
## Min. :0.9901 Min. :2.740 Min. :0.3300 Min. : 8.40
## 1st Qu.:0.9956 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50
## Median :0.9968 Median :3.310 Median :0.6200 Median :10.20
## Mean :0.9967 Mean :3.311 Mean :0.6581 Mean :10.42
## 3rd Qu.:0.9978 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10
## Max. :1.0037 Max. :4.010 Max. :2.0000 Max. :14.90
## quality
## Min. :3.000
## 1st Qu.:5.000
## Median :6.000
## Mean :5.636
## 3rd Qu.:6.000
## Max. :8.000
# Scatterplot matrix of every pair of columns.
plot(x = winequality)

# Fixed acidity against density, then pH against fixed acidity.
plot(x = winequality$density, y = winequality$fixed.acidity)
plot(x = winequality$fixed.acidity, y = winequality$pH)

# Histograms of density, chlorides and pH.
hist(x = winequality$density)
hist(x = winequality$chlorides)
hist(x = winequality$pH)

# Box plots of density and chlorides.
boxplot(x = winequality$density)
boxplot(x = winequality$chlorides)

# Correlation between density and fixed acidity (default method).
cor(x = winequality$density, y = winequality$fixed.acidity)
## [1] 0.6680473
# Correlation between pH and fixed acidity (default method).
cor(x = winequality$pH, y = winequality$fixed.acidity)
## [1] -0.6829782
# Linear model of quality on density, pH and fixed acidity.
# The fit is printed directly and not stored in a variable.
lm(
  formula = quality ~ density + pH + fixed.acidity,
  data = winequality
)
##
## Call:
## lm(formula = quality ~ density + pH + fixed.acidity, data = winequality)
##
## Coefficients:
## (Intercept) density pH fixed.acidity
## 213.1960 -213.1606 0.8242 0.2618
# Test whether the correlation between pH and fixed acidity is non-zero.
cor.test(x = winequality$pH, y = winequality$fixed.acidity)
##
## Pearson's product-moment correlation
##
## data: winequality$pH and winequality$fixed.acidity
## t = -37.3659, df = 1597, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.7082857 -0.6559174
## sample estimates:
## cor
## -0.6829782
# Test whether the correlation between density and fixed acidity is non-zero.
cor.test(x = winequality$density, y = winequality$fixed.acidity)
##
## Pearson's product-moment correlation
##
## data: winequality$density and winequality$fixed.acidity
## t = 35.8771, df = 1597, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6399847 0.6943302
## sample estimates:
## cor
## 0.6680473
# Full model: quality regressed on the eleven other columns.
# The data frame is handed to lm() through `data =` instead of being
# attach()ed, so the predictors are looked up in `winequality` itself and
# the search path is never modified.
# NOTE: the echoed `Call:` now also lists `data = winequality`; the
# estimates themselves do not change.
model1 <- lm(
  quality ~ fixed.acidity + residual.sugar + total.sulfur.dioxide +
    sulphates + volatile.acidity + chlorides + density + alcohol +
    citric.acid + free.sulfur.dioxide + pH,
  data = winequality
)
# Print the call and the fitted coefficients.
model1
##
## Call:
## lm(formula = quality ~ fixed.acidity + residual.sugar + total.sulfur.dioxide +
## sulphates + volatile.acidity + chlorides + density + alcohol +
## citric.acid + free.sulfur.dioxide + pH)
##
## Coefficients:
## (Intercept) fixed.acidity residual.sugar
## 21.965208 0.024991 0.016331
## total.sulfur.dioxide sulphates volatile.acidity
## -0.003265 0.916334 -1.083590
## chlorides density alcohol
## -1.874225 -17.881164 0.276198
## citric.acid free.sulfur.dioxide pH
## -0.182564 0.004361 -0.413653
# Residual summary, coefficient table and fit statistics for model1.
summary(object = model1)
##
## Call:
## lm(formula = quality ~ fixed.acidity + residual.sugar + total.sulfur.dioxide +
## sulphates + volatile.acidity + chlorides + density + alcohol +
## citric.acid + free.sulfur.dioxide + pH)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68911 -0.36652 -0.04699 0.45202 2.02498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.197e+01 2.119e+01 1.036 0.3002
## fixed.acidity 2.499e-02 2.595e-02 0.963 0.3357
## residual.sugar 1.633e-02 1.500e-02 1.089 0.2765
## total.sulfur.dioxide -3.265e-03 7.287e-04 -4.480 8.00e-06 ***
## sulphates 9.163e-01 1.143e-01 8.014 2.13e-15 ***
## volatile.acidity -1.084e+00 1.211e-01 -8.948 < 2e-16 ***
## chlorides -1.874e+00 4.193e-01 -4.470 8.37e-06 ***
## density -1.788e+01 2.163e+01 -0.827 0.4086
## alcohol 2.762e-01 2.648e-02 10.429 < 2e-16 ***
## citric.acid -1.826e-01 1.472e-01 -1.240 0.2150
## free.sulfur.dioxide 4.361e-03 2.171e-03 2.009 0.0447 *
## pH -4.137e-01 1.916e-01 -2.159 0.0310 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.648 on 1587 degrees of freedom
## Multiple R-squared: 0.3606, Adjusted R-squared: 0.3561
## F-statistic: 81.35 on 11 and 1587 DF, p-value: < 2.2e-16
You can also embed plots, for example:
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.