# Load Clothing.csv from GitHub into `clothing` and preview the first rows.
# The workspace is deliberately NOT cleared here: wiping the caller's global
# environment with rm(list = ls()) is a destructive side effect and does not
# give a truly clean state anyway (attached packages and options persist).
# Run the script in a fresh R session when isolation is needed.
clothing <- read.csv(
  "https://raw.githubusercontent.com/VioletaStoyanova/Data605/master/Clothing.csv",
  header = TRUE
)
head(clothing)
## tsale sales margin nown nfull npart naux hoursw hourspw
## 1 750000 4411.765 41 1 1.0000 1.0000 1.5357 76 16.75596
## 2 1926395 4280.878 39 2 2.0000 3.0000 1.5357 192 22.49376
## 3 1250000 4166.667 40 1 2.0000 2.2222 1.4091 114 17.19120
## 4 694227 2670.104 40 1 1.0000 1.2833 1.3673 100 21.50260
## 5 750000 15000.000 44 2 1.9556 1.2833 1.3673 104 15.74279
## 6 400000 4444.444 41 2 1.9556 1.2833 1.3673 72 10.89885
## inv1 inv2 ssize start
## 1 17166.67 27177.04 170 1984
## 2 17166.67 27177.04 450 1972
## 3 292857.20 71570.55 300 1952
## 4 22207.04 15000.00 260 1966
## 5 22207.04 10000.00 50 1996
## 6 22207.04 22859.85 90 1947
# Per-column descriptive statistics (min, quartiles, median, mean, max) for
# every variable in `clothing`.
summary(clothing)
## tsale sales margin nown
## Min. : 50000 Min. : 300 Min. :16.00 Min. : 1.000
## 1st Qu.: 495340 1st Qu.: 3904 1st Qu.:37.00 1st Qu.: 1.000
## Median : 694227 Median : 5279 Median :39.00 Median : 1.000
## Mean : 833584 Mean : 6335 Mean :38.77 Mean : 1.284
## 3rd Qu.: 976817 3rd Qu.: 7740 3rd Qu.:41.00 3rd Qu.: 1.295
## Max. :5000000 Max. :27000 Max. :66.00 Max. :10.000
## nfull npart naux hoursw
## Min. :1.000 Min. :1.000 Min. :1.000 Min. : 32.0
## 1st Qu.:1.923 1st Qu.:1.283 1st Qu.:1.333 1st Qu.: 80.0
## Median :1.956 Median :1.283 Median :1.367 Median :104.0
## Mean :2.069 Mean :1.566 Mean :1.390 Mean :121.1
## 3rd Qu.:2.066 3rd Qu.:2.000 3rd Qu.:1.367 3rd Qu.:145.2
## Max. :8.000 Max. :9.000 Max. :4.000 Max. :582.0
## hourspw inv1 inv2 ssize
## Min. : 5.708 Min. : 1000 Min. : 350 Min. : 16.0
## 1st Qu.:13.541 1st Qu.: 20000 1st Qu.: 10000 1st Qu.: 80.0
## Median :17.745 Median : 22207 Median : 22860 Median : 120.0
## Mean :18.955 Mean : 58257 Mean : 27829 Mean : 151.1
## 3rd Qu.:24.303 3rd Qu.: 62269 3rd Qu.: 22860 3rd Qu.: 190.0
## Max. :43.326 Max. :1500000 Max. :400000 Max. :1214.0
## start
## Min. :1945
## 1st Qu.:1959
## Median :1978
## Mean :1978
## 3rd Qu.:1996
## Max. :2015
# Distributions of the two variables of interest.
hist(clothing$sales, main = "Histogram of Sales")
hist(clothing$margin, main = "Histogram of Margin")

# Exploratory scatter plot: sales on the y-axis, margin on the x-axis.
# Columns are resolved through `data =` so the axes are labelled with the
# plain column names rather than the `clothing$...` expressions.
plot(sales ~ margin, data = clothing, main = "Sales vs Margin")

# Simple linear regression model: margin (response) on sales (predictor).
# Naming the columns in the formula and passing `data = clothing` (instead of
# `clothing$margin ~ clothing$sales`) gives a clean coefficient name (`sales`)
# and lets predict(slm, newdata = ...) work on new data frames.
slm <- lm(margin ~ sales, data = clothing)
summary(slm)
##
## Call:
## lm(formula = margin ~ sales, data = clothing)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.5566 -2.2463 0.6059 2.7198 27.9833
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.756e+01 5.095e-01 73.718 < 2e-16 ***
## sales 1.917e-04 6.929e-05 2.766 0.00593 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.175 on 398 degrees of freedom
## Multiple R-squared: 0.01886, Adjusted R-squared: 0.0164
## F-statistic: 7.653 on 1 and 398 DF, p-value: 0.005933
# Scatter plot of the modelled relationship: margin (y, response) against
# sales (x, predictor), with the fitted least-squares line from `slm` overlaid.
# The title follows the "y vs x" convention to match the axes.
plot(clothing$margin ~ clothing$sales,
     xlab = "Sales",
     ylab = "Margin",
     main = "Margin vs Sales")
abline(slm)

# Residuals against fitted values. Plotting against the fitted values (rather
# than the observation index) is what exposes non-linearity and non-constant
# variance; the horizontal line marks zero residual.
plot(fitted(slm), resid(slm),
     xlab = "Fitted values",
     ylab = "Residuals")
abline(h = 0)

# Normal Q-Q plot of the residuals, with a reference line, to check the
# normality assumption.
qqnorm(resid(slm))
qqline(resid(slm))
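# Summary: The initial scatter plot shows a weak relationship between margin
# and sales. Furthermore, the adjusted R^2 = 0.0164 indicates that sales
# explains less than 2% of the variation in margin, so even though the slope is
# statistically significant (p = 0.00593), sales barely affects the margin and
# the linear model is not appropriate in this case.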