# Discussion 11

# Load the clothing-store data set straight from GitHub and preview it.
# NOTE: the former `rm(list = ls())` call was dropped. It wipes the caller's
# workspace whenever the script is sourced, yet it does not reset loaded
# packages or options, so it never guaranteed a clean session anyway.
clothing <- read.csv(
  "https://raw.githubusercontent.com/VioletaStoyanova/Data605/master/Clothing.csv",
  header = TRUE
)
head(clothing)
##     tsale     sales margin nown  nfull  npart   naux hoursw  hourspw
## 1  750000  4411.765     41    1 1.0000 1.0000 1.5357     76 16.75596
## 2 1926395  4280.878     39    2 2.0000 3.0000 1.5357    192 22.49376
## 3 1250000  4166.667     40    1 2.0000 2.2222 1.4091    114 17.19120
## 4  694227  2670.104     40    1 1.0000 1.2833 1.3673    100 21.50260
## 5  750000 15000.000     44    2 1.9556 1.2833 1.3673    104 15.74279
## 6  400000  4444.444     41    2 1.9556 1.2833 1.3673     72 10.89885
##        inv1     inv2 ssize start
## 1  17166.67 27177.04   170  1984
## 2  17166.67 27177.04   450  1972
## 3 292857.20 71570.55   300  1952
## 4  22207.04 15000.00   260  1966
## 5  22207.04 10000.00    50  1996
## 6  22207.04 22859.85    90  1947
# Per-column summary statistics for the clothing data.
summary(object = clothing)
##      tsale             sales           margin           nown       
##  Min.   :  50000   Min.   :  300   Min.   :16.00   Min.   : 1.000  
##  1st Qu.: 495340   1st Qu.: 3904   1st Qu.:37.00   1st Qu.: 1.000  
##  Median : 694227   Median : 5279   Median :39.00   Median : 1.000  
##  Mean   : 833584   Mean   : 6335   Mean   :38.77   Mean   : 1.284  
##  3rd Qu.: 976817   3rd Qu.: 7740   3rd Qu.:41.00   3rd Qu.: 1.295  
##  Max.   :5000000   Max.   :27000   Max.   :66.00   Max.   :10.000  
##      nfull           npart            naux           hoursw     
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   : 32.0  
##  1st Qu.:1.923   1st Qu.:1.283   1st Qu.:1.333   1st Qu.: 80.0  
##  Median :1.956   Median :1.283   Median :1.367   Median :104.0  
##  Mean   :2.069   Mean   :1.566   Mean   :1.390   Mean   :121.1  
##  3rd Qu.:2.066   3rd Qu.:2.000   3rd Qu.:1.367   3rd Qu.:145.2  
##  Max.   :8.000   Max.   :9.000   Max.   :4.000   Max.   :582.0  
##     hourspw            inv1              inv2            ssize       
##  Min.   : 5.708   Min.   :   1000   Min.   :   350   Min.   :  16.0  
##  1st Qu.:13.541   1st Qu.:  20000   1st Qu.: 10000   1st Qu.:  80.0  
##  Median :17.745   Median :  22207   Median : 22860   Median : 120.0  
##  Mean   :18.955   Mean   :  58257   Mean   : 27829   Mean   : 151.1  
##  3rd Qu.:24.303   3rd Qu.:  62269   3rd Qu.: 22860   3rd Qu.: 190.0  
##  Max.   :43.326   Max.   :1500000   Max.   :400000   Max.   :1214.0  
##      start     
##  Min.   :1945  
##  1st Qu.:1959  
##  Median :1978  
##  Mean   :1978  
##  3rd Qu.:1996  
##  Max.   :2015
# Exploratory plots: distribution of each variable, then their joint scatter.
# Explicit axis labels replace the default deparsed `clothing$...` expressions,
# in line with the labelled regression plot further down the script.
hist(clothing$sales, main = "Histogram of Sales", xlab = "Sales")

hist(clothing$margin, main = "Histogram of Margin", xlab = "Margin")

# Formula is y ~ x: sales on the vertical axis, margin on the horizontal axis.
plot(sales ~ margin, data = clothing,
     xlab = "Margin",
     ylab = "Sales",
     main = "Sales vs Margin")

# Simple linear regression of margin (response) on sales (predictor).
# Variables are named in the formula and resolved through `data =` rather than
# written as `clothing$...`: this gives clean coefficient labels and lets
# predict(slm, newdata = ...) work. The fitted model is the same, so in the
# recorded output below only the Call line and the coefficient label changed.
slm <- lm(margin ~ sales, data = clothing)
summary(slm)
## 
## Call:
## lm(formula = margin ~ sales, data = clothing)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.5566  -2.2463   0.6059   2.7198  27.9833 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 3.756e+01  5.095e-01  73.718  < 2e-16 ***
## sales       1.917e-04  6.929e-05   2.766  0.00593 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.175 on 398 degrees of freedom
## Multiple R-squared:  0.01886,    Adjusted R-squared:  0.0164 
## F-statistic: 7.653 on 1 and 398 DF,  p-value: 0.005933
# Scatter of margin (vertical axis) against sales (horizontal axis), with the
# fitted regression line from `slm` drawn on top.
plot(
  margin ~ sales,
  data = clothing,
  main = "Sales vs Margin",
  xlab = "Sales",
  ylab = "Margin"
)
abline(reg = slm)

# Residual diagnostics ----
# Residuals in observation order, with a horizontal reference line at zero.
plot(residuals(slm), ylab = "Residuals")
abline(h = 0)

# Normal Q-Q plot of the residuals, together with its reference line.
qqnorm(residuals(slm))
qqline(residuals(slm))

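# Summary: The initial scatter plot shows a weak relationship between margin
# and sales. Furthermore, the adjusted R^2 = 0.0164 indicates that sales
# explains less than 2% of the variation in margin. Although the slope is
# statistically significant (p = 0.0059), sales has very little effect on
# margin, so the linear model is not appropriate in this case.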