A study of the impact of the month and the day of the week on the sales revenue of a sports shop.

Reading data into R

# Load the sports-shop sales data from sportss.csv (path is relative to the
# current working directory).
sports <- read.csv("sportss.csv")
# View() opens an interactive data viewer, so only call it in an interactive
# session; this keeps non-interactive runs (Rscript, knitting) from failing or
# blocking when no viewer is available.
if (interactive()) {
  View(sports)
}

Summarising the variables of the dataset

# psych supplies describe(), used further down for extended descriptive statistics.
library(psych)
# Min, quartiles, median, mean and max for every column of the data frame.
summary(sports)
##       YEAR           MONTH           DAYWEEK       SALESREV     
##  Min.   :86.00   Min.   : 1.000   Min.   :1.0   Min.   :  17.0  
##  1st Qu.:86.00   1st Qu.: 5.000   1st Qu.:2.0   1st Qu.: 384.8  
##  Median :87.00   Median : 8.000   Median :3.0   Median : 610.0  
##  Mean   :86.61   Mean   : 7.179   Mean   :3.5   Mean   : 743.5  
##  3rd Qu.:87.00   3rd Qu.:10.000   3rd Qu.:5.0   3rd Qu.: 932.0  
##  Max.   :87.00   Max.   :12.000   Max.   :6.0   Max.   :4613.0
# Per-variable descriptive statistics (n, mean, sd, median, trimmed mean, mad,
# min/max/range, skew, kurtosis, standard error).
describe(sports)
##          vars   n   mean     sd median trimmed    mad min  max range  skew
## YEAR        1 468  86.61   0.49     87   86.64   0.00  86   87     1 -0.45
## MONTH       2 468   7.18   3.12      8    7.34   2.97   1   12    11 -0.37
## DAYWEEK     3 468   3.50   1.71      3    3.50   2.97   1    6     5  0.00
## SALESREV    4 468 743.47 536.34    610  662.24 369.91  17 4613  4596  2.17
##          kurtosis    se
## YEAR        -1.81  0.02
## MONTH       -0.84  0.14
## DAYWEEK     -1.28  0.08
## SALESREV     7.96 24.79

Boxplot for sales revenue

# Horizontal box-and-whisker plot of the sales revenue distribution.
boxplot(
  x = sports$SALESREV,
  horizontal = TRUE,
  main = "Boxplot for sales revenue"
)

##Scatter plots

library(car)
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
# Scatter plot (car::scatterplot) of sales revenue on the y-axis against
# month on the x-axis.
scatterplot(
  x = sports$MONTH,
  y = sports$SALESREV,
  xlab = "Month",
  ylab = "Sales revenue",
  main = "Sales revenue vs month"
)

##Sales vs Day of the week

# Scatter plot (car::scatterplot) of sales revenue on the y-axis against
# day of the week on the x-axis.
scatterplot(
  x = sports$DAYWEEK,
  y = sports$SALESREV,
  xlab = "Day of the week",
  ylab = "Sales revenue",
  main = "Sales revenue vs day of the week"
)

##Jitter plots

##Sales vs Month

# Jittered scatter plot: sales revenue on the x-axis, month on the y-axis.
# jitter() adds a little random noise so overlapping points (month is a small
# set of integer values) become distinguishable.
plot(
  x = jitter(sports$SALESREV),
  y = jitter(sports$MONTH),
  xlab = "Sales revenue",
  ylab = "Month ",
  main = "Sales revenue vs month"
)

##Sales vs Day of the week

# Jittered scatter plot: sales revenue on the x-axis, day of the week on the
# y-axis. jitter() adds a little random noise so overlapping points separate.
plot(
  x = jitter(sports$SALESREV),
  y = jitter(sports$DAYWEEK),
  xlab = "Sales revenue",
  ylab = "Day of the week ",
  main = "Sales revenue vs Day of the week"
)

##BW PLOTS

 library(lattice)
# Lattice box-and-whisker plot of sales revenue (x-axis) for each month (y-axis).
bwplot(
  MONTH ~ SALESREV,
  data = sports,
  xlab = "Sales revenue",
  ylab = "Month ",
  main = "Sales revenue vs month"
)

 library(lattice)
# Lattice box-and-whisker plot of sales revenue (x-axis) for each day of the
# week (y-axis). The title now names the day of the week; it previously
# repeated the month plot's title.
bwplot(
  DAYWEEK ~ SALESREV,
  data = sports,
  xlab = "Sales revenue",
  ylab = "Day of the week",
  main = "Sales revenue vs day of the week"
)

##SCATTERPLOT MATRIX

# Scatter-plot matrix of sales revenue, month and day of the week, requesting
# histograms on the diagonal and a dashed (lty = 2) smoother without spread.
# NOTE(review): `spread`, `smoother.args` and a character `diagonal` look like
# the older car argument style - confirm the installed car version honours them.
scatterplotMatrix(
  sports[, c("SALESREV", "MONTH", "DAYWEEK")],
  diagonal = "histogram",
  smoother.args = list(lty = 2),
  spread = FALSE,
  main = "Scatter Plot Matrix"
)

##CORRGRAM

 library(corrgram)
 
# Collect the three variables into one data frame; data.frame() derives the
# column names from the `sports$...` expressions.
temp <- data.frame(sports$SALESREV, sports$MONTH, sports$DAYWEEK)
# Correlogram of the three variables: shaded cells in the lower triangle, pie
# glyphs in the upper triangle, text labels on the diagonal; order = TRUE lets
# corrgram reorder the variables.
corrgram(
  temp,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Sales Revenue"
)

##t-test

# Welch two-sample t-test of the SALESREV values against the MONTH values.
# NOTE(review): this compares mean revenue (~743) with the mean month number
# (~7.2), so the tiny p-value only reflects that the two columns are on
# different scales; it does not test whether revenue varies by month. Consider
# a per-month comparison (e.g. ANOVA of SALESREV by MONTH) - confirm intent.
t.test(sports$SALESREV,sports$MONTH)
## 
##  Welch Two Sample t-test
## 
## data:  sports$SALESREV and sports$MONTH
## t = 29.698, df = 467.03, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  687.5753 785.0144
## sample estimates:
##  mean of x  mean of y 
## 743.474359   7.179487
# Welch two-sample t-test of the SALESREV values against the DAYWEEK values.
# NOTE(review): this compares mean revenue (~743) with the mean weekday code
# (3.5), so the tiny p-value only reflects that the two columns are on
# different scales; it does not test whether revenue varies by weekday.
# Consider a per-weekday comparison (e.g. ANOVA of SALESREV by DAYWEEK) -
# confirm intent.
t.test(sports$SALESREV,sports$DAYWEEK)
## 
##  Welch Two Sample t-test
## 
## data:  sports$SALESREV and sports$DAYWEEK
## t = 29.846, df = 467.01, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  691.2554 788.6934
## sample estimates:
## mean of x mean of y 
##  743.4744    3.5000

Regression model

# Linear model of sales revenue on month and day of the week.
# `-1` in the formula drops the intercept, forcing the fit through the origin.
# NOTE(review): without an intercept, summary() computes R-squared relative to
# zero rather than to the mean of SALESREV, so the reported value is not
# comparable with an intercept model. MONTH and DAYWEEK also enter as single
# numeric slopes rather than categorical factors - confirm both are intended.
reg<-lm(SALESREV~MONTH+DAYWEEK-1, data = sports)
 # Residual summary, coefficient table and fit statistics for the model.
 summary(reg)
## 
## Call:
## lm(formula = SALESREV ~ MONTH + DAYWEEK - 1, data = sports)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -842.2 -321.5  -15.4  325.2 3972.8 
## 
## Coefficients:
##         Estimate Std. Error t value Pr(>|t|)    
## MONTH     72.986      6.001  12.162  < 2e-16 ***
## DAYWEEK   43.112     12.062   3.574 0.000388 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 572.8 on 466 degrees of freedom
## Multiple R-squared:  0.611,  Adjusted R-squared:  0.6094 
## F-statistic:   366 on 2 and 466 DF,  p-value: < 2.2e-16
# Print the fitted coefficients via the standard accessor.
coef(reg)
##    MONTH  DAYWEEK 
## 72.98636 43.11209