# Read the sports data set and print a first summary of every column.
# NOTE(review): the file name is spelled "sportss.csv" (double "s") - confirm
# that this matches the file on disk.
sports <- read.csv("sportss.csv")
# View() opens a spreadsheet-style viewer, which is only useful in an
# interactive session; guard it so the script can also be run in batch mode
# (e.g. via Rscript) without needing a display.
if (interactive()) {
  View(sports)
}
library(psych)
summary(sports)
## YEAR MONTH DAYWEEK SALESREV
## Min. :86.00 Min. : 1.000 Min. :1.0 Min. : 17.0
## 1st Qu.:86.00 1st Qu.: 5.000 1st Qu.:2.0 1st Qu.: 384.8
## Median :87.00 Median : 8.000 Median :3.0 Median : 610.0
## Mean :86.61 Mean : 7.179 Mean :3.5 Mean : 743.5
## 3rd Qu.:87.00 3rd Qu.:10.000 3rd Qu.:5.0 3rd Qu.: 932.0
## Max. :87.00 Max. :12.000 Max. :6.0 Max. :4613.0
# Descriptive statistics per column (n, mean, sd, median, skew, kurtosis, ...).
psych::describe(sports)
## vars n mean sd median trimmed mad min max range skew
## YEAR 1 468 86.61 0.49 87 86.64 0.00 86 87 1 -0.45
## MONTH 2 468 7.18 3.12 8 7.34 2.97 1 12 11 -0.37
## DAYWEEK 3 468 3.50 1.71 3 3.50 2.97 1 6 5 0.00
## SALESREV 4 468 743.47 536.34 610 662.24 369.91 17 4613 4596 2.17
## kurtosis se
## YEAR -1.81 0.02
## MONTH -0.84 0.14
## DAYWEEK -1.28 0.08
## SALESREV 7.96 24.79
# Horizontal box plot of the sales revenue column.
boxplot(
  sports$SALESREV,
  horizontal = TRUE,
  main = "Boxplot for sales revenue"
)
##Scatter plots
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Sales revenue (y axis) against month (x axis).
scatterplot(
  sports$MONTH, sports$SALESREV,
  xlab = "Month",
  ylab = "Sales revenue",
  main = "Sales revenue vs month"
)
# Sales revenue (y axis) against day of the week (x axis).
scatterplot(
  sports$DAYWEEK, sports$SALESREV,
  xlab = "Day of the week",
  ylab = "Sales revenue",
  main = "Sales revenue vs day of the week"
)
## Jitter plots
# Both coordinates are jittered; sales revenue is on the x axis in each plot.
# Month (y axis) against sales revenue.
with(
  sports,
  plot(
    jitter(SALESREV), jitter(MONTH),
    xlab = "Sales revenue",
    ylab = "Month ",
    main = "Sales revenue vs month"
  )
)
# Day of the week (y axis) against sales revenue.
with(
  sports,
  plot(
    jitter(SALESREV), jitter(DAYWEEK),
    xlab = "Sales revenue",
    ylab = "Day of the week ",
    main = "Sales revenue vs Day of the week"
  )
)
## Box-and-whisker plots (lattice)
# lattice only needs to be attached once for both plots.
library(lattice)
# Sales revenue (x axis) by month (y axis).
bwplot(
  MONTH ~ SALESREV,
  data = sports,
  main = "Sales revenue vs month",
  ylab = "Month ",
  xlab = "Sales revenue"
)
# Sales revenue (x axis) by day of the week (y axis). The title now names the
# variable actually plotted instead of repeating the month title.
bwplot(
  DAYWEEK ~ SALESREV,
  data = sports,
  main = "Sales revenue vs day of the week",
  ylab = "Day of the week",
  xlab = "Sales revenue"
)
## Scatter plot matrix of sales revenue, month and day of the week
# NOTE(review): `spread`, `smoother.args` and a character-valued `diagonal`
# look like the argument names used by car before version 3.0 - confirm they
# are still honoured by the installed car version.
scatterplotMatrix(
  sports[, c("SALESREV", "MONTH", "DAYWEEK")],
  diagonal = "histogram",
  spread = FALSE,
  smoother.args = list(lty = 2),
  main = "Scatter Plot Matrix"
)
##CORRGRAM
library(corrgram)
# Gather the three variables into their own data frame, then draw the
# correlogram with shaded lower panels and pie-chart upper panels.
temp <- data.frame(
  sports$SALESREV,
  sports$MONTH,
  sports$DAYWEEK
)
corrgram(
  temp,
  order = TRUE,
  lower.panel = panel.shade,
  upper.panel = panel.pie,
  text.panel = panel.txt,
  main = "Corrgram of Sales Revenue"
)
## Tests for differences in mean sales revenue between groups
# A two-sample t-test of SALESREV against MONTH (or DAYWEEK) compares the mean
# revenue with the mean of the month / weekday codes themselves, which says
# nothing about whether revenue varies by month or by weekday. Instead, treat
# MONTH and DAYWEEK as grouping factors and test for equal mean revenue across
# the groups. oneway.test() defaults to var.equal = FALSE, i.e. the Welch
# correction, so no equal-variance assumption is introduced.
# Does mean sales revenue differ between months?
oneway.test(SALESREV ~ factor(MONTH), data = sports)
# Does mean sales revenue differ between days of the week?
oneway.test(SALESREV ~ factor(DAYWEEK), data = sports)
## (recorded console output removed: it belonged to the replaced t.test()
## calls; re-run the two tests above to regenerate it)
# Linear regression of sales revenue on month and day of the week.
# The "- 1" term removes the intercept from the model.
# NOTE(review): R-squared from a no-intercept fit is not comparable with that
# of a model with an intercept - confirm the intercept is meant to be dropped.
reg <- lm(formula = SALESREV ~ MONTH + DAYWEEK - 1, data = sports)
summary(reg)
##
## Call:
## lm(formula = SALESREV ~ MONTH + DAYWEEK - 1, data = sports)
##
## Residuals:
## Min 1Q Median 3Q Max
## -842.2 -321.5 -15.4 325.2 3972.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## MONTH 72.986 6.001 12.162 < 2e-16 ***
## DAYWEEK 43.112 12.062 3.574 0.000388 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 572.8 on 466 degrees of freedom
## Multiple R-squared: 0.611, Adjusted R-squared: 0.6094
## F-statistic: 366 on 2 and 466 DF, p-value: < 2.2e-16
# Print the fitted coefficients of the regression model.
coef(reg)
## MONTH DAYWEEK
## 72.98636 43.11209