## load in data and seperate by game
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union

library(car)

Loading required package: carData


Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

library(tidyverse)

── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.2.1 ──

✔ ggplot2 3.0.0     ✔ readr   1.1.1
✔ tibble  1.4.2     ✔ purrr   0.2.5
✔ tidyr   0.8.1     ✔ stringr 1.3.1
✔ ggplot2 3.0.0     ✔ forcats 0.3.0

── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ car::recode()   masks dplyr::recode()
✖ purrr::some()   masks car::some()

library(knitr)
library(stringr)
library(PMCMRplus)
library(PMCMR)

PMCMR is superseded by PMCMRplus and will be no longer maintained. You may wish to install PMCMRplus instead.

library(DescTools)


Attaching package: 'DescTools'

The following object is masked from 'package:car':

    Recode

game1 <- read.csv("~/Desktop/Clippers/Clippers_game1.csv")
game1 <- game1[,c(1,2,3,8)]
game2 <- read.csv("~/Desktop/Clippers/Clippers_game2.csv")
game2 <- game2[,c(1,2,3,8)]
game3 <- read.csv("~/Desktop/Clippers/Clippers_game3.csv")
game3 <- game3[,c(1,2,3,8)]
game4 <- read.csv("~/Desktop/Clippers/Clippers_game4.csv")
game4 <- game4[,c(1,2,3,8)]
game5 <- read.csv("~/Desktop/Clippers/Clippers_game5.csv")
game5 <- game5[,c(1,2,3,8)]


g.1 <- rep(1,length(game1$Section))
game1 <- cbind(game1,g.1)
colnames(game1) <- c("Section", "Row", "Price", "Max Number of Tickets", "Group")


g.2 <- rep(2, length(game2$Section))
game2 <- cbind(game2,g.2)
colnames(game2) <- c("Section", "Row", "Price", "Max Number of Tickets", "Group")


g.3 <- rep(3, length(game3$Section))
game3 <- cbind(game3,g.3)
colnames(game3) <- c("Section", "Row", "Price", "Max Number of Tickets", "Group")


g.4 <- rep(4,length(game4$Section))
game4 <- cbind(game4,g.4)
colnames(game4) <- c("Section", "Row", "Price", "Max Number of Tickets", "Group")



g.5 <- rep(5,length(game5$Section))
game5 <- cbind(game5,g.5)
colnames(game5) <- c("Section", "Row", "Price", "Max Number of Tickets", "Group")


# combine games
total.game <- rbind(game1,game2,game3,game4,game5)

1)

1A) Number of Tickets Posted

There were 2382 types of tickets you could buy, ranging from 1-18 tickets each for a total of 9277 total tickets on sale.

length(total.game$Section)

[1] 2382

sum(total.game$`Max Number of Tickets`)

[1] 9277

1B) The average ticket price was $174.38

round(mean(total.game$Price),2)

[1] 174.38

1C) I will use the filter function to sort by section (100,200,…) and use grep to remove Courtside as we will create its own group.

section.100 <- total.game %>% filter(str_detect(total.game$Section, "1.."))
section.100 <- section.100[- grep("Courtside", section.100$Section),]

section.200 <- total.game %>% filter(str_detect(Section, "2.."))

section.300 <- total.game %>% filter(str_detect(Section, "3.."))

courtside <- total.game %>% filter(str_detect(Section, "Courtside"))

Minimum Price by level

min.price.courtside <- min(courtside$Price)
min.price.section100 <- min(section.100$Price)
min.price.section200 <- min(section.200$Price)
min.price.section300 <- min(section.300$Price)

sectionDataFrame <- data.frame(
  courtside = min.price.courtside,
  section100 = min.price.section100,
  section200 = min.price.section200,
  section300 = min.price.section300
)
rownames(sectionDataFrame) <- "Price"
kable(sectionDataFrame,digits = 2)

	courtside	section100	section200	section300
Price	720	32.54	28.8	6.12

1D) Here are the types of sections that have tickets posted for a total of 102 different sections.

levels(total.game$Section)

  [1] "Baseline Floor 106 - Rows B-M" "Baseline Floor 115 - Rows B-M"
  [3] "Baseline Floor 117 - Rows B-M" "Courtside 102"                
  [5] "Courtside 111"                 "Courtside 112"                
  [7] "Courtside 119"                 "Lexus Club"                   
  [9] "Lower Baseline 103"            "Lower Baseline 118"           
 [11] "Lower Center 101"              "Lower Center 102"             
 [13] "Lower Center 111"              "Lower Center 112"             
 [15] "Lower Center 119"              "Lower Corner 105"             
 [17] "Lower Corner 108"              "Lower Corner 114"             
 [19] "Lower Corner 117"              "Lower End 106"                
 [21] "Lower End 107"                 "Lower End 115"                
 [23] "Lower End 116"                 "Mid Level 206"                
 [25] "Mid Level 207"                 "Mid Level 208"                
 [27] "Mid Level 209"                 "Mid Level 210"                
 [29] "Mid Level 214"                 "Mid Level 215"                
 [31] "Mid Level 216"                 "Mid Level 217"                
 [33] "Mid Level 218"                 "Mid Level 219"                
 [35] "Premier 1"                     "Premier 10"                   
 [37] "Premier 11"                    "Premier 12"                   
 [39] "Premier 13"                    "Premier 14"                   
 [41] "Premier 15"                    "Premier 16"                   
 [43] "Premier 17"                    "Premier 18"                   
 [45] "Premier 2"                     "Premier 3"                    
 [47] "Premier 4"                     "Premier 5"                    
 [49] "Premier 6"                     "Premier 7"                    
 [51] "Premier 8"                     "Premier 9"                    
 [53] "San Manuel Club Tables 12"     "Suite B 3"                    
 [55] "Suite B 4"                     "Suite B 40"                   
 [57] "Suite B 42"                    "Suite C 1"                    
 [59] "Suite C 2"                     "Suite C 23"                   
 [61] "Suite C 30"                    "Suite C 41"                   
 [63] "Suite C 42"                    "Suite C 45"                   
 [65] "Suite C 5"                     "Upper Center 301"             
 [67] "Upper Center 302"              "Upper Center 303"             
 [69] "Upper Center 316"              "Upper Center 317"             
 [71] "Upper Center 318"              "Upper Center 319"             
 [73] "Upper Center 333"              "Upper Center 334"             
 [75] "Upper Corner 304"              "Upper Corner 305"             
 [77] "Upper Corner 306"              "Upper Corner 313"             
 [79] "Upper Corner 314"              "Upper Corner 315"             
 [81] "Upper Corner 321"              "Upper Corner 322"             
 [83] "Upper Corner 332"              "Upper End 307"                
 [85] "Upper End 308"                 "Upper End 309"                
 [87] "Upper End 310"                 "Upper End 311"                
 [89] "Upper End 312"                 "Suite B 20"                   
 [91] "Suite B 23"                    "Suite B 59"                   
 [93] "Suite C 25"                    "Suite C 26"                   
 [95] "Suite C 3"                     "Suite C 4"                    
 [97] "Upper End 328"                 "Baseline Floor 116 - Rows B-M"
 [99] "Courtside 101"                 "Upper End 324"                
[101] "Lower Corner 104"              "Upper Corner 331"

2)

Number of Tickets Posted.

There is a summary table below which you will see the quartiles of the combined total Maximum number of Tickets sold.

The first boxplot is the combined total with the maximum number of tickets sold for a certain section which was 18. The second boxplot is broken down by each of the first 5 Clippers games.

summary(total.game$`Max Number of Tickets`)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   2.000   3.000   3.895   4.000  18.000

s <- ggplot(data = total.game, aes(x = "", y = total.game$`Max Number of Tickets`))
s <- s + geom_boxplot() + xlab("Group") 
s

s <- ggplot(data = total.game, aes(x = factor(Group), y = total.game$`Max Number of Tickets` ,color=factor(Group)))
s <- s + geom_boxplot() + xlab("Group") 
s <- s + geom_jitter(shape=16,position = position_jitter(0.0)) 
s <- s+ labs(color = "Group")
s

Average Ticket Price

There is a summary table below describing the combined game price with a mean of $174.38 and a maximum of $6240.

Also below is a histogram of the combined price to get in a game with the red line being the density.

summary(total.game$Price)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   6.12   46.08   77.28  174.38  144.00 6240.00

hist(total.game$Price, probability = T, breaks = 1000)
lines(density(total.game$Price), col = "red")

These plots show the price by Section (100,200,300,Courtside) for each of the first five Clippers games.

s <- ggplot(data = section.100, aes(x = factor(Group), y = Price,color=factor(Group)))
s <- s + geom_jitter(shape=16,position = position_jitter(0.0)) 
s <- s+ labs(color = "Group")+ ggtitle("Level 100")
s <- s + theme(plot.title = element_text(hjust = 0.5))
s <- s + xlab("Game") 
s

s <- ggplot(data = section.200, aes(x = factor(Group), y = Price,color=factor(Group)))
s <- s + geom_jitter(shape=16,position = position_jitter(0.0)) 
s <- s+ labs(color = "Group")+ ggtitle("Level 200")
s <- s + theme(plot.title = element_text(hjust = 0.5))
s <- s + xlab("Game") 
s

s <- ggplot(data = section.300, aes(x = factor(Group), y = Price,color=factor(Group)))
s <- s + geom_jitter(shape=16,position = position_jitter(0.0)) 
s <- s+ labs(color = "Group")+ ggtitle("Level 300")
s <- s + theme(plot.title = element_text(hjust = 0.5))
s <- s + xlab("Game") 
s

s <- ggplot(data = courtside, aes(x = factor(Group), y = Price,color=factor(Group)))
s <- s + geom_jitter(shape=16,position = position_jitter(0.0)) 
s <- s+ labs(color = "Group")+ ggtitle("Courtside")
s <- s + theme(plot.title = element_text(hjust = 0.5))
s <- s + xlab("Game") 
s

3)

Scraping was done using “Data Miner” which lets you create “recipes” that you can use over and over again. I was able to download each recipe to a CSV file and filter using some “IF” statements in Excel. From there I will import the data into R for analysis.

4) Summary

Below you will see an ANOVA was run to see if there was a significant difference between each level (100,200,300,Courtside). The results were significant but when I plotted the data it is clear that it is not distributed normally. Levene’s test for homogeneity of variance also fails, which confirms that the data is not normally distributed.

A Kruskal Wallace test was run which is a non-parametric test (as opposed to the parametric test ANOVA) that does not need to assume normality. The Kruskal Wallace test was significant so finally a Conover Test was run, which is more powerful than Dunn or the Mann-Whitney tests, and does multiple comparisons to see which pairs are significant.

The Conover Test finally showed us that each group was statistically significant from every other group, concluding that there is a significant difference in price between each level.

## split up by section
total.section <- rep(1, length(section.100$Section))
section.100 <- cbind(section.100, total.section)
total.section <- rep(2, length(section.200$Section))
section.200 <- cbind(section.200, total.section)
total.section <- rep(3, length(section.300$Section))
section.300 <- cbind(section.300, total.section)
total.section <- rep(4, length(courtside$Section))
courtside <- cbind(courtside, total.section)

all.sections.analysis <- rbind(section.100, section.200, section.300, courtside)
all.sections.analysis$total.section <- as.factor(all.sections.analysis$total.section)

aovOut <- aov(Price~ total.section, data = all.sections.analysis)
summary(aovOut)

                Df    Sum Sq  Mean Sq F value Pr(>F)    
total.section    3 265224465 88408155    1155 <2e-16 ***
Residuals     1681 128617227    76512                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

leveneTest(aovOut)

Levene's Test for Homogeneity of Variance (center = median)
        Df F value    Pr(>F)    
group    3  608.72 < 2.2e-16 ***
      1681                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

plot(aovOut)

kruskal.test(Price~total.section, data = all.sections.analysis)


    Kruskal-Wallis rank sum test

data:  Price by total.section
Kruskal-Wallis chi-squared = 868.75, df = 3, p-value < 2.2e-16

ConoverTest(Price~total.section, data = all.sections.analysis)


 Conover's test of multiple comparisons : holm  

    mean.rank.diff   pval    
2-1      -385.8604 <2e-16 ***
3-1      -721.7436 <2e-16 ***
4-1       506.5661 <2e-16 ***
3-2      -335.8832 <2e-16 ***
4-2       892.4265 <2e-16 ***
4-3      1228.3097 <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Clippers

Justin Cowden

September 17, 2018

1)

1A) Number of Tickets Posted

1B) The average ticket price was $174.38

1C) I will use the filter function to sort by section (100,200,…) and use grep to remove Courtside as we will create its own group.

Minimum Price by level

1D) Here are the types of sections that have tickets posted for a total of 102 different sections.

2)

Number of Tickets Posted.

There is a summary table below which you will see the quartiles of the combined total Maximum number of Tickets sold.

The first boxplot is the combined total with the maximum number of tickets sold for a certain section which was 18. The second boxplot is broken down by each of the first 5 Clippers games.

Average Ticket Price

There is a summary table below describing the combined game price with a mean of $174.38 and a maximum of $6240.

Also below is a histogram of the combined price to get in a game with the red line being the density.

These plots show the price by Section (100,200,300,Courtside) for each of the first five Clippers games.

3)

Scraping was done using “Data Miner” which lets you create “recipes” that you can use over and over again. I was able to download each recipe to a CSV file and filter using some “IF” statements in Excel. From there I will import the data into R for analysis.

4) Summary

The Conover Test finally showed us that each group was statistically significant from every other group, concluding that there is a significant difference in price between each level.