This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
# Predictive Model for Los Angeles Dodgers Promotion and Attendance (R)
library(car) # special functions for linear regression
## Loading required package: carData
library(lattice) # graphics package
# Read the game-level data into a data frame called dodgers.
# The path is relative, so DodgersData.csv must sit in the working directory.
dodgers <- read.csv("DodgersData.csv")
# Check the structure of the data frame. str() prints its report itself and
# returns NULL invisibly, so it is called directly: wrapping it in print()
# only appended a stray "NULL" line to the output.
str(dodgers)
## 'data.frame': 81 obs. of 12 variables:
## $ month : chr "APR" "APR" "APR" "APR" ...
## $ day : int 10 11 12 13 14 15 23 24 25 27 ...
## $ attend : int 56000 29729 28328 31601 46549 38359 26376 44014 26345 44807 ...
## $ day_of_week: chr "Tuesday" "Wednesday" "Thursday" "Friday" ...
## $ opponent : chr "Pirates" "Pirates" "Pirates" "Padres" ...
## $ temp : int 67 58 57 54 57 65 60 63 64 66 ...
## $ skies : chr "Clear " "Cloudy" "Cloudy" "Cloudy" ...
## $ day_night : chr "Day" "Night" "Night" "Night" ...
## $ cap : chr "NO" "NO" "NO" "NO" ...
## $ shirt : chr "NO" "NO" "NO" "NO" ...
## $ fireworks : chr "NO" "NO" "NO" "YES" ...
## $ bobblehead : chr "NO" "NO" "NO" "NO" ...
# NOTE(review): the skies value "Clear " above carries a trailing space;
# confirm whether the source file should be cleaned.
# Define an ordered day-of-week factor (Mon ... Sun) for plots and data
# summaries. Mapping the names through factor(levels =, labels =) replaces
# a nested ifelse() chain whose final branch labelled every value other than
# Monday-Saturday (typos, stray whitespace, ...) as "Sun".
# NOTE(review): assumes Sundays are recorded as "Sunday"; the warning below
# flags any value that does not match.
day_names <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday",
               "Saturday", "Sunday")
# Values outside day_names become NA in the factor; report them rather than
# letting them pass unnoticed.
unknown_days <- setdiff(unique(dodgers$day_of_week), c(day_names, NA))
if (length(unknown_days) > 0) {
  warning("Unrecognized day_of_week values set to NA: ",
          paste(unknown_days, collapse = ", "), call. = FALSE)
}
dodgers$ordered_day_of_week <- factor(
  dodgers$day_of_week,
  levels = day_names,
  labels = c("Mon", "Tue", "Wed", "Thur", "Fri", "Sat", "Sun")
)
# Exploratory look with base graphics: attendance (in thousands) by day of
# week, using the ordered day-of-week factor on the x-axis.
plot(
  x = dodgers$ordered_day_of_week,
  y = dodgers$attend / 1000,
  xlab = "Day of Week",
  ylab = "Attendance (thousands)",
  col = "violet",
  las = 1
)
# When do the Dodgers run bobblehead promotions? Cross-tabulate the promotion
# flag against day of week (most bobblehead games fall on Tuesday).
table(
  bobblehead = dodgers$bobblehead,
  ordered_day_of_week = dodgers$ordered_day_of_week
)
## ordered_day_of_week
## bobblehead Mon Tue Wed Thur Fri Sat Sun
## NO 12 7 12 3 13 11 12
## YES 0 6 0 2 0 2 1
# Define an ordered month factor (April ... Oct) for plots and data
# summaries. Mapping the codes through factor(levels =, labels =) replaces
# a nested ifelse() chain whose final branch labelled every value other than
# APR-SEP (typos, stray whitespace, ...) as "Oct".
# NOTE(review): assumes October games are coded "OCT"; the warning below
# flags any value that does not match.
month_codes <- c("APR", "MAY", "JUN", "JUL", "AUG", "SEP", "OCT")
# Values outside month_codes become NA in the factor; report them rather than
# letting them pass unnoticed.
unknown_months <- setdiff(unique(dodgers$month), c(month_codes, NA))
if (length(unknown_months) > 0) {
  warning("Unrecognized month values set to NA: ",
          paste(unknown_months, collapse = ", "), call. = FALSE)
}
dodgers$ordered_month <- factor(
  dodgers$month,
  levels = month_codes,
  labels = c("April", "May", "June", "July", "Aug", "Sept", "Oct")
)
# Exploratory look with base graphics: attendance (in thousands) by month,
# using the ordered month factor on the x-axis.
plot(
  x = dodgers$ordered_month,
  y = dodgers$attend / 1000,
  xlab = "Month",
  ylab = "Attendance (thousands)",
  col = "light blue",
  las = 1
)
# Multi-variable exploratory view: attendance against temperature, one panel
# per combination of sky conditions and day/night, with the points grouped
# by whether fireworks were displayed.
library(lattice) # used for plotting
# Plotting settings, one entry per fireworks group
group.labels <- c("No Fireworks", "Fireworks")
group.symbols <- c(21, 24)
group.colors <- c("black", "black")
group.fill <- c("black", "red")
xyplot(
  attend / 1000 ~ temp | skies + day_night,
  data = dodgers,
  groups = fireworks,
  type = c("p", "g"),
  pch = group.symbols,
  col = group.colors,
  fill = group.fill,
  cex = 1.5,
  aspect = 1,
  layout = c(2, 2),
  strip = strip.custom(strip.levels = TRUE, strip.names = FALSE, style = 1),
  xlab = "Temperature (Degrees Fahrenheit)",
  ylab = "Attendance (thousands)",
  # the legend lists the groups in reverse order
  key = list(
    space = "top",
    text = list(rev(group.labels), col = rev(group.colors)),
    points = list(
      pch = rev(group.symbols),
      col = rev(group.colors),
      fill = rev(group.fill)
    )
  )
)
# Attendance by opponent, with day and night games drawn as different
# symbols (strip plot on a reference grid).
group.labels <- c("Day", "Night")
group.symbols <- c(1, 20)
group.symbols.size <- c(2, 2.75)
bwplot(
  opponent ~ attend / 1000,
  data = dodgers,
  groups = day_night,
  xlab = "Attendance (thousands)",
  panel = function(x, y, groups, subscripts, ...) {
    # read.csv() left opponent as a character column, and levels() of a
    # character vector is NULL, so length(levels(...)) - 1 was always -1
    # no matter how many opponents there are. Count the distinct opponents
    # instead; this works for character and factor columns alike.
    n_opponents <- length(unique(dodgers$opponent))
    panel.grid(h = n_opponents - 1, v = -1)
    panel.stripplot(
      x, y,
      groups = groups, subscripts = subscripts,
      cex = group.symbols.size, pch = group.symbols, col = "darkblue"
    )
  },
  key = list(
    space = "top",
    text = list(group.labels, col = "black"),
    points = list(
      pch = group.symbols,
      cex = group.symbols.size,
      col = "darkblue"
    )
  )
)
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.