# Load the point-level data.
# `stringsAsFactors = TRUE` is spelled out because the analysis below relies
# on text columns being factors (a factor response in the first glm, and
# as.numeric() on `reason`); since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
# The former attach(points) call is intentionally dropped: every later
# reference goes through `points$col` or through the `data` argument of a
# plotting call, and an attached copy would not pick up columns that are
# added to `points` afterwards.
points <- read.csv("points.csv", header = TRUE, stringsAsFactors = TRUE)
library(ggplot2)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 0.8.3 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Show which columns the points data frame provides.
colnames(points)
## [1] "X" "rallyid" "server" "returner" "winner"
## [6] "reason" "serve" "strokes" "totaltime" "x"
## [11] "y" "score"
# Logistic regression of the point winner on `strokes`.
# When `winner` is a factor, a binomial glm treats its first level as
# "failure" and all other levels as "success", so with alphabetically ordered
# levels this models P(winner is not "Djokovic") -- the opposite orientation
# to the 0/1 `winner1` response used by the later models.
# Columns are supplied through `data = points` instead of `points$...` inside
# the formula, which keeps term labels short and lets predict() accept
# `newdata`. Printed output therefore labels the term `strokes` rather than
# `points$strokes`; the estimates do not depend on how columns are referenced.
glm.fit <- glm(winner ~ strokes, data = points, family = "binomial")
glm.fit
##
## Call: glm(formula = points$winner ~ points$strokes, family = "binomial")
##
## Coefficients:
## (Intercept) points$strokes
## -0.20068 -0.05967
##
## Degrees of Freedom: 141 Total (i.e. Null); 140 Residual
## Null Deviance: 187.6
## Residual Deviance: 185.6 AIC: 189.6
# Full summary of the strokes-only model: coefficient table with standard
# errors and z tests, null/residual deviance and AIC.
summary(object = glm.fit)
##
## Call:
## glm(formula = points$winner ~ points$strokes, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.0690 -0.9974 -0.8625 1.3160 1.7175
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.20068 0.28280 -0.710 0.478
## points$strokes -0.05967 0.04322 -1.381 0.167
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 187.63 on 141 degrees of freedom
## Residual deviance: 185.60 on 140 degrees of freedom
## AIC: 189.6
##
## Number of Fisher Scoring iterations: 4
# Numeric 0/1 response: 1 when Djokovic won the point, 0 otherwise.
points$winner1 <- ifelse(points$winner == "Djokovic", 1, 0)

# Observed 0/1 outcomes against `strokes`, overlaid with a fitted logistic
# curve and its confidence band.
# Columns are named bare inside aes() so ggplot2 resolves them from `data`;
# writing `points$col` there bypasses the data argument (and breaks as soon as
# the plot is faceted or given different data). The family is built with
# binomial(link = "logit") rather than by calling a string literal.
ggplot(points, aes(x = strokes, y = winner1)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    se = TRUE
  )

# Logistic regression of P(Djokovic wins the point) on who served.
# Columns come from `data = points` rather than `points$...` in the formula,
# so the term is labelled `serverNadal` instead of `points$serverNadal` and
# predict() can be given `newdata`; the estimates are unaffected.
glm.fit1 <- glm(winner1 ~ server, data = points, family = "binomial")
summary(glm.fit1)
##
## Call:
## glm(formula = points$winner1 ~ points$server, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8271 -1.0969 0.6462 0.6462 1.2601
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.4604 0.3079 4.744 2.10e-06 ***
## points$serverNadal -1.6528 0.3874 -4.266 1.99e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 187.63 on 141 degrees of freedom
## Residual deviance: 167.31 on 140 degrees of freedom
## AIC: 171.31
##
## Number of Fisher Scoring iterations: 4
# Logistic regression of P(Djokovic wins the point) on server and `strokes`.
# Columns come from `data = points` rather than `points$...` in the formula,
# so terms are labelled `serverNadal` / `strokes` without the `points$`
# prefix and predict() can be given `newdata`; the estimates are unaffected.
glm.fit2 <- glm(winner1 ~ server + strokes, data = points, family = "binomial")
summary(glm.fit2)
##
## Call:
## glm(formula = points$winner1 ~ points$server + points$strokes,
## family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1686 -1.0033 0.6298 0.7310 1.3918
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.11648 0.38034 2.935 0.00333 **
## points$serverNadal -1.67444 0.39127 -4.279 1.87e-05 ***
## points$strokes 0.06676 0.04607 1.449 0.14733
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 187.63 on 141 degrees of freedom
## Residual deviance: 165.09 on 139 degrees of freedom
## AIC: 171.09
##
## Number of Fisher Scoring iterations: 4
# Logistic regression of P(Djokovic wins the point) on the point-ending
# reason, the server, `strokes` and `totaltime`.
# Columns come from `data = points` rather than `points$...` in the formula,
# so term labels lose the `points$` prefix and predict() can be given
# `newdata`; the estimates are unaffected.
# NOTE(review): in the recorded output the `double_fault` level has a very
# large estimate with a standard error in the thousands, and the fit needed
# 14 Fisher scoring iterations. That pattern usually indicates
# (quasi-)complete separation for that level, so its coefficient should not
# be interpreted as is.
glm.fit3 <- glm(
  winner1 ~ reason + server + strokes + totaltime,
  data = points,
  family = "binomial"
)
summary(glm.fit3)
##
## Call:
## glm(formula = points$winner1 ~ points$reason + points$server +
## points$strokes + points$totaltime, family = "binomial")
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3070 -0.9453 0.6066 0.7620 1.5071
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.45509 0.75180 1.935 0.0529 .
## points$reasondouble_fault 15.75252 1029.12175 0.015 0.9878
## points$reasonnet -0.74017 0.87586 -0.845 0.3981
## points$reasonout -0.48643 0.82444 -0.590 0.5552
## points$reasonwinner -0.41239 0.84412 -0.489 0.6252
## points$serverNadal -1.75866 0.40343 -4.359 1.31e-05 ***
## points$strokes 0.11712 0.08303 1.411 0.1584
## points$totaltime -0.02634 0.07611 -0.346 0.7293
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 187.63 on 141 degrees of freedom
## Residual deviance: 160.02 on 134 degrees of freedom
## AIC: 176.02
##
## Number of Fisher Scoring iterations: 14
# Numeric code for each point-ending reason (1 = first level, 2 = second, ...).
# Going through factor() makes this work whether `reason` was read as a factor
# or as plain character; as.numeric() on a character column would instead give
# NA for every row. For an existing factor with no unused levels the codes are
# the same as before.
points$reason1 <- as.numeric(factor(points$reason))
# Point-ending reason by winner, drawn three ways.

# 1. Counts per reason, one panel per winner.
ggplot(data = points, mapping = aes(x = reason, fill = winner)) +
  geom_bar(position = "stack") +
  facet_grid(. ~ winner)

# 2. Counts per reason, winners stacked within each bar.
ggplot(data = points, mapping = aes(x = reason, fill = winner)) +
  geom_bar(position = "stack")

# 3. Share of each winner within every reason (bars scaled to height 1).
ggplot(data = points, mapping = aes(x = reason, fill = winner)) +
  geom_bar(position = "fill")

# TRUE when the server won the point.
# The comparison is done on character values: `==` between two factors stops
# with an error if their level sets differ, which would happen if one player
# were missing from either column.
points$sWin <- as.character(points$winner) == as.character(points$server)

# Share of points won by each player, split by whether the server won.
ggplot(points, aes(x = sWin, fill = winner)) +
  geom_bar(position = "fill")

# Load the event-level data and join it to the points table.
events <- read.csv("events.csv", header = TRUE)

# View() opens a data viewer, which only makes sense in an interactive
# session; guard it so the script also runs under Rscript or when knitted.
if (interactive()) {
  View(events)
}

# Inner join on `rallyid` (merge() keeps only keys present in both tables).
# Any other column names shared by the two tables get the default `.x` / `.y`
# suffixes in the result.
merged <- merge(points, events, by = "rallyid")
if (interactive()) {
  View(merged)
}
# Composition of each hitter's events, as proportions (bars scaled to 1).

# Breakdown by `stroke`.
ggplot(data = events, mapping = aes(x = hitter, fill = stroke)) +
  geom_bar(position = "fill")

# Breakdown by `type`.
ggplot(data = events, mapping = aes(x = hitter, fill = type)) +
  geom_bar(position = "fill")
