# Load the point-level data.
# `stringsAsFactors = TRUE` pins the pre-4.0 read.csv() behaviour: text columns
# such as winner, server and reason arrive as factors, which the later glm()
# call on `winner` and the as.numeric() recoding of `reason` depend on.
# attach() is deliberately not used: every later reference goes through
# `points$...` or a `data` argument, and an attached copy would not see the
# columns added further down (winner1, reason1, sWin).
points <- read.csv("points.csv", header = TRUE, stringsAsFactors = TRUE)


library(ggplot2)
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   0.8.3     v dplyr   0.8.3
## v readr   1.3.1     v stringr 1.4.0
## v tibble  2.1.3     v forcats 0.4.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# List the column names of the point-level data.
colnames(points)
##  [1] "X"         "rallyid"   "server"    "returner"  "winner"   
##  [6] "reason"    "serve"     "strokes"   "totaltime" "x"        
## [11] "y"         "score"
# Logistic regression of the point winner on rally length (strokes).
# With a factor response, binomial glm() treats the first level as "failure"
# and every other level as "success", so this models the probability of the
# second level of `winner` -- presumably Nadal, assuming alphabetical levels
# (TODO confirm with levels(factor(points$winner))).
# factor() keeps the call working when read.csv() returns character columns
# (the default from R 4.0 on). `data = points` replaces `points$` inside the
# formula so predict(..., newdata = ) works and the term is labelled `strokes`.
# NOTE: the transcripts recorded in this file were captured from the original
# `points$winner ~ points$strokes` call, so they still show the prefixed
# labels; the estimates themselves are unaffected.
glm.fit <- glm(factor(winner) ~ strokes, data = points, family = binomial())
glm.fit
## 
## Call:  glm(formula = points$winner ~ points$strokes, family = "binomial")
## 
## Coefficients:
##    (Intercept)  points$strokes  
##       -0.20068        -0.05967  
## 
## Degrees of Freedom: 141 Total (i.e. Null);  140 Residual
## Null Deviance:       187.6 
## Residual Deviance: 185.6     AIC: 189.6
# Coefficient table (estimates, standard errors, z tests) and deviance summary
# for the winner ~ strokes model.
summary(glm.fit)
## 
## Call:
## glm(formula = points$winner ~ points$strokes, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0690  -0.9974  -0.8625   1.3160   1.7175  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -0.20068    0.28280  -0.710    0.478
## points$strokes -0.05967    0.04322  -1.381    0.167
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 187.63  on 141  degrees of freedom
## Residual deviance: 185.60  on 140  degrees of freedom
## AIC: 189.6
## 
## Number of Fisher Scoring iterations: 4
# Numeric 0/1 outcome: 1 when Djokovic won the point, 0 otherwise.
points$winner1 <- ifelse(points$winner == "Djokovic", 1, 0)

# Raw 0/1 outcomes against rally length, overlaid with a fitted logistic curve
# and its confidence band (se = TRUE).
# Columns are named bare inside aes() so ggplot2 looks them up in `points`;
# aes(points$strokes, ...) bypasses the data argument, is discouraged by
# ggplot2, and produces "points$strokes" as the axis title.
# The family is built with binomial(link = "logit") rather than by calling the
# string literal "binomial" as a function.
ggplot(points, aes(x = strokes, y = winner1)) +
  geom_point() +
  stat_smooth(
    method = "glm",
    method.args = list(family = binomial(link = "logit")),
    se = TRUE
  )

# Logistic regression of the Djokovic-won indicator (winner1) on the server.
# Variables are resolved through `data = points` instead of `points$` inside
# the formula, so predict(..., newdata = ) works and terms get plain labels.
# NOTE: the transcript recorded below came from the original `points$...`
# call, so its labels carry the `points$` prefix (e.g. `points$serverNadal`);
# the estimates are unaffected.
glm.fit1 <- glm(winner1 ~ server, data = points, family = binomial())
summary(glm.fit1)
## 
## Call:
## glm(formula = points$winner1 ~ points$server, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8271  -1.0969   0.6462   0.6462   1.2601  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          1.4604     0.3079   4.744 2.10e-06 ***
## points$serverNadal  -1.6528     0.3874  -4.266 1.99e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 187.63  on 141  degrees of freedom
## Residual deviance: 167.31  on 140  degrees of freedom
## AIC: 171.31
## 
## Number of Fisher Scoring iterations: 4
# Logistic regression of the Djokovic-won indicator (winner1) on the server
# and the rally length (strokes).
# Variables are resolved through `data = points` instead of `points$` inside
# the formula, so predict(..., newdata = ) works and terms get plain labels.
# NOTE: the transcript recorded below came from the original `points$...`
# call, so its labels carry the `points$` prefix; the estimates are unaffected.
glm.fit2 <- glm(winner1 ~ server + strokes, data = points, family = binomial())
summary(glm.fit2)
## 
## Call:
## glm(formula = points$winner1 ~ points$server + points$strokes, 
##     family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1686  -1.0033   0.6298   0.7310   1.3918  
## 
## Coefficients:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         1.11648    0.38034   2.935  0.00333 ** 
## points$serverNadal -1.67444    0.39127  -4.279 1.87e-05 ***
## points$strokes      0.06676    0.04607   1.449  0.14733    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 187.63  on 141  degrees of freedom
## Residual deviance: 165.09  on 139  degrees of freedom
## AIC: 171.09
## 
## Number of Fisher Scoring iterations: 4
# Logistic regression of the Djokovic-won indicator (winner1) on how the point
# ended (reason), the server, the rally length (strokes) and totaltime.
# Variables are resolved through `data = points` instead of `points$` inside
# the formula, so predict(..., newdata = ) works and terms get plain labels.
# NOTE: the transcript recorded below came from the original `points$...`
# call, so its labels carry the `points$` prefix; the estimates are unaffected.
# NOTE(review): that transcript shows 14 Fisher scoring iterations and a
# double_fault coefficient of 15.75 with a standard error above 1000, which
# looks like (quasi-)separation in that level -- treat that term's estimate
# and p-value as unreliable until confirmed.
glm.fit3 <- glm(
  winner1 ~ reason + server + strokes + totaltime,
  data = points,
  family = binomial()
)
summary(glm.fit3)
## 
## Call:
## glm(formula = points$winner1 ~ points$reason + points$server + 
##     points$strokes + points$totaltime, family = "binomial")
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3070  -0.9453   0.6066   0.7620   1.5071  
## 
## Coefficients:
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  1.45509    0.75180   1.935   0.0529 .  
## points$reasondouble_fault   15.75252 1029.12175   0.015   0.9878    
## points$reasonnet            -0.74017    0.87586  -0.845   0.3981    
## points$reasonout            -0.48643    0.82444  -0.590   0.5552    
## points$reasonwinner         -0.41239    0.84412  -0.489   0.6252    
## points$serverNadal          -1.75866    0.40343  -4.359 1.31e-05 ***
## points$strokes               0.11712    0.08303   1.411   0.1584    
## points$totaltime            -0.02634    0.07611  -0.346   0.7293    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 187.63  on 141  degrees of freedom
## Residual deviance: 160.02  on 134  degrees of freedom
## AIC: 176.02
## 
## Number of Fisher Scoring iterations: 14
# Level codes of `reason`, stored as numeric.
# Wrapping in factor() makes the recoding work whether read.csv() delivered a
# factor (codes of a freshly read factor are unchanged) or a character column,
# where as.numeric() alone would yield NA with a coercion warning.
# The codes only reflect level order; they are not a meaningful scale.
# NOTE(review): reason1 is not referenced anywhere else in the visible script.
points$reason1 <- as.numeric(factor(points$reason))



# Number of points per ending reason, one panel per winner.
ggplot(data = points, mapping = aes(reason, fill = winner)) +
  geom_bar(position = "stack") +
  facet_grid(cols = vars(winner))

# Same counts in a single panel, winners stacked within each reason.
ggplot(data = points, mapping = aes(reason, fill = winner)) +
  geom_bar(position = "stack")

# Share of each reason's points won by each player (every bar scaled to 1).
ggplot(data = points, mapping = aes(reason, fill = winner)) +
  geom_bar(position = position_fill())

# TRUE when the server won the point.
# The names are compared as character strings: `==` between two factors stops
# with "level sets of factors are different" unless both columns carry exactly
# the same set of levels.
points$sWin <- as.character(points$winner) == as.character(points$server)



# Share of points won by each player, split by whether the server won the
# point (every bar scaled to 1).
ggplot(data = points, mapping = aes(sWin, fill = winner)) +
  geom_bar(position = position_fill())

# Load the stroke-level event data.
events <- read.csv("events.csv", header = TRUE)

# View() opens a data viewer and needs an interactive session; guarding it
# keeps the script runnable under Rscript or when knitting.
if (interactive()) {
  View(events)
}

# Attach the point-level columns to every event of the same rally. merge()
# defaults to an inner join, so only rallyids present in both tables are kept;
# other columns that share a name get the default ".x" / ".y" suffixes.
merged <- merge(points, events, by = "rallyid")

if (interactive()) {
  View(merged)
}


# Mix of `stroke` values for each hitter (every bar scaled to 1).
ggplot(data = events, mapping = aes(hitter, fill = stroke)) +
  geom_bar(position = position_fill())

# Mix of `type` values for each hitter (every bar scaled to 1).
ggplot(data = events, mapping = aes(hitter, fill = type)) +
  geom_bar(position = position_fill())