Checking numbers of observations used in analysis

Load packages

library(magrittr)
library(survival)

Linear models

## Read the crime data (Stata format) and regress crime on poverty and single.
## NOTE(review): the data is fetched over the network; confirm the URL still
## resolves before relying on this chunk.
cdata <- foreign::read.dta("http://www.ats.ucla.edu/stat/data/crime.dta")
ols <- lm(crime ~ poverty + single, data = cdata)
## The residual degrees of freedom + n of parameters is the n of obs
## 48 + 3 = 51
summary(ols)
## 
## Call:
## lm(formula = crime ~ poverty + single, data = cdata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -811.14 -114.27  -22.44  121.86  689.82 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1368.189    187.205  -7.308 2.48e-09 ***
## poverty         6.787      8.989   0.755    0.454    
## single        166.373     19.423   8.566 3.12e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 243.6 on 48 degrees of freedom
## Multiple R-squared:  0.7072, Adjusted R-squared:  0.695 
## F-statistic: 57.96 on 2 and 48 DF,  p-value: 1.578e-13
## Or you can check the row count of the design matrix.
nrow(model.matrix(ols))
## [1] 51
## Or you can check the length of the stored fitted values.
## The component name is written out in full: `ols$fitted` only works because
## `$` partially matches it to `fitted.values`.
length(ols$fitted.values)
## [1] 51
## Or you can compute the in-sample predictions on the fly and check length
length(predict(ols))
## [1] 51

Generalized linear models

## Logistic regression
mydata <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
mydata$rank <- factor(mydata$rank)
mylogit <- glm(admit ~ gre + gpa + rank, data = mydata, family = "binomial")
## The null deviance's degree of freedom is the number of obs - 1
## 399 + 1 = 400
summary(mylogit)
## 
## Call:
## glm(formula = admit ~ gre + gpa + rank, family = "binomial", 
##     data = mydata)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6268  -0.8662  -0.6388   1.1490   2.0790  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -3.989979   1.139951  -3.500 0.000465 ***
## gre          0.002264   0.001094   2.070 0.038465 *  
## gpa          0.804038   0.331819   2.423 0.015388 *  
## rank2       -0.675443   0.316490  -2.134 0.032829 *  
## rank3       -1.340204   0.345306  -3.881 0.000104 ***
## rank4       -1.551464   0.417832  -3.713 0.000205 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 499.98  on 399  degrees of freedom
## Residual deviance: 458.52  on 394  degrees of freedom
## AIC: 470.52
## 
## Number of Fisher Scoring iterations: 4
## Or you can check the row count of the design matrix.
nrow(model.matrix(mylogit))
## [1] 400
## Or you can check the fitted predicted value length
length(mylogit$fitted)
## [1] 400
## Or you can fit on-the-fly and check length
length(predict(mylogit))
## [1] 400