1a. numeric outcome with numeric predictor; 1b. numeric outcome with categorical predictor; 2a. categorical outcome with numeric predictor; 2b. categorical outcome with categorical predictor.
“Logistic regression is a type of regression analysis used for predicting a categorical outcome based on one or more predictor variables.”
# Numeric outcome with a numeric predictor: 15 paired observations of
# population density and crime rate, both on the log scale.
crimedata <- data.frame(
  lnPopulation = c(
    3.87, 3.61, 4.33, 3.43, 3.81, 3.83, 3.46, 3.76,
    3.50, 3.58, 4.19, 3.78, 3.71, 3.73, 3.78
  ),
  lnCrimeRate = c(
    4.87, 3.93, 6.46, 3.33, 4.38, 4.70, 3.50, 4.50,
    3.58, 3.64, 5.90, 4.43, 4.38, 4.42, 4.25
  )
)

# Scatter plot of the outcome against the predictor.
plot(
  lnCrimeRate ~ lnPopulation,
  data = crimedata,
  xlab = "Population Density on log scale",
  ylab = "Crime Rate on log scale",
  main = "Crime Rate in Taiwan"
)

# Overlay the fitted simple linear regression line.
abline(
  lm(lnCrimeRate ~ lnPopulation, data = crimedata),
  col = "blue"
)
# Numeric outcome with a categorical predictor: log crime rate grouped by the
# party color of the leader ("B" or "G").
#
# PartyColor is stored as an explicit factor. Since R 4.0, data.frame() keeps
# strings as character vectors, and plot() on a formula needs a factor on the
# right-hand side to draw one box per group.
crimedata <- data.frame(
  PartyColor = factor(c(
    "B", "B", "G", "G", "B", "G", "G", "B",
    "B", "G", "G", "G", "G", "B", "B"
  )),
  lnCrimeRate = c(
    4.87, 3.93, 6.46, 3.33, 4.38, 4.70, 3.50, 4.50,
    3.58, 3.64, 5.90, 4.43, 4.38, 4.42, 4.25
  )
)

# With a factor predictor this draws side-by-side box plots, placing the
# first level ("B") at x = 1 and the second ("G") at x = 2.
plot(
  lnCrimeRate ~ PartyColor,
  data = crimedata,
  xlab = "Party Color of the Leader",
  ylab = "Crime Rate on log scale",
  main = "Crime Rate in Taiwan"
)

# glm() with its default gaussian family gives the same fit as lm(): the
# intercept is the mean of group "B" and the slope is mean("G") - mean("B").
party_fit <- glm(lnCrimeRate ~ PartyColor, data = crimedata)
party_coef <- coef(party_fit)

# The model codes B = 0 and G = 1, but the boxes sit at x = 1 and x = 2.
# Shift the intercept by one slope so the line passes through both group
# means at the positions where the boxes are actually drawn.
abline(
  a = party_coef[[1]] - party_coef[[2]],
  b = party_coef[[2]],
  col = "red"
)
Use xtabs() when you want to numerically study the distribution of one categorical variable, or the relationship between two categorical variables. Categorical variables are also called “factor” variables in R.
# Categorical outcome with a categorical predictor: ten applications, each
# with an admission status and the applicant's gender.
mydata <- data.frame(
  Status = c(
    "Accepted", "Accepted", "Rejected", "Accepted", "Rejected",
    "Accepted", "Rejected", "Rejected", "Accepted", "Accepted"
  ),
  Gender = c(
    "Female", "Male", "Male", "Female", "Female",
    "Female", "Male", "Female", "Female", "Female"
  )
)

# Two-way contingency table of counts: Status in rows, Gender in columns.
xtabs(~ Status + Gender, data = mydata)
## Gender
## Status Female Male
## Accepted 5 1
## Rejected 2 2
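If we use the CrossTable() function from the gmodels package, we get a more detailed table that also reports chi-square contributions and row, column, and table proportions: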
# Attach gmodels, which provides CrossTable().
library(gmodels)
# Cross-tabulate Status (rows) against Gender (columns). The columns are
# passed as mydata$... expressions on purpose: the printed table is labelled
# with exactly these expressions.
CrossTable(mydata$Status, mydata$Gender)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 10
##
##
## | mydata$Gender
## mydata$Status | Female | Male | Row Total |
## --------------|-----------|-----------|-----------|
## Accepted | 5 | 1 | 6 |
## | 0.152 | 0.356 | |
## | 0.833 | 0.167 | 0.600 |
## | 0.714 | 0.333 | |
## | 0.500 | 0.100 | |
## --------------|-----------|-----------|-----------|
## Rejected | 2 | 2 | 4 |
## | 0.229 | 0.533 | |
## | 0.500 | 0.500 | 0.400 |
## | 0.286 | 0.667 | |
## | 0.200 | 0.200 | |
## --------------|-----------|-----------|-----------|
## Column Total | 7 | 3 | 10 |
## | 0.700 | 0.300 | |
## --------------|-----------|-----------|-----------|
##
##
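Use as.numeric() when you want to convert a two-level categorical variable to numbers: applied to a factor, it returns the integer code of each level (1 for the first level, 2 for the second). Categorical variables are also called “factor” variables in R.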
# Categorical outcome with a numeric predictor: final grades of 20 students
# and whether each one failed ("F") or passed ("P").
x <- c(
  52, 65, 47, 66, 66, 71, 64, 13, 26, 75,
  35, 10, 39, 67, 55, 33, 54, 66, 23, 45
)
y <- c(
  "F", "P", "F", "P", "P", "P", "P", "F", "F", "P",
  "F", "F", "F", "P", "F", "F", "F", "P", "F", "F"
)

# Levels are sorted alphabetically, so "F" is level 1 and "P" is level 2.
y1 <- as.factor(y)

# Numeric level codes of the factor: 1 = fail, 2 = pass.
z <- as.numeric(y1)

# Plot the 1/2 outcome code against the grade.
plot(
  x, z,
  xlab = "Final Grade",
  ylab = "Fail or Pass",
  main = "Categorical Outcome with numeric predictor"
)
# Logistic curve shifted up by one: values lie between 1 and 2 rather than
# 0 and 1, which matches a two-level outcome coded as 1 and 2.
sigmoid <- function(x) {
  logistic <- 1 / (1 + exp(-x))
  1 + logistic
}

# Evaluation grid from -60 to 20 in steps of 0.1.
x1 <- seq(from = -60, to = 20, by = 0.1)

# Draw the shifted logistic curve over the grid.
plot(
  x1, sigmoid(x1),
  col = "blue",
  xlab = "Normalized Final Grade",
  ylab = "Fail or Pass",
  main = "Sigmoid Model - Nonlinear Regression"
)