# Build a small reproducible data set: one simulated price for every
# Gender x Product combination.
set.seed(234)
GenderAndProduct <- expand.grid(
  Gender = c("M", "F"),
  Product = LETTERS[1:3]
)
# One Poisson draw (lambda = 4) per row of the grid.
Price <- rpois(n = nrow(GenderAndProduct), lambda = 4)
DataSet <- cbind(GenderAndProduct, Price)
DataSet
## Gender Product Price
## 1 M A 5
## 2 F A 5
## 3 M B 1
## 4 F B 5
## 5 M C 1
## 6 F C 5
# Inspect the column types of the assembled data frame.
str(DataSet)
## 'data.frame': 6 obs. of 3 variables:
## $ Gender : Factor w/ 2 levels "M","F": 1 2 1 2 1 2
## $ Product: Factor w/ 3 levels "A","B","C": 1 1 2 2 3 3
## $ Price : int 5 5 1 5 1 5
The data set is already factorized: the str() output shows that Gender and Product are both factors.
library(vcdExtra)
## Warning: package 'vcdExtra' was built under R version 3.4.4
## Loading required package: vcd
## Warning: package 'vcd' was built under R version 3.4.4
## Loading required package: grid
## Loading required package: gnm
## Warning: package 'gnm' was built under R version 3.4.4
# Load the WomenQueue frequency table. The data set ships with 'vcd' (attached
# as a dependency of 'vcdExtra'), not with 'vcdExtra' itself; requesting it
# from 'vcdExtra' only produced a "data set not found" warning.
data("WomenQueue", package = "vcd")
WomenQueue
## nWomen
## 0 1 2 3 4 5 6 7 8 9 10
## 1 3 4 23 25 19 18 5 1 1 0
# Bar chart of the observed counts.
barplot(WomenQueue)
# Ord plot of the counts; the returned intercept and slope are used to
# diagnose which discrete distribution the data follow.
Women_Ord <- Ord_plot(obj = WomenQueue, main = "Women Queue")
Women_Ord
## Intercept Slope
## 11.471435 -1.121733
Since the slope is negative and the intercept is positive, the Ord plot indicates that the data follow a binomial distribution.
# Load the Hospital two-way table (visit frequency by length of stay).
library("vcd")
data("Hospital", package = "vcd")
Hospital
## Length of stay
## Visit frequency 2-9 10-19 20+
## Regular 43 16 3
## Less than monthly 6 11 10
## Never 9 18 16
# Row proportions: each row sums to 1.
prop.table(Hospital, margin = 1)
## Length of stay
## Visit frequency 2-9 10-19 20+
## Regular 0.6935484 0.2580645 0.0483871
## Less than monthly 0.2222222 0.4074074 0.3703704
## Never 0.2093023 0.4186047 0.3720930
# Column proportions: each column sums to 1.
prop.table(Hospital, margin = 2)
## Length of stay
## Visit frequency 2-9 10-19 20+
## Regular 0.7413793 0.3555556 0.1034483
## Less than monthly 0.1034483 0.2444444 0.3448276
## Never 0.1551724 0.4000000 0.5517241
# Pearson chi-squared test of independence between rows and columns.
chisq.test(x = Hospital)
##
## Pearson's Chi-squared test
##
## data: Hospital
## X-squared = 35.171, df = 4, p-value = 4.284e-07
As the p-value is smaller than the 0.05 significance level, we reject the null hypothesis of independence. Therefore the variables are associated; they are not independent.
# Repeat the test with a Monte Carlo p-value; B = 2000 replicates is the
# default, spelled out here to match the reported output.
chisq.test(x = Hospital, simulate.p.value = TRUE, B = 2000)
##
## Pearson's Chi-squared test with simulated p-value (based on 2000
## replicates)
##
## data: Hospital
## X-squared = 35.171, df = NA, p-value = 0.0004998
Even with the Monte Carlo simulation, the p-value is still smaller than alpha. This confirms that the variables are associated.
library(MASS)
## Warning: package 'MASS' was built under R version 3.4.4
# Log-linear independence model for the two-way table: main effects for the
# first and second table dimensions only, no interaction term.
MASS::loglm(
  ~ 1 + 2,
  data = Hospital,
  fitted = TRUE
)
## Call:
## MASS::loglm(formula = ~1 + 2, data = Hospital, fitted = TRUE)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 38.35297 4 9.475535e-08
## Pearson 35.17109 4 4.284198e-07
# Same test statistics plus association measures (contingency coefficient,
# Cramer's V).
assocstats(x = Hospital)
## X^2 df P(> X^2)
## Likelihood Ratio 38.353 4 9.4755e-08
## Pearson 35.171 4 4.2842e-07
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.459
## Cramer's V : 0.365
Cramér's V (0.365) shows that there is an association between the two variables.
# Cochran-Mantel-Haenszel tests of association between visit frequency and
# length of stay; the output lists one test per alternative hypothesis.
CMHtest(Hospital)
## Cochran-Mantel-Haenszel Statistics for Visit frequency by Length of stay
##
## AltHypothesis Chisq Df Prob
## cor Nonzero correlation 29.138 1 6.7393e-08
## rmeans Row mean scores differ 34.391 2 3.4044e-08
## cmeans Col mean scores differ 29.607 2 3.7233e-07
## general General association 34.905 4 4.8596e-07
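Nonzero correlation (cor) - both the row and the column variable are treated as ordinal (ordered). Row mean scores differ (rmeans) - only the column variable is treated as ordinal: scores are assigned to the columns and the resulting mean scores are compared across rows. Column mean scores differ (cmeans) - only the row variable is treated as ordinal: scores are assigned to the rows and the resulting mean scores are compared across columns. General association (general) - neither variable is treated as ordinal.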
# Shaded mosaic plot of the table with residuals printed in the tiles.
mosaic(
  Hospital,
  shade = TRUE,
  labeling = labeling_residuals
)
library(ca)
## Warning: package 'ca' was built under R version 3.4.4
# Correspondence analysis of the table; fit once, then print and plot it.
hospital_ca <- ca(Hospital)
hospital_ca
##
## Principal inertias (eigenvalues):
## 1 2
## Value 0.266408 4e-05
## Percentage 99.98% 0.02%
##
##
## Rows:
## Regular Less than monthly Never
## Mass 0.469697 0.204545 0.325758
## ChiDist 0.548379 0.472911 0.493871
## Inertia 0.141247 0.045745 0.079455
## Dim. 1 -1.062448 0.915982 0.956750
## Dim. 2 0.015355 -1.746386 1.074428
##
##
## Columns:
## 2-9 10-19 20+
## Mass 0.439394 0.340909 0.219697
## ChiDist 0.544465 0.228999 0.733852
## Inertia 0.130255 0.017878 0.118315
## Dim. 1 -1.054853 0.443377 1.421707
## Dim. 2 -0.403915 1.317858 -1.237123
plot(hospital_ca)
# Load the four-way Titanic table (Class x Sex x Age x Survived) and print it.
data("Titanic", package = "datasets")
Titanic
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0 0
## 2nd 0 0
## 3rd 35 17
## Crew 0 0
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118 4
## 2nd 154 13
## 3rd 387 89
## Crew 670 3
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5 1
## 2nd 11 13
## 3rd 13 14
## Crew 0 0
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57 140
## 2nd 14 80
## 3rd 75 76
## Crew 192 20
# Shift every cell by 0.2 so that no cell count is exactly zero
# (presumably to keep the log-linear fits well defined -- TODO confirm).
Titanic <- Titanic + 0.2
Titanic
## , , Age = Child, Survived = No
##
## Sex
## Class Male Female
## 1st 0.2 0.2
## 2nd 0.2 0.2
## 3rd 35.2 17.2
## Crew 0.2 0.2
##
## , , Age = Adult, Survived = No
##
## Sex
## Class Male Female
## 1st 118.2 4.2
## 2nd 154.2 13.2
## 3rd 387.2 89.2
## Crew 670.2 3.2
##
## , , Age = Child, Survived = Yes
##
## Sex
## Class Male Female
## 1st 5.2 1.2
## 2nd 11.2 13.2
## 3rd 13.2 14.2
## Crew 0.2 0.2
##
## , , Age = Adult, Survived = Yes
##
## Sex
## Class Male Female
## 1st 57.2 140.2
## 2nd 14.2 80.2
## 3rd 75.2 76.2
## Crew 192.2 20.2
# NOTE: despite the "Hospitals_" prefix, both models below are fitted to the
# Titanic table; the names are kept unchanged for compatibility.

# Model 1: main effects only, i.e. all four factors mutually independent.
Hospitals_Model1 <- loglm(~ Age + Sex + Class + Survived, data = Titanic)
Hospitals_Model1
## Call:
## loglm(formula = ~Age + Sex + Class + Survived, data = Titanic)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 1229.854 25 0
## Pearson 1625.300 25 0
# Model 2: Age, Sex and Class may interact freely with each other, but are
# jointly independent of Survived.
Hospitals_Model2 <- loglm(~ (Age * Sex * Class) + Survived, data = Titanic)
Hospitals_Model2
## Call:
## loglm(formula = ~(Age * Sex * Class) + Survived, data = Titanic)
##
## Statistics:
## X^2 df P(> X^2)
## Likelihood Ratio 665.5888 15 0
## Pearson 647.1235 15 0
# Plot the fitted joint-independence model (title typo "Indepnedent" fixed).
plot(
  Hospitals_Model2,
  main = "Sex, Age, and Class jointly Independent of Survival"
)