# Set working directory
# getwd()
# Read the space-delimited extract and name its columns by reference number.
new_data <- read.table("class11.dat", sep = " ")
names(new_data) <- c("R0000100", "R0536300", "R1482600", "Z0520200")
# Handle missing values: every negative code below is recoded to NA.
#   -1 Refused, -2 Dont know, -3 Invalid missing, -4 Valid missing,
#   -5 Non-interview
for (missing_code in c(-1, -2, -3, -4, -5)) {
  new_data[new_data == missing_code] <- NA
}
# If there are values not categorized they will be represented as NA
# Return a copy of `data` in which the four reference-number columns are
# replaced by labelled factors.
#
# data: a data frame with numeric columns R0000100, R0536300, R1482600 and
#       Z0520200.
# Returns the same data frame with:
#   R0000100 binned into labelled ranges ("0", "1 TO 999", ..., "9000 TO 9999"),
#   R0536300, R1482600 and Z0520200 converted to factors with text labels.
# Any value that matches no bin or level becomes NA.
vallabels <- function(data) {
  id_breaks <- c(0, 1, seq(1000, 9000, by = 1000), 9999)
  id_labels <- c(
    "0", "1 TO 999", "1000 TO 1999", "2000 TO 2999", "3000 TO 3999",
    "4000 TO 4999", "5000 TO 5999", "6000 TO 6999", "7000 TO 7999",
    "8000 TO 8999", "9000 TO 9999"
  )
  # Bins are left-closed ([a, b)). include.lowest = TRUE additionally closes
  # the last bin on the right, so the value 9999 is labelled "9000 TO 9999"
  # instead of silently turning into NA.
  data$R0000100 <- cut(
    data$R0000100,
    breaks = id_breaks,
    labels = id_labels,
    right = FALSE,
    include.lowest = TRUE
  )
  data$R0536300 <- factor(
    data$R0536300,
    levels = c(1, 2, 0),
    labels = c("Male", "Female", "No Information")
  )
  data$R1482600 <- factor(
    data$R1482600,
    levels = c(1, 2, 3, 4),
    labels = c("Black", "Hispanic", "Mixed Race (Non-Hispanic)",
               "Non-Black / Non-Hispanic")
  )
  data$Z0520200 <- factor(
    data$Z0520200,
    levels = c(1, 0),
    labels = c("YES", "NO")
  )
  data
}
# Descriptive labels for the four columns, in column order.
varlabels <- c(
  "PUBID - YTH ID CODE 1997",
  "KEY!SEX (SYMBOL) 1997",
  "KEY!RACE_ETHNICITY (SYMBOL) 1997",
  "SP/PAR STILL PAYING OFF EDUC LOANS?"
)
# Use qnames rather than rnums
# Return `data` with its four columns renamed from reference numbers to
# question names; the column contents are left untouched.
qnames <- function(data) {
  setNames(
    data,
    c("PUBID_1997", "KEY_SEX_1997", "KEY_RACE_ETHNICITY_1997",
      "YAST30-5040_NEW_COMB_XRND")
  )
}
#********************************************************************************************************
# Build "categories": a copy of the data with value labels applied, then
# rename the columns of both data frames to Qnames instead of Reference Numbers.
categories <- qnames(vallabels(new_data))
new_data <- qnames(new_data)
# Produce summaries for the raw (uncategorized) data file
summary(new_data)
## PUBID_1997 KEY_SEX_1997 KEY_RACE_ETHNICITY_1997
## Min. : 1 Min. :1.000 Min. :1.000
## 1st Qu.:2249 1st Qu.:1.000 1st Qu.:1.000
## Median :4502 Median :1.000 Median :4.000
## Mean :4504 Mean :1.488 Mean :2.788
## 3rd Qu.:6758 3rd Qu.:2.000 3rd Qu.:4.000
## Max. :9022 Max. :2.000 Max. :4.000
##
## YAST30-5040_NEW_COMB_XRND
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.237
## 3rd Qu.:0.000
## Max. :1.000
## NA's :5627
# Remove the '#' before the following lines to produce summaries for the "categories" data file.
#categories <- vallabels(new_data)
#summary(categories)
#************************************************************************************************************
# Attach dplyr with library() so that a missing package stops the script here;
# require() would only return FALSE and let the script fail later.
library(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Convert the renamed data frame to a tibble. as_tibble() (re-exported by
# dplyr) replaces the deprecated tbl_df().
nls <- as_tibble(new_data)
summary(nls)
## PUBID_1997 KEY_SEX_1997 KEY_RACE_ETHNICITY_1997
## Min. : 1 Min. :1.000 Min. :1.000
## 1st Qu.:2249 1st Qu.:1.000 1st Qu.:1.000
## Median :4502 Median :1.000 Median :4.000
## Mean :4504 Mean :1.488 Mean :2.788
## 3rd Qu.:6758 3rd Qu.:2.000 3rd Qu.:4.000
## Max. :9022 Max. :2.000 Max. :4.000
##
## YAST30-5040_NEW_COMB_XRND
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :0.237
## 3rd Qu.:0.000
## Max. :1.000
## NA's :5627
# Attach magrittr (for %>%) with library() so a missing package is an error
# rather than a FALSE return value.
library(magrittr)
## Loading required package: magrittr
# Keep only respondents with a recorded loan answer; rows where the
# comparison yields NA are dropped by filter().
nls <- nls %>%
  filter(`YAST30-5040_NEW_COMB_XRND` >= 0)
summary(nls)
## PUBID_1997 KEY_SEX_1997 KEY_RACE_ETHNICITY_1997
## Min. : 3 Min. :1.00 Min. :1.000
## 1st Qu.:2321 1st Qu.:1.00 1st Qu.:2.000
## Median :4372 Median :2.00 Median :4.000
## Mean :4395 Mean :1.52 Mean :2.988
## 3rd Qu.:6390 3rd Qu.:2.00 3rd Qu.:4.000
## Max. :9022 Max. :2.00 Max. :4.000
## YAST30-5040_NEW_COMB_XRND
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2371
## 3rd Qu.:0.0000
## Max. :1.0000
1. Compute in R a three-way contingency table that is a cross-tabulation of the three categorical variables you selected.
# Three-way cross-tabulation of sex by race/ethnicity by loan status.
# The terms are written with the nls$ prefix because xtabs() uses them
# verbatim as the dimension names shown in the printed table.
povtable <- xtabs(
  ~ nls$KEY_SEX_1997 +
    nls$KEY_RACE_ETHNICITY_1997 +
    nls$`YAST30-5040_NEW_COMB_XRND`
)
# Flatten the three-way table into a two-dimensional layout for printing.
ftable(povtable)
## nls$`YAST30-5040_NEW_COMB_XRND` 0 1
## nls$KEY_SEX_1997 nls$KEY_RACE_ETHNICITY_1997
## 1 1 241 89
## 2 276 77
## 3 12 3
## 4 618 297
## 2 1 235 55
## 2 344 55
## 3 16 1
## 4 819 219
# Summarise the cross-tabulation: prints the call, the number of cases and
# factors, and a chi-squared test for independence of all factors.
summary(povtable)
## Call: xtabs(formula = ~nls$KEY_SEX_1997 + nls$KEY_RACE_ETHNICITY_1997 +
## nls$`YAST30-5040_NEW_COMB_XRND`)
## Number of cases in table: 3357
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 81.37, df = 10, p-value = 2.709e-13
## Chi-squared approximation may be incorrect
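State the null hypothesis and its alternative. H0: χ² = 0, i.e. sex, race/ethnicity, and education-loan status are mutually independent. H1: χ² ≠ 0, i.e. the three variables are not mutually independent.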
Select a test statistic. Use the chi-squared statistic from the cross-tabulation of the three variables.
Establish a level of α – the probability of a Type I error – that is acceptable. Set α = .05.
Collect the data and calculate the test statistic. Chisq = 81.37, df = 10, p-value = 2.709e-13; alternative hypothesis: χ² is not equal to 0.
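Make a decision based on the value of the test statistic. The p-value associated with a chi-squared of 81.37 (df = 10) equals 2.709e-13, which is < α. Therefore, the null hypothesis is rejected at α = .05.
Report the decision to reject or fail to reject the null hypothesis. Reject the null hypothesis: sex, race/ethnicity, and education-loan status are not mutually independent. Note that R warns the chi-squared approximation may be incorrect (some cells of the table are very sparse), so the exact p-value should be read with caution.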