# Load data.csv from a hard-coded absolute Windows path into `df`, then render
# the whole data frame as a table.
# NOTE(review): kable() is presumably knitr::kable; no library() call for it is
# visible in this part of the file -- confirm it is attached earlier.
df <- read.csv(
  file = "C:\\Users\\Charls\\Documents\\CunyMSDS\\Data622\\Assignments\\HW1\\Qn1\\data.csv"
)
kable(df)
| age.group | networth | status | credit_rating | classprospect |
|---|---|---|---|---|
| youth | high | employed | fair | no |
| youth | high | employed | excellent | no |
| middle | high | employed | fair | yes |
| senior | medium | employed | fair | yes |
| senior | low | unemployed | fair | yes |
| senior | low | unemployed | excellent | no |
| middle | low | unemployed | excellent | yes |
| youth | medium | employed | fair | no |
| youth | low | unemployed | fair | yes |
| senior | medium | unemployed | fair | yes |
| youth | medium | unemployed | excellent | yes |
| middle | medium | employed | excellent | yes |
| middle | high | unemployed | fair | yes |
| senior | medium | employed | excellent | no |
P(Yes) P(No)
These prior probabilities are calculated without examining the full dataset; they are determined only by the total number of each label in the dataset.
# Class priors P(yes) and P(no).
# Derived from the label column instead of being typed in as 9/14 and 5/14, so
# they cannot drift out of sync with the data. Each is an integer count divided
# by the integer row count, which for the 14-row table evaluates to exactly the
# same doubles as 9/14 and 5/14.
# na.rm = TRUE stops a single missing label from turning the prior into NA.
p_yes <- sum(df[["classprospect"]] == "yes", na.rm = TRUE) / nrow(df)
p_no <- sum(df[["classprospect"]] == "no", na.rm = TRUE) / nrow(df)
cond_prob(df, 'col:val', 'label_col:label_val')
#' Empirical conditional probability P(A | B) from row counts.
#'
#' @param df A data frame.
#' @param A  A single "column:value" string naming the event of interest,
#'   e.g. "age.group:youth".
#' @param B  A single "column:value" string naming the conditioning event,
#'   e.g. "classprospect:yes".
#' @return (rows matching both A and B) / (rows matching B) as a double.
#'   The result is NaN when no row matches B (0 / 0).
#'
#' Side effect: prints the raw ratio as "n_joint/n_given" before returning.
#' Stops with an error if a spec has no ':' or names a column not in `df`.
cond_prob <- function(df, A, B) {
  # Split a spec at its FIRST colon only, so a value that itself contains ':'
  # (for example a time such as "10:30") is kept whole.
  parse_spec <- function(spec) {
    stopifnot(is.character(spec), length(spec) == 1L)
    pos <- regexpr(":", spec, fixed = TRUE)
    if (pos < 1L) {
      stop("expected 'column:value', got '", spec, "'", call. = FALSE)
    }
    col <- substr(spec, 1L, pos - 1L)
    if (!col %in% names(df)) {
      stop("unknown column '", col, "'", call. = FALSE)
    }
    list(col = col, val = substr(spec, pos + 1L, nchar(spec)))
  }

  event <- parse_spec(A)
  given <- parse_spec(B)

  # %in% never yields NA, so rows with a missing value simply do not match.
  # Subsetting the data frame with an `==` mask instead would return an
  # all-NA row for every NA in the mask and inflate the counts.
  is_given <- df[[given$col]] %in% given$val
  is_joint <- is_given & df[[event$col]] %in% event$val

  n_joint <- sum(is_joint)
  n_given <- sum(is_given)

  print(paste(toString(n_joint), toString(n_given), sep = "/"))
  n_joint / n_given
}
# ---- P(age.group | classprospect) ----
# Every cond_prob() call prints its raw count ratio; the print() after it
# reports the stored probability rounded to two decimals. The `## [1]` lines
# are the output recorded when the document was rendered.
P_age_group_youth_prospect_yes <- cond_prob(
  df, "age.group:youth", "classprospect:yes"
)
## [1] "2/9"
print(paste(
  "P(age-group=youth|prospect=yes) = ",
  round(P_age_group_youth_prospect_yes, digits = 2)
))
## [1] "P(age-group=youth|prospect=yes) = 0.22"
P_age_group_middle_prospect_yes <- cond_prob(
  df, "age.group:middle", "classprospect:yes"
)
## [1] "4/9"
print(paste(
  "P(age-group=middle|prospect=yes) = ",
  round(P_age_group_middle_prospect_yes, digits = 2)
))
## [1] "P(age-group=middle|prospect=yes) = 0.44"
P_age_group_senior_prospect_yes <- cond_prob(
  df, "age.group:senior", "classprospect:yes"
)
## [1] "3/9"
print(paste(
  "P(age-group=senior|prospect=yes) = ",
  round(P_age_group_senior_prospect_yes, digits = 2)
))
## [1] "P(age-group=senior|prospect=yes) = 0.33"
P_age_group_youth_prospect_no <- cond_prob(
  df, "age.group:youth", "classprospect:no"
)
## [1] "3/5"
print(paste(
  "P(age-group=youth|prospect=no) = ",
  round(P_age_group_youth_prospect_no, digits = 2)
))
## [1] "P(age-group=youth|prospect=no) = 0.6"
P_age_group_middle_prospect_no <- cond_prob(
  df, "age.group:middle", "classprospect:no"
)
## [1] "0/5"
print(paste(
  "P(age-group=middle|prospect=no) = ",
  round(P_age_group_middle_prospect_no, digits = 2)
))
## [1] "P(age-group=middle|prospect=no) = 0"
P_age_group_senior_prospect_no <- cond_prob(
  df, "age.group:senior", "classprospect:no"
)
## [1] "2/5"
print(paste(
  "P(age-group=senior|prospect=no) = ",
  round(P_age_group_senior_prospect_no, digits = 2)
))
## [1] "P(age-group=senior|prospect=no) = 0.4"
# ---- P(networth | classprospect) ----
# Every cond_prob() call prints its raw count ratio; the print() after it
# reports the stored probability rounded to two decimals. The `## [1]` lines
# are the output recorded when the document was rendered.
P_networth_high_prospect_yes <- cond_prob(
  df, "networth:high", "classprospect:yes"
)
## [1] "2/9"
print(paste(
  "P(networth=high|prospect=yes) = ",
  round(P_networth_high_prospect_yes, digits = 2)
))
## [1] "P(networth=high|prospect=yes) = 0.22"
P_networth_low_prospect_yes <- cond_prob(
  df, "networth:low", "classprospect:yes"
)
## [1] "3/9"
print(paste(
  "P(networth=low|prospect=yes) = ",
  round(P_networth_low_prospect_yes, digits = 2)
))
## [1] "P(networth=low|prospect=yes) = 0.33"
P_networth_medium_prospect_yes <- cond_prob(
  df, "networth:medium", "classprospect:yes"
)
## [1] "4/9"
print(paste(
  "P(networth=medium|prospect=yes) = ",
  round(P_networth_medium_prospect_yes, digits = 2)
))
## [1] "P(networth=medium|prospect=yes) = 0.44"
P_networth_high_prospect_no <- cond_prob(
  df, "networth:high", "classprospect:no"
)
## [1] "2/5"
print(paste(
  "P(networth=high|prospect=no) = ",
  round(P_networth_high_prospect_no, digits = 2)
))
## [1] "P(networth=high|prospect=no) = 0.4"
P_networth_low_prospect_no <- cond_prob(
  df, "networth:low", "classprospect:no"
)
## [1] "1/5"
print(paste(
  "P(networth=low|prospect=no) = ",
  round(P_networth_low_prospect_no, digits = 2)
))
## [1] "P(networth=low|prospect=no) = 0.2"
P_networth_medium_prospect_no <- cond_prob(
  df, "networth:medium", "classprospect:no"
)
## [1] "2/5"
print(paste(
  "P(networth=medium|prospect=no) = ",
  round(P_networth_medium_prospect_no, digits = 2)
))
## [1] "P(networth=medium|prospect=no) = 0.4"
# ---- P(status | classprospect) ----
# Every cond_prob() call prints its raw count ratio; the print() after it
# reports the stored probability rounded to two decimals. The `## [1]` lines
# are the output recorded when the document was rendered.
P_status_employed_prospect_yes <- cond_prob(
  df, "status:employed", "classprospect:yes"
)
## [1] "3/9"
print(paste(
  "P(status=employed|prospect=yes) = ",
  round(P_status_employed_prospect_yes, digits = 2)
))
## [1] "P(status=employed|prospect=yes) = 0.33"
P_status_employed_prospect_no <- cond_prob(
  df, "status:employed", "classprospect:no"
)
## [1] "4/5"
print(paste(
  "P(status=employed|prospect=no) = ",
  round(P_status_employed_prospect_no, digits = 2)
))
## [1] "P(status=employed|prospect=no) = 0.8"
P_status_unemployed_prospect_yes <- cond_prob(
  df, "status:unemployed", "classprospect:yes"
)
## [1] "6/9"
print(paste(
  "P(status=unemployed|prospect=yes) = ",
  round(P_status_unemployed_prospect_yes, digits = 2)
))
## [1] "P(status=unemployed|prospect=yes) = 0.67"
P_status_unemployed_prospect_no <- cond_prob(
  df, "status:unemployed", "classprospect:no"
)
## [1] "1/5"
print(paste(
  "P(status=unemployed|prospect=no) = ",
  round(P_status_unemployed_prospect_no, digits = 2)
))
## [1] "P(status=unemployed|prospect=no) = 0.2"
# ---- P(credit_rating | classprospect) ----
# Every cond_prob() call prints its raw count ratio; the print() after it
# reports the stored probability rounded to two decimals. The `## [1]` lines
# are the output recorded when the document was rendered.
P_credit_fair_prospect_yes <- cond_prob(
  df, "credit_rating:fair", "classprospect:yes"
)
## [1] "6/9"
print(paste(
  "P(credit=fair|prospect=yes) = ",
  round(P_credit_fair_prospect_yes, digits = 2)
))
## [1] "P(credit=fair|prospect=yes) = 0.67"
P_credit_fair_prospect_no <- cond_prob(
  df, "credit_rating:fair", "classprospect:no"
)
## [1] "2/5"
print(paste(
  "P(credit=fair|prospect=no) = ",
  round(P_credit_fair_prospect_no, digits = 2)
))
## [1] "P(credit=fair|prospect=no) = 0.4"
P_credit_excellent_prospect_yes <- cond_prob(
  df, "credit_rating:excellent", "classprospect:yes"
)
## [1] "3/9"
print(paste(
  "P(credit=excellent|prospect=yes) = ",
  round(P_credit_excellent_prospect_yes, digits = 2)
))
## [1] "P(credit=excellent|prospect=yes) = 0.33"
P_credit_excellent_prospect_no <- cond_prob(
  df, "credit_rating:excellent", "classprospect:no"
)
## [1] "3/5"
print(paste(
  "P(credit=excellent|prospect=no) = ",
  round(P_credit_excellent_prospect_no, digits = 2)
))
## [1] "P(credit=excellent|prospect=no) = 0.6"