Data622_Assignment1

age.group	networth	status	credit_rating	classprospect
youth	high	employed	fair	no
youth	high	employed	excellent	no
middle	high	employed	fair	yes
senior	medium	employed	fair	yes
senior	low	unemployed	fair	yes
senior	low	unemployed	excellent	no
middle	low	unemployed	excellent	yes
youth	medium	employed	fair	no
youth	low	unemployed	fair	yes
senior	medium	unemployed	fair	yes
youth	medium	unemployed	excellent	yes
middle	medium	employed	excellent	yes
middle	high	unemployed	fair	yes
senior	medium	employed	excellent	no

Calculate the prior probabilities

P(Yes) P(No)

These priors are calculated before looking at any feature values: each one is the number of rows carrying that class label divided by the total number of rows in the dataset.

# Class priors taken from the label counts in the table above:
# 9 of the 14 rows have classprospect = yes and 5 have classprospect = no.
p_yes <- 9/14
p_no <- 5/14

cond_prob(df, 'col:val', 'label_col:label_val')

#' Estimate the conditional probability P(A | B) from row counts.
#'
#' Both events are given as "column:value" strings, e.g.
#' `cond_prob(df, "age.group:youth", "classprospect:yes")`.
#' The ratio of counts is printed as "n/m" before the value is returned.
#'
#' @param df Data frame holding the observations.
#' @param A Event of interest, written as "column:value".
#' @param B Conditioning event, written as "column:value".
#' @return (rows matching both A and B) / (rows matching B). The result is
#'   NaN when no row matches B (0/0).
cond_prob <- function(df, A, B) {
  A <- unlist(strsplit(A, ":", fixed = TRUE))
  B <- unlist(strsplit(B, ":", fixed = TRUE))

  # Count matches with sum(na.rm = TRUE) instead of subsetting rows:
  # indexing a data frame with a mask that contains NA yields phantom
  # all-NA rows, which would inflate nrow() and skew the probability.
  matches_b <- df[B[1]] == B[2]
  n_b <- sum(matches_b, na.rm = TRUE)
  n_ab <- sum(df[A[1]] == A[2] & matches_b, na.rm = TRUE)

  print(paste(toString(n_ab), toString(n_b), sep = "/"))
  n_ab / n_b
}

# Class-conditional estimates for age.group: each level (youth, middle, senior)
# given classprospect = yes, then given classprospect = no.
# Each `## [1] ...` line is recorded output: first the "matches/class size"
# ratio printed by cond_prob, then the value rounded to 2 decimals.
P_age_group_youth_prospect_yes <- cond_prob(df, 'age.group:youth', 'classprospect:yes')

## [1] "2/9"

print(paste("P(age-group=youth|prospect=yes) = ", toString(round(P_age_group_youth_prospect_yes, 2))))

## [1] "P(age-group=youth|prospect=yes) =  0.22"

P_age_group_middle_prospect_yes <- cond_prob(df, 'age.group:middle', 'classprospect:yes')

## [1] "4/9"

print(paste("P(age-group=middle|prospect=yes) = ", toString(round(P_age_group_middle_prospect_yes, 2))))

## [1] "P(age-group=middle|prospect=yes) =  0.44"

P_age_group_senior_prospect_yes <- cond_prob(df, 'age.group:senior', 'classprospect:yes')

## [1] "3/9"

print(paste("P(age-group=senior|prospect=yes) = ", toString(round(P_age_group_senior_prospect_yes, 2))))

## [1] "P(age-group=senior|prospect=yes) =  0.33"

P_age_group_youth_prospect_no<- cond_prob(df, 'age.group:youth', 'classprospect:no')

## [1] "3/5"

print(paste("P(age-group=youth|prospect=no) = ", toString(round(P_age_group_youth_prospect_no, 2))))

## [1] "P(age-group=youth|prospect=no) =  0.6"

# No row combines age.group = middle with classprospect = no, so this
# estimate is exactly 0.
# NOTE(review): if these estimates are later multiplied together, this zero
# wipes out the whole product - confirm whether smoothing is needed.
P_age_group_middle_prospect_no <- cond_prob(df, 'age.group:middle', 'classprospect:no')

## [1] "0/5"

print(paste("P(age-group=middle|prospect=no) = ", toString(round(P_age_group_middle_prospect_no, 2))))

## [1] "P(age-group=middle|prospect=no) =  0"

P_age_group_senior_prospect_no <- cond_prob(df, 'age.group:senior', 'classprospect:no')

## [1] "2/5"

print(paste("P(age-group=senior|prospect=no) = ", toString(round(P_age_group_senior_prospect_no, 2))))

## [1] "P(age-group=senior|prospect=no) =  0.4"

# Class-conditional estimates for networth: each level (high, low, medium)
# given classprospect = yes, then given classprospect = no.
# Each `## [1] ...` line is recorded output: first the "matches/class size"
# ratio printed by cond_prob, then the value rounded to 2 decimals.
P_networth_high_prospect_yes <- cond_prob(df, 'networth:high', 'classprospect:yes')

## [1] "2/9"

print(paste("P(networth=high|prospect=yes) = ", toString(round(P_networth_high_prospect_yes, 2))))

## [1] "P(networth=high|prospect=yes) =  0.22"

P_networth_low_prospect_yes <-  cond_prob(df, 'networth:low', 'classprospect:yes')

## [1] "3/9"

print(paste("P(networth=low|prospect=yes) = ", toString(round(P_networth_low_prospect_yes, 2))))

## [1] "P(networth=low|prospect=yes) =  0.33"

P_networth_medium_prospect_yes <- cond_prob(df, 'networth:medium', 'classprospect:yes')

## [1] "4/9"

print(paste("P(networth=medium|prospect=yes) = ", toString(round(P_networth_medium_prospect_yes, 2))))

## [1] "P(networth=medium|prospect=yes) =  0.44"

P_networth_high_prospect_no <-  cond_prob(df, 'networth:high', 'classprospect:no')

## [1] "2/5"

print(paste("P(networth=high|prospect=no) = ", toString(round(P_networth_high_prospect_no, 2))))

## [1] "P(networth=high|prospect=no) =  0.4"

P_networth_low_prospect_no <-  cond_prob(df, 'networth:low', 'classprospect:no')

## [1] "1/5"

print(paste("P(networth=low|prospect=no) = ", toString(round(P_networth_low_prospect_no, 2))))

## [1] "P(networth=low|prospect=no) =  0.2"

P_networth_medium_prospect_no <- cond_prob(df, 'networth:medium', 'classprospect:no')

## [1] "2/5"

print(paste("P(networth=medium|prospect=no) = ", toString(round(P_networth_medium_prospect_no, 2))))

## [1] "P(networth=medium|prospect=no) =  0.4"

# Class-conditional estimates for status: employed and unemployed, each
# given classprospect = yes and classprospect = no.
# Each `## [1] ...` line is recorded output: first the "matches/class size"
# ratio printed by cond_prob, then the value rounded to 2 decimals.
P_status_employed_prospect_yes <- cond_prob(df, 'status:employed', 'classprospect:yes')

## [1] "3/9"

print(paste("P(status=employed|prospect=yes) = ", toString(round(P_status_employed_prospect_yes, 2))))

## [1] "P(status=employed|prospect=yes) =  0.33"

P_status_employed_prospect_no <- cond_prob(df, 'status:employed', 'classprospect:no')

## [1] "4/5"

print(paste("P(status=employed|prospect=no) = ", toString(round(P_status_employed_prospect_no, 2))))

## [1] "P(status=employed|prospect=no) =  0.8"

P_status_unemployed_prospect_yes <- cond_prob(df, 'status:unemployed', 'classprospect:yes')

## [1] "6/9"

print(paste("P(status=unemployed|prospect=yes) = ", toString(round(P_status_unemployed_prospect_yes, 2))))

## [1] "P(status=unemployed|prospect=yes) =  0.67"

P_status_unemployed_prospect_no <- cond_prob(df, 'status:unemployed', 'classprospect:no')

## [1] "1/5"

print(paste("P(status=unemployed|prospect=no) = ", toString(round(P_status_unemployed_prospect_no, 2))))

## [1] "P(status=unemployed|prospect=no) =  0.2"

# Class-conditional estimates for credit_rating: fair and excellent, each
# given classprospect = yes and classprospect = no.
# Each `## [1] ...` line is recorded output: first the "matches/class size"
# ratio printed by cond_prob, then the value rounded to 2 decimals.
P_credit_fair_prospect_yes <- cond_prob(df, 'credit_rating:fair', 'classprospect:yes')

## [1] "6/9"

print(paste("P(credit=fair|prospect=yes) = ", toString(round(P_credit_fair_prospect_yes, 2))))

## [1] "P(credit=fair|prospect=yes) =  0.67"

P_credit_fair_prospect_no  <- cond_prob(df, 'credit_rating:fair', 'classprospect:no')

## [1] "2/5"

print(paste("P(credit=fair|prospect=no) = ", toString(round(P_credit_fair_prospect_no, 2))))

## [1] "P(credit=fair|prospect=no) =  0.4"

P_credit_excellent_prospect_yes  <- cond_prob(df, 'credit_rating:excellent', 'classprospect:yes')

## [1] "3/9"

print(paste("P(credit=excellent|prospect=yes) = ", toString(round(P_credit_excellent_prospect_yes, 2))))

## [1] "P(credit=excellent|prospect=yes) =  0.33"

P_credit_excellent_prospect_no   <- cond_prob(df, 'credit_rating:excellent', 'classprospect:no')

## [1] "3/5"

print(paste("P(credit=excellent|prospect=no) = ", toString(round(P_credit_excellent_prospect_no, 2))))

## [1] "P(credit=excellent|prospect=no) =  0.6"

Data622_Assignment1

Charls Joseph

March 25, 2020

R Markdown

Calculate the prior probabilities