Assignment 1

Question 1

You have been hired by a local electronics retailer and given the dataset loaded below. Manager Bayes Jr. 9th wants to create a spreadsheet to predict whether a customer is a likely prospect. To that end:

1) Compute the prior probabilities for Prospect = yes/no.

2) Compute the conditional probabilities P(age-group=youth|prospect=yes) and P(age-group=youth|prospect=no), where age-group is a predictor variable; do the same for each of the other predictor variables.

3) Assuming the Naive Bayes assumptions are met, compute P(prospect|X), where X is one of the predictor variables.
# import the required dataset
data<-read.csv("https://raw.githubusercontent.com/maharjansudhan/DATA622/master/HW1-Q1-40.csv",header=TRUE,sep=",")
head(data)
##   age.group networth     status credit_rating class.prospect
## 1     youth     high   employed          fair             no
## 2     youth     high   employed     excellent             no
## 3    middle     high   employed          fair            yes
## 4    senior   medium   employed          fair            yes
## 5    senior      low unemployed          fair            yes
## 6    senior      low unemployed     excellent             no

1) Compute prior probabilities for Prospect = yes/no. (The marginal probabilities of each predictor value are also computed below, since they are needed as denominators in step 3.)

# Prior Probabilities: 
#age.group: senior
prior_age.group_senior<-nrow(data[data$age.group=="senior",]) / nrow(data)
prior_age.group_senior<-round(prior_age.group_senior,2)
prior_age.group_senior
## [1] 0.36
#age.group:middle
prior_age.group_middle<-nrow(data[data$age.group=="middle",]) / nrow(data)
prior_age.group_middle<-round(prior_age.group_middle,2)
prior_age.group_middle
## [1] 0.29
#age.group:youth
prior_age.group_youth<-nrow(data[data$age.group=="youth",]) / nrow(data)
prior_age.group_youth<-round(prior_age.group_youth,2)
prior_age.group_youth
## [1] 0.36
#networth:high
prior_networth_high<-nrow(data[data$networth=="high",]) / nrow(data)
prior_networth_high<-round(prior_networth_high,2)
prior_networth_high
## [1] 0.29
#networth:medium
prior_networth_medium<-nrow(data[data$networth=="medium",]) / nrow(data)
prior_networth_medium<-round(prior_networth_medium,2)
prior_networth_medium
## [1] 0.43
#networth:low
prior_networth_low<-nrow(data[data$networth=="low",]) / nrow(data)
prior_networth_low<-round(prior_networth_low,2)
prior_networth_low
## [1] 0.29
#status:employed
prior_status_employed<-nrow(data[data$status=="employed",]) / nrow(data)
prior_status_employed<-round(prior_status_employed,2)
prior_status_employed
## [1] 0.5
#status:unemployed
prior_status_unemployed<-nrow(data[data$status=="unemployed",]) / nrow(data)
prior_status_unemployed<-round(prior_status_unemployed,2)
prior_status_unemployed
## [1] 0.5
#credit_rating:excellent
prior_credit_rating_excellent<-nrow(data[data$credit_rating=="excellent",]) / nrow(data)
prior_credit_rating_excellent<-round(prior_credit_rating_excellent,2)
prior_credit_rating_excellent
## [1] 0.43
#credit_rating:fair
prior_credit_rating_fair<-nrow(data[data$credit_rating=="fair",]) / nrow(data)
prior_credit_rating_fair<-round(prior_credit_rating_fair,2)
prior_credit_rating_fair
## [1] 0.57
#class.prospect:yes
prior_class.prospect_yes<-nrow(data[data$class.prospect=="yes",]) / nrow(data)
prior_class.prospect_yes<-round(prior_class.prospect_yes,2)
prior_class.prospect_yes
## [1] 0.64
#class.prospect:no
prior_class.prospect_no<-nrow(data[data$class.prospect=="no",]) / nrow(data)
prior_class.prospect_no<-round(prior_class.prospect_no,2)
prior_class.prospect_no
## [1] 0.36
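
The same values can be computed more compactly with table() and prop.table(); a minimal equivalent sketch using the same data object as above:

# compact alternative to the per-value computations above
round(prop.table(table(data$class.prospect)), 2)   # class priors for prospect = yes/no
round(prop.table(table(data$age.group)), 2)        # marginal probabilities of a predictor, e.g. age.group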

2) Compute the conditional probabilities

#conditional probabilities:
#P(age.group=senior|prospect=yes)
conditional_age.group_senior_class.prosepct_yes<-nrow(data[data$age.group=="senior" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_age.group_senior_class.prosepct_yes<-round(conditional_age.group_senior_class.prosepct_yes,2)
conditional_age.group_senior_class.prosepct_yes
## [1] 0.33
#P(age.group=middle|prospect=yes)
conditional_age.group_middle_class.prosepct_yes<-nrow(data[data$age.group=="middle" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_age.group_middle_class.prosepct_yes<-round(conditional_age.group_middle_class.prosepct_yes,2)
conditional_age.group_middle_class.prosepct_yes
## [1] 0.44
#P(age.group=youth|prospect=yes)
conditional_age.group_youth_class.prosepct_yes<-nrow(data[data$age.group=="youth" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_age.group_youth_class.prosepct_yes<-round(conditional_age.group_youth_class.prosepct_yes,2)
conditional_age.group_youth_class.prosepct_yes
## [1] 0.22
#P(age.group=senior|prospect=no)
conditional_age.group_senior_class.prosepct_no<-nrow(data[data$age.group=="senior" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_age.group_senior_class.prosepct_no<-round(conditional_age.group_senior_class.prosepct_no,2)
conditional_age.group_senior_class.prosepct_no
## [1] 0.4
#P(age.group=middle|prospect=no)
conditional_age.group_middle_class.prosepct_no<-nrow(data[data$age.group=="middle" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_age.group_middle_class.prosepct_no<-round(conditional_age.group_middle_class.prosepct_no,2)
conditional_age.group_middle_class.prosepct_no
## [1] 0
#P(age.group=youth|prospect=no)
conditional_age.group_youth_class.prosepct_no<-nrow(data[data$age.group=="youth" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_age.group_youth_class.prosepct_no<-round(conditional_age.group_youth_class.prosepct_no,2)
conditional_age.group_youth_class.prosepct_no
## [1] 0.6
#P(networth=high|prospect=yes)
conditional_networth_high_prospect_yes<-nrow(data[data$networth=="high" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_networth_high_prospect_yes<-round(conditional_networth_high_prospect_yes,2)
conditional_networth_high_prospect_yes
## [1] 0.22
#P(networth=medium|prospect=yes)
conditional_networth_medium_prospect_yes<-nrow(data[data$networth=="medium" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_networth_medium_prospect_yes<-round(conditional_networth_medium_prospect_yes,2)
conditional_networth_medium_prospect_yes
## [1] 0.44
#P(networth=low|prospect=yes)
conditional_networth_low_prospect_yes<-nrow(data[data$networth=="low" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_networth_low_prospect_yes<-round(conditional_networth_low_prospect_yes,2)
conditional_networth_low_prospect_yes
## [1] 0.33
#P(networth=high|prospect=no)
conditional_networth_high_prospect_no<-nrow(data[data$networth=="high" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_networth_high_prospect_no<-round(conditional_networth_high_prospect_no,2)
conditional_networth_high_prospect_no
## [1] 0.4
#P(networth=medium|prospect=no)
conditional_networth_medium_prospect_no<-nrow(data[data$networth=="medium" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_networth_medium_prospect_no<-round(conditional_networth_medium_prospect_no,2)
conditional_networth_medium_prospect_no
## [1] 0.4
#P(networth=low|prospect=no)
conditional_networth_low_prospect_no<-nrow(data[data$networth=="low" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_networth_low_prospect_no<-round(conditional_networth_low_prospect_no,2)
conditional_networth_low_prospect_no
## [1] 0.2
#P(status=employed|prospect=yes)
conditional_status_employed_prospect_yes<-nrow(data[data$status=="employed" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_status_employed_prospect_yes<-round(conditional_status_employed_prospect_yes,2)
conditional_status_employed_prospect_yes
## [1] 0.33
#P(status=unemployed|prospect=yes)
conditional_status_unemployed_prospect_yes<-nrow(data[data$status=="unemployed" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_status_unemployed_prospect_yes<-round(conditional_status_unemployed_prospect_yes,2)
conditional_status_unemployed_prospect_yes
## [1] 0.67
#P(status=employed|prospect=no)
conditional_status_employed_prospect_no<-nrow(data[data$status=="employed" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_status_employed_prospect_no<-round(conditional_status_employed_prospect_no,2)
conditional_status_employed_prospect_no
## [1] 0.8
#P(status=unemployed|prospect=no)
conditional_status_unemployed_prospect_no<-nrow(data[data$status=="unemployed" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_status_unemployed_prospect_no<-round(conditional_status_unemployed_prospect_no,2)
conditional_status_unemployed_prospect_no
## [1] 0.2
#P(credit=excellent|prospect=yes)
conditional_credit_rating_excellent_prospect_yes<-nrow(data[data$credit_rating=="excellent" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_credit_rating_excellent_prospect_yes<-round(conditional_credit_rating_excellent_prospect_yes,2)
conditional_credit_rating_excellent_prospect_yes
## [1] 0.33
#P(credit=fair|prospect=yes)
conditional_credit_rating_fair_prospect_yes<-nrow(data[data$credit_rating=="fair" & data$class.prospect=="yes",]) / nrow(data[data$class.prospect=="yes",])
conditional_credit_rating_fair_prospect_yes<-round(conditional_credit_rating_fair_prospect_yes,2)
conditional_credit_rating_fair_prospect_yes
## [1] 0.67
#P(credit=excellent|prospect=no)
conditional_credit_rating_excellent_prospect_no<-nrow(data[data$credit_rating=="excellent" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_credit_rating_excellent_prospect_no<-round(conditional_credit_rating_excellent_prospect_no,2)
conditional_credit_rating_excellent_prospect_no
## [1] 0.6
#P(credit=fair|prospect=no)
conditional_credit_rating_fair_prospect_no<-nrow(data[data$credit_rating=="fair" & data$class.prospect=="no",]) / nrow(data[data$class.prospect=="no",])
conditional_credit_rating_fair_prospect_no<-round(conditional_credit_rating_fair_prospect_no,2)
conditional_credit_rating_fair_prospect_no
## [1] 0.4
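
Equivalently, each block of conditionals above is a column-wise proportion of a two-way contingency table; a minimal sketch for age.group (prop.table with margin = 2 conditions on the prospect column):

# P(age.group | class.prospect), one column per class
round(prop.table(table(data$age.group, data$class.prospect), margin = 2), 2)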

3) Compute posterior probabilities. By Bayes' rule, P(prospect|X) = (P(X|prospect) * P(prospect)) / P(X), using the conditionals from step 2 and the prior/marginal probabilities from step 1.

#posterior probabilities:
#P(prospect=yes|age.group=senior)
post_prospect_yes_age.group_senior<-(conditional_age.group_senior_class.prosepct_yes * prior_class.prospect_yes) / prior_age.group_senior
post_prospect_yes_age.group_senior<-round(post_prospect_yes_age.group_senior,2)
post_prospect_yes_age.group_senior
## [1] 0.59
#P(prospect=no|age.group=senior)
post_prospect_no_age.group_senior<-(conditional_age.group_senior_class.prosepct_no * prior_class.prospect_no) / prior_age.group_senior
post_prospect_no_age.group_senior<-round(post_prospect_no_age.group_senior,2)
post_prospect_no_age.group_senior
## [1] 0.4
#P(prospect=yes|age.group=middle)
post_prospect_yes_age.group_middle<-(conditional_age.group_middle_class.prosepct_yes * prior_class.prospect_yes) / prior_age.group_middle
post_prospect_yes_age.group_middle<-round(post_prospect_yes_age.group_middle,2)
post_prospect_yes_age.group_middle
## [1] 0.97
#P(prospect=no|age.group=middle)
post_prospect_no_age.group_middle<-(conditional_age.group_middle_class.prosepct_no * prior_class.prospect_no) / prior_age.group_middle
post_prospect_no_age.group_middle<-round(post_prospect_no_age.group_middle,2)
post_prospect_no_age.group_middle
## [1] 0
#P(prospect=yes|age.group=youth)
post_prospect_yes_age.group_youth<-(conditional_age.group_youth_class.prosepct_yes * prior_class.prospect_yes) / prior_age.group_youth
post_prospect_yes_age.group_youth<-round(post_prospect_yes_age.group_youth,2)
post_prospect_yes_age.group_youth
## [1] 0.39
#P(prospect=no|age.group=youth)
post_prospect_no_age.group_youth<-(conditional_age.group_youth_class.prosepct_no * prior_class.prospect_no) / prior_age.group_youth
post_prospect_no_age.group_youth<-round(post_prospect_no_age.group_youth,2)
post_prospect_no_age.group_youth
## [1] 0.6
#P(prospect=yes|networth=high)
post_prospect_yes_networth_high<-(conditional_networth_high_prospect_yes * prior_class.prospect_yes) / prior_networth_high
post_prospect_yes_networth_high<-round(post_prospect_yes_networth_high,2)
post_prospect_yes_networth_high
## [1] 0.49
#P(prospect=no|networth=high)
post_prospect_no_networth_high<-(conditional_networth_high_prospect_no * prior_class.prospect_no) / prior_networth_high
post_prospect_no_networth_high<-round(post_prospect_no_networth_high,2)
post_prospect_no_networth_high
## [1] 0.5
#P(prospect=yes|networth=medium)
post_prospect_yes_networth_medium<-(conditional_networth_medium_prospect_yes * prior_class.prospect_yes) / prior_networth_medium
post_prospect_yes_networth_medium<-round(post_prospect_yes_networth_medium,2)
post_prospect_yes_networth_medium
## [1] 0.65
#P(prospect=no|networth=medium)
post_prospect_no_networth_medium<-(conditional_networth_medium_prospect_no * prior_class.prospect_no) / prior_networth_medium
post_prospect_no_networth_medium<-round(post_prospect_no_networth_medium,2)
post_prospect_no_networth_medium
## [1] 0.33
#P(prospect=yes|networth=low)
post_prospect_yes_networth_low<-(conditional_networth_low_prospect_yes * prior_class.prospect_yes) / prior_networth_low
post_prospect_yes_networth_low<-round(post_prospect_yes_networth_low,2)
post_prospect_yes_networth_low
## [1] 0.73
#P(prospect=no|networth=low)
post_prospect_no_networth_low<-(conditional_networth_low_prospect_no * prior_class.prospect_no) / prior_networth_low
post_prospect_no_networth_low<-round(post_prospect_no_networth_low,2)
post_prospect_no_networth_low
## [1] 0.25
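
As an optional cross-check, and assuming the e1071 package is installed, naiveBayes() estimates the same priors and per-predictor conditional tables directly from the data; a minimal sketch:

library(e1071)
nb <- naiveBayes(class.prospect ~ ., data = data)  # nb is just a local name for the fitted model
nb$apriori          # class distribution underlying the priors
nb$tables$age.group # conditional table P(age.group | class.prospect)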

Question 2

There are two datasets, junk1.txt and junk2.csv. The team has two options: 1) go back to the client and ask for more data to remedy problems with the data, or 2) accept the data and undertake a major analytics exercise.

The team is relying on your data science skills to determine how they should proceed.

Explore both datasets and recommend an action for each file, enumerating the reasons.

# import the required datasets
junk1<-read.delim("https://raw.githubusercontent.com/maharjansudhan/DATA622/master/junk1.txt",header=TRUE,sep=" ")

junk2<-read.csv("https://raw.githubusercontent.com/maharjansudhan/DATA622/master/junk2.csv",header=TRUE,sep=",")
#import required libraries and let's have a look at the datasets
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1     ✔ purrr   0.3.2
## ✔ tibble  2.1.3     ✔ dplyr   0.8.3
## ✔ tidyr   1.0.0     ✔ stringr 1.4.0
## ✔ readr   1.3.1     ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
head(junk1)
##           a         b class
## 1 1.6204214 3.0036241     1
## 2 1.4340220 0.7852487     1
## 3 2.4766615 0.9367761     1
## 4 0.5283093 0.1196222     1
## 5 1.0054081 0.7872866     1
## 6 1.1032636 0.7330594     1
head(junk2)
##            a           b class
## 1  3.1886481  0.92917735     0
## 2  0.8224527  0.04760314     0
## 3  0.8147247  0.02910931     0
## 4 -1.5065362  3.13231360     0
## 5  0.4426887  2.84942822     0
## 6  0.8564405 -0.66143851     0
dim(junk1)
## [1] 100   3
dim(junk2)
## [1] 4000    3
glimpse(junk1)
## Observations: 100
## Variables: 3
## $ a     <dbl> 1.6204214, 1.4340220, 2.4766615, 0.5283093, 1.0054081, 1.1…
## $ b     <dbl> 3.00362413, 0.78524873, 0.93677611, 0.11962219, 0.78728662…
## $ class <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
glimpse(junk2)
## Observations: 4,000
## Variables: 3
## $ a     <dbl> 3.18864809, 0.82245267, 0.81472472, -1.50653621, 0.4426886…
## $ b     <dbl> 0.92917735, 0.04760314, 0.02910931, 3.13231360, 2.84942822…
## $ class <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
summary(junk1)
##        a                  b                class    
##  Min.   :-2.29854   Min.   :-3.17174   Min.   :1.0  
##  1st Qu.:-0.85014   1st Qu.:-1.04712   1st Qu.:1.0  
##  Median :-0.04754   Median :-0.07456   Median :1.5  
##  Mean   : 0.04758   Mean   : 0.01324   Mean   :1.5  
##  3rd Qu.: 1.09109   3rd Qu.: 1.05342   3rd Qu.:2.0  
##  Max.   : 3.00604   Max.   : 3.10230   Max.   :2.0
summary(junk2)
##        a                  b                class       
##  Min.   :-4.16505   Min.   :-3.90472   Min.   :0.0000  
##  1st Qu.:-1.01447   1st Qu.:-0.89754   1st Qu.:0.0000  
##  Median : 0.08754   Median :-0.08358   Median :0.0000  
##  Mean   :-0.05126   Mean   : 0.05624   Mean   :0.0625  
##  3rd Qu.: 0.89842   3rd Qu.: 1.00354   3rd Qu.:0.0000  
##  Max.   : 4.62647   Max.   : 4.31052   Max.   :1.0000
count(junk1)
## # A tibble: 1 x 1
##       n
##   <int>
## 1   100
count(junk2)
## # A tibble: 1 x 1
##       n
##   <int>
## 1  4000
#let's see data spread for both datasets
hist(junk1$a)

hist(junk1$b)

hist(junk1$class)

hist(junk2$a)

hist(junk2$b)

hist(junk2$class)

#let's boxplot both datasets
boxplot(junk1, horizontal=TRUE, main="junk1")

boxplot(junk2, horizontal=TRUE, main="junk2")

Junk1 is a small dataset with no missing data: only 100 records, with the class taking values 1 and 2 in an even 50/50 split and both predictors a and b roughly centered at zero.

Junk2 is much larger (4,000 records), also with no missing data, and its class takes values 0 and 1. However, it is heavily imbalanced: the class mean of 0.0625 implies only about 6% of the records belong to class 1. The boxplots also show many outliers in a and b.
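
These points can be verified with a couple of quick checks (a minimal sketch, using the junk1 and junk2 objects loaded above):

colSums(is.na(junk1)); colSums(is.na(junk2))  # no missing values in either file
table(junk1$class)                            # classes 1 and 2, evenly split
table(junk2$class)                            # classes 0 and 1, heavily imbalanced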

Comparing the two, the team can accept Junk2 and undertake the analytics exercise, since it is large enough to work with (keeping the class imbalance in mind). For Junk1, with only 100 records, it is better to go back to the client and ask for more data.

Question 3

Please find kNN.R.

This R script requires a dataset, a labelcol, and K (the number of nearest neighbors to be considered).

The dataset MUST be numeric, except for the labelcol. The labelcol must be the last column in the data.frame; all other columns must come before it.

To do:

Please find icu.csv. The formula to fit is “STA ~ TYP + COMA + AGE + INF”.

Read icu.csv and subset it to the 5 features in the formula, with STA as the labelcol.

Split the icu data 70/30 into train/test sets and run kNN.R for K = (3, 5, 7, 15, 25, 50).

Submit the resulting confusion matrix and accuracy for each K.

Plot Accuracy vs. K.

Write a short summary of your findings.

# Euclidean distance between two equal-length numeric vectors (or one-row data frames) a and b
euclideanDist <- function(a, b){
  d = 0
  for(i in c(1:(length(a)) ))
  {
    d = d + (a[[i]]-b[[i]])^2
  }
  d = sqrt(d)
  return(d)
}
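
For the one-row numeric data frames passed to it below, the loop above is equivalent to a vectorized one-liner; a sketch (euclideanDistVec is a hypothetical alternative, not part of kNN.R):

# hypothetical vectorized equivalent (not part of kNN.R)
euclideanDistVec <- function(a, b) sqrt(sum((unlist(a) - unlist(b))^2))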

knn_predict2 <- function(test_data, train_data, k_value, labelcol){
  pred <- c()  #empty pred vector 
  #LOOP-1
  for(i in c(1:nrow(test_data))){   #looping over each record of test data
    eu_dist =c()          #eu_dist & eu_char empty  vector
    eu_char = c()
    good = 0              #good & bad variable initialization with 0 value
    bad = 0
    
    #LOOP-2-looping over train data 
    for(j in c(1:nrow(train_data))){
 
      #adding euclidean distance b/w test data point and train data to eu_dist vector
      eu_dist <- c(eu_dist, euclideanDist(test_data[i,-c(labelcol)], train_data[j,-c(labelcol)]))
 
      #adding class variable of training data in eu_char
      eu_char <- c(eu_char, as.character(train_data[j,][[labelcol]]))
    }
    
    eu <- data.frame(eu_char, eu_dist) #eu dataframe created with eu_char & eu_dist columns
 
    eu <- eu[order(eu$eu_dist),]       #sorting eu dataframe to get top K neighbors
    eu <- eu[1:k_value,]               #eu dataframe with top K neighbors
 
    tbl.sm.df <- table(eu$eu_char)     #frequency of each class among the K nearest neighbors
    cl_label <- names(tbl.sm.df)[[as.integer(which.max(tbl.sm.df))]]  #majority-vote class label

    pred <- c(pred, cl_label)
  }
  return(pred) #return pred vector
}

# percentage of test rows where the true label (labelcol) matches the prediction (predcol)
accuracy <- function(test_data,labelcol,predcol){
  correct = 0
  for(i in c(1:nrow(test_data))){
    if(test_data[i,labelcol] == test_data[i,predcol]){ 
      correct = correct+1
    }
  }
  accu = (correct/nrow(test_data)) * 100  
  return(accu)
}

#load data
icu<-read.csv("https://raw.githubusercontent.com/maharjansudhan/DATA622/master/icu.csv",header=TRUE,sep=",")
icu["COMA"]<-ifelse(icu$LOC==2,1,0)
knn.df<-icu[,c("TYP","COMA","AGE","INF","STA")]

labelcol <- 5 # for icu, STA is the fifth col 
predictioncol<-labelcol+1


# create train/test partitions
set.seed(123)
n<-nrow(knn.df)
knn.df<- knn.df[sample(n),] # shuffle the rows before splitting

train.df <- knn.df[1:as.integer(0.7*n),] # first 70% of the shuffled rows form the training set

# K (the number of neighbors used to determine the class) is varied in the loop below
table(knn.df[,labelcol])
## 
##   0   1 
## 160  40
test.df <- knn.df[as.integer(0.7*n +1):n,] # remaining 30% of the shuffled rows form the test set
table(test.df[,labelcol])
## 
##  0  1 
## 55  5
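
As an optional sanity check, and assuming the class package is available, the built-in knn() can be run on the same split; because of random tie-breaking it may not match knn_predict2() exactly, but it should broadly agree:

library(class)
knn(train = train.df[, -labelcol], test = test.df[, -labelcol],
    cl = factor(train.df[, labelcol]), k = 3)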
df<-data.frame() #create empty data frame to collect (K, accuracy) pairs

for (K in c(3,5,7,15,25,50)) # loop over the K values
{
test.df <- knn.df[as.integer(0.7*n +1):n,] # reset the test set on each iteration (drops the prediction column)
predictions <- knn_predict2(test.df, train.df, K,labelcol) #calling knn_predict2()

test.df[,predictioncol] <- predictions #adding predictions to the test data as the 6th column
print(accuracy(test.df,labelcol,predictioncol))
print(table(test.df[[predictioncol]],test.df[[labelcol]]))

K<-c(K, accuracy(test.df,labelcol,predictioncol)) # pair up (K, accuracy)
df<-rbind(df,K)                                   # append as a new row of the results data frame
}
## [1] 75
##    
##      0  1
##   0 45  5
##   1 10  0
## [1] 81.66667
##    
##      0  1
##   0 49  5
##   1  6  0
## [1] 83.33333
##    
##      0  1
##   0 50  5
##   1  5  0
## [1] 86.66667
##    
##      0  1
##   0 52  5
##   1  3  0
## [1] 91.66667
##    
##      0  1
##   0 55  5
## [1] 91.66667
##    
##      0  1
##   0 55  5
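
As an optional cross-check, and assuming the caret package is installed, the confusion matrix and accuracy for any single K could also be produced with caret::confusionMatrix(); a sketch for the last K fitted in the loop above:

library(caret)
confusionMatrix(factor(test.df[[predictioncol]], levels = c(0, 1)),
                factor(test.df[[labelcol]],      levels = c(0, 1)))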

Plot the accuracy for each K value.

plot(df, type="o", col="red", main="KNN Accuracy plot", xlab="K value", ylab="Accuracy",lwd=2)
axis(side=1,at=df$X3) # the first column of df (auto-named X3 by rbind) holds the K values

After plotting accuracy against K for K = 3, 5, 7, 15, 25, and 50, the accuracy is highest at K = 25 (91.67%) and stays at that value for K = 50, while K = 3 gives the lowest accuracy (75%). The confusion matrices show, however, that at K = 25 and K = 50 the classifier predicts class 0 for every test record, so the high accuracy largely reflects the class imbalance in the test set (55 of the 60 records have STA = 0).