R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

Importing the dataset

library(readr)
Germination <- read_csv("C:/GGTUAN/DREAMS/Yankee/TSU/MSc_TSU/Spring_2024/CS-583 Data Minning/Project_Data/Germination.csv")
## Rows: 21 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): seed
## dbl (3): rownames, n, y
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(Germination)

germ_df <- Germination

Inspecting the dataset: structure, missing values (NA), etc.

germ_df
## # A tibble: 21 × 4
##    rownames seed      n     y
##       <dbl> <chr> <dbl> <dbl>
##  1        1 O75      39    10
##  2        2 O75      62    23
##  3        3 O75      81    23
##  4        4 O75      51    26
##  5        5 O75      39    17
##  6        6 O75       6     5
##  7        7 O75      74    53
##  8        8 O75      72    55
##  9        9 O75      51    32
## 10       10 O75      79    46
## # ℹ 11 more rows
View(germ_df)
str(germ_df)
## spc_tbl_ [21 × 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ rownames: num [1:21] 1 2 3 4 5 6 7 8 9 10 ...
##  $ seed    : chr [1:21] "O75" "O75" "O75" "O75" ...
##  $ n       : num [1:21] 39 62 81 51 39 6 74 72 51 79 ...
##  $ y       : num [1:21] 10 23 23 26 17 5 53 55 32 46 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   rownames = col_double(),
##   ..   seed = col_character(),
##   ..   n = col_double(),
##   ..   y = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
dim(germ_df)
## [1] 21  4
germ_df$seed
##  [1] "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O73"
## [13] "O73" "O73" "O73" "O73" "O73" "O73" "O73" "O73" "O73"
#Change to factor to be sure it is just 2 as stated in the data notes
germ_df3 <- germ_df
germ_df3$seed <- factor(germ_df3$seed)
str(germ_df3$seed)
##  Factor w/ 2 levels "O73","O75": 2 2 2 2 2 2 2 2 2 2 ...
is.na(germ_df)
##       rownames  seed     n     y
##  [1,]    FALSE FALSE FALSE FALSE
##  [2,]    FALSE FALSE FALSE FALSE
##  [3,]    FALSE FALSE FALSE FALSE
##  [4,]    FALSE FALSE FALSE FALSE
##  [5,]    FALSE FALSE FALSE FALSE
##  [6,]    FALSE FALSE FALSE FALSE
##  [7,]    FALSE FALSE FALSE FALSE
##  [8,]    FALSE FALSE FALSE FALSE
##  [9,]    FALSE FALSE FALSE FALSE
## [10,]    FALSE FALSE FALSE FALSE
## [11,]    FALSE FALSE FALSE FALSE
## [12,]    FALSE FALSE FALSE FALSE
## [13,]    FALSE FALSE FALSE FALSE
## [14,]    FALSE FALSE FALSE FALSE
## [15,]    FALSE FALSE FALSE FALSE
## [16,]    FALSE FALSE FALSE FALSE
## [17,]    FALSE FALSE FALSE FALSE
## [18,]    FALSE FALSE FALSE FALSE
## [19,]    FALSE FALSE FALSE FALSE
## [20,]    FALSE FALSE FALSE FALSE
## [21,]    FALSE FALSE FALSE FALSE
sum(is.na(germ_df))
## [1] 0
table(germ_df$seed)
## 
## O73 O75 
##  10  11
prop.table(table(germ_df$seed))
## 
##       O73       O75 
## 0.4761905 0.5238095

Scaling the dataset to reduce the impact of skewness or large values on the other variables

Here we use the “euclidean” distance because it is best suited to our data, which are continuous values.

Manhattan distance is a better fit for binary data.

# Drop column 2 (`seed`, the class label) so that only numeric columns remain.
# NOTE(review): the `rownames` index column is kept, so it takes part in the
# scaling and distance computations further down -- confirm this is intended.
germ_df1 <- germ_df[,-2]
germ_df1
## # A tibble: 21 × 3
##    rownames     n     y
##       <dbl> <dbl> <dbl>
##  1        1    39    10
##  2        2    62    23
##  3        3    81    23
##  4        4    51    26
##  5        5    39    17
##  6        6     6     5
##  7        7    74    53
##  8        8    72    55
##  9        9    51    32
## 10       10    79    46
## # ℹ 11 more rows
dim(germ_df1)
## [1] 21  3
View(germ_df1)
germ_df1_scale <- as.data.frame(scale(germ_df1))
str(germ_df1_scale)
## 'data.frame':    21 obs. of  3 variables:
##  $ rownames: num  -1.612 -1.45 -1.289 -1.128 -0.967 ...
##  $ n       : num  -0.0231 0.9071 1.6755 0.4622 -0.0231 ...
##  $ y       : num  -0.634 0.175 0.175 0.361 -0.198 ...
View(germ_df1_scale)
dist_mat <- dist(germ_df1_scale, method="euclidean")
View(dist_mat)

Comparison of clustering methods and silhouette coefficients

library(fpc)
## Warning: package 'fpc' was built under R version 4.3.3
library (cluster)
library (vegan)
## Warning: package 'vegan' was built under R version 4.3.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.3.3
## Loading required package: lattice
## This is vegan 2.6-4
# Fit agglomerative hierarchical clusterings on the same distance matrix,
# one per linkage method, so their dendrograms can be compared below.
# Average linkage:
h_germ_df1<- hclust(dist_mat, method='average')
# Single linkage:
h_germ_df1_single<- hclust(dist_mat, method='single')
# Complete linkage:
h_germ_df1_complete<- hclust(dist_mat, method='complete')

#show summary of cluster for method='average'
h_germ_df1
## 
## Call:
## hclust(d = dist_mat, method = "average")
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 21
##fixing margin too large error
#par(mar)
par(mar = c(1, 1, 1, 1))
plot(h_germ_df1)

#show summary of cluster for method='single'
h_germ_df1_single
## 
## Call:
## hclust(d = dist_mat, method = "single")
## 
## Cluster method   : single 
## Distance         : euclidean 
## Number of objects: 21
par(mar = c(1, 1, 1, 1))
plot(h_germ_df1_single)

#show summary of cluster for method='complete'
h_germ_df1_complete
## 
## Call:
## hclust(d = dist_mat, method = "complete")
## 
## Cluster method   : complete 
## Distance         : euclidean 
## Number of objects: 21
par(mar = c(1, 1, 1, 1))
plot(h_germ_df1_complete)

### Comparison of silhouette coefficients
################

set.seed(1234)
#d <- dist(scale(iris[ , -5]))
d <- dist(germ_df1_scale, method = "euclidean")
methds <- c("complete", "single", "average")

# Average silhouette width for every combination of
# number of clusters k (rows, 2..6) and linkage method (columns).
avgS <- matrix(NA, ncol = 3, nrow = 5,
               dimnames = list(2:6, methds))

for (m in seq_along(methds)) {
  # The tree depends only on the linkage method, so build it once per method
  # instead of once per (k, method) pair.
  h <- hclust(d, method = methds[m])
  for (k in 2:6) {
    # Cut the tree into k groups; `cl` avoids shadowing base::c().
    cl <- cutree(h, k)
    s <- silhouette(cl, d)
    # Column 3 of a silhouette object holds the per-observation widths.
    avgS[k - 1, m] <- mean(s[, 3])
  }
}
avgS
##    complete    single   average
## 2 0.4465135 0.3844378 0.4465135
## 3 0.3734599 0.4561129 0.4561129
## 4 0.4560353 0.4560353 0.4560353
## 5 0.4597658 0.3847092 0.3847092
## 6 0.4340711 0.2971553 0.3738207
####################3
h_germ_df1<- hclust(dist_mat, method='average')
#show summary of cluster
h_germ_df1
## 
## Call:
## hclust(d = dist_mat, method = "average")
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 21
##fixing margin too large error
#par(mar)
par(mar = c(1, 1, 1, 1))
plot(h_germ_df1)

##Checking out cluster … In this case, it is just 2

hca <- h_germ_df1
plot(hca)
rect.hclust(hca, k = 2, border = "red")
x <- rect.hclust(hca, h = 2, which = c(1,3), border = 2:5)

x
## [[1]]
##  [1]  6 11 12 13 14 15 16 17 18 19 20 21
## 
## [[2]]
## [1] 1 2 3 4 5 9
library(dendextend)
## Registered S3 method overwritten by 'dendextend':
##   method     from 
##   rev.hclust vegan
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:permute':
## 
##     shuffle
## The following object is masked from 'package:stats':
## 
##     cutree
avg_dendogram_obj <- as.dendrogram(h_germ_df1)
avg_col_dendogram <- color_branches(avg_dendogram_obj, h=2)
plot(avg_col_dendogram)

hac_cut <- cutree(h_germ_df1, k=2)

Scale and compare original data to clustering classification

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
germ_df_cl <- mutate(germ_df1_scale, cluster=hac_cut)
germ_df_cl
##      rownames           n          y cluster
## 1  -1.6116459 -0.02311077 -0.6338642       1
## 2  -1.4504813  0.90709781  0.1747569       1
## 3  -1.2893167  1.67553098  0.1747569       1
## 4  -1.1281521  0.46221544  0.3613618       1
## 5  -0.9669876 -0.02311077 -0.1984528       1
## 6  -0.8058230 -1.35775786 -0.9448723       2
## 7  -0.6446584  1.39242402  2.0408057       1
## 8  -0.4834938  1.31153632  2.1652089       1
## 9  -0.3223292  0.46221544  0.7345716       1
## 10 -0.1611646  1.59464328  1.6053943       1
## 11  0.0000000 -1.07465091 -0.6338642       2
## 12  0.1611646 -0.95331935 -0.7582674       2
## 13  0.3223292 -0.38710543 -0.6338642       2
## 14  0.4834938 -0.46799314 -0.7582674       2
## 15  0.6446584  0.21955234  0.1747569       2
## 16  0.8058230 -1.43864557 -1.2558804       2
## 17  0.9669876 -1.11509476 -1.0692756       2
## 18  1.1281521  0.05777693  0.1125553       2
## 19  1.2893167 -0.38710543 -0.3228561       2
## 20  1.4504813  0.46221544  0.7345716       2
## 21  1.6116459 -1.31731401 -1.0692756       2
## Count Cluster classification
count(germ_df_cl, cluster)
##   cluster  n
## 1       1  9
## 2       2 12
## compare with Original Dataset classification
count(germ_df, seed)
## # A tibble: 2 × 2
##   seed      n
##   <chr> <int>
## 1 O73      10
## 2 O75      11

Draw the plot

library(ggplot2)
# Scatter plot of the scaled data, coloured by hierarchical-cluster assignment.
ggplot(germ_df_cl, aes(x = n, y = y, color = factor(cluster))) + geom_point()

# Same axes on the original data, coloured by the recorded seed type.
# Columns are referenced by bare name inside aes(); ggplot2 discourages
# `data$column` there because the mapping should be resolved against `data`.
ggplot(germ_df, aes(x = n, y = y, color = factor(seed))) + geom_point()

Compute Dunn’s index

#Install clvalid
library(clValid)
dunn(dist_mat, hac_cut)       
## [1] 0.3412918
dist_mat[1:21]
##  [1] 1.2430326 1.9087032 1.2082146 0.7779251 1.5897671 3.1768951 3.2998213
##  [8] 1.9417764 3.1201447 1.9243542 2.0058966 1.9679309 2.1454619 2.4090792
## [15] 2.8696339 2.8339659 2.8408059 2.9402045 3.3889204 3.5005936 0.7851519

### K-means clustering

# Seed the RNG so the random initial centres chosen by kmeans() are reproducible.
set.seed(1234)
# k-means with 2 clusters on the numeric columns (column 2, `seed`, dropped).
# The argument is spelled out as `centers`; `center` only worked through
# partial argument matching.
# NOTE(review): this runs on the unscaled data (germ_df, not germ_df1_scale),
# still includes the `rownames` index column, and `iter.max = 1` allows only a
# single iteration -- confirm all three are intended.
germ_knn <- kmeans(germ_df[, -2], centers = 2, iter.max = 1)
germ_knn
## K-means clustering with 2 clusters of sizes 12, 9
## 
## Cluster means:
##    rownames        n        y
## 1 12.750000 22.08333  9.25000
## 2  8.666667 62.88889 34.77778
## 
## Clustering vector:
##  [1] 1 2 2 2 1 1 2 2 2 2 1 1 1 1 2 1 1 1 1 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 2979.417 3186.444
##  (between_SS / total_SS =  66.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
germ_knn$betweenss
## [1] 12000.52
germ_knn$withinss
## [1] 2979.417 3186.444
germ_knn$tot.withinss
## [1] 6165.861
table(germ_knn$cluster, germ_df$seed)
##    
##     O73 O75
##   1   8   4
##   2   2   7
# Cross-tabulate k-means cluster ids (rows) against the recorded seed type (columns).
cm <- table(germ_knn$cluster, germ_df$seed)
# Misclassification rate. Cluster ids are arbitrary labels, so score both
# possible cluster-to-seed assignments (rows as-is and rows swapped) and keep
# the one with the higher agreement instead of trusting the diagonal blindly.
1 - max(sum(diag(cm)), sum(diag(cm[2:1, ]))) / sum(cm)
## [1] 0.2857143
#install.packages("clusterCrit")
#library(clusterCrit)
#dunns_coefficient <- dunn(germ_df[,-2], germ_knn$cluster)

# Print the result
#print(paste("Dunn's Coefficient:", dunns_coefficient))

#Compare with this

#Install clvalid
library(clValid)
dunn(dist_mat, hac_cut)       
## [1] 0.3412918
dist_mat[1:21]
##  [1] 1.2430326 1.9087032 1.2082146 0.7779251 1.5897671 3.1768951 3.2998213
##  [8] 1.9417764 3.1201447 1.9243542 2.0058966 1.9679309 2.1454619 2.4090792
## [15] 2.8696339 2.8339659 2.8408059 2.9402045 3.3889204 3.5005936 0.7851519

Note that germ_df_train_labels1 and germ_df_train_labels2 are built so that both seed types (factor levels) appear in the training and test datasets; with only one level present, the test would not cover the full range of cases.

####PREDICTION

prop.table(table(germ_df$seed))
## 
##       O73       O75 
## 0.4761905 0.5238095
#summary( wd1$radius_mean, wd1$area_mean, wd1$smoothness_mean)

summary(germ_df1[c("rownames", "n", "y")])
##     rownames        n               y        
##  Min.   : 1   Min.   : 4.00   Min.   : 0.00  
##  1st Qu.: 6   1st Qu.:16.00   1st Qu.: 8.00  
##  Median :11   Median :39.00   Median :17.00  
##  Mean   :11   Mean   :39.57   Mean   :20.19  
##  3rd Qu.:16   3rd Qu.:51.00   3rd Qu.:26.00  
##  Max.   :21   Max.   :81.00   Max.   :55.00
# Min-max rescale a numeric vector to the interval [0, 1].
#
# x      Numeric vector to rescale.
# na.rm  Should NA values be ignored when computing the range? Defaults to
#        FALSE, in which case any NA in `x` makes the whole result NA.
#
# Returns a numeric vector of the same length as `x`. When every (non-missing)
# value is identical the range is zero; zeros are returned in that case
# instead of the NaN produced by dividing by zero.
normalize <- function(x, na.rm = FALSE) {
  rng <- range(x, na.rm = na.rm)
  span <- rng[2] - rng[1]
  if (!is.na(span) && span == 0) {
    # Constant input: x - min is already all zeros (NA positions stay NA).
    return(x - rng[1])
  }
  (x - rng[1]) / span
}

germ_df1
## # A tibble: 21 × 3
##    rownames     n     y
##       <dbl> <dbl> <dbl>
##  1        1    39    10
##  2        2    62    23
##  3        3    81    23
##  4        4    51    26
##  5        5    39    17
##  6        6     6     5
##  7        7    74    53
##  8        8    72    55
##  9        9    51    32
## 10       10    79    46
## # ℹ 11 more rows
germ_df1_n <- as.data.frame(lapply(germ_df1,normalize))
germ_df1_n
##    rownames          n          y
## 1      0.00 0.45454545 0.18181818
## 2      0.05 0.75324675 0.41818182
## 3      0.10 1.00000000 0.41818182
## 4      0.15 0.61038961 0.47272727
## 5      0.20 0.45454545 0.30909091
## 6      0.25 0.02597403 0.09090909
## 7      0.30 0.90909091 0.96363636
## 8      0.35 0.88311688 1.00000000
## 9      0.40 0.61038961 0.58181818
## 10     0.45 0.97402597 0.83636364
## 11     0.50 0.11688312 0.18181818
## 12     0.55 0.15584416 0.14545455
## 13     0.60 0.33766234 0.18181818
## 14     0.65 0.31168831 0.14545455
## 15     0.70 0.53246753 0.41818182
## 16     0.75 0.00000000 0.00000000
## 17     0.80 0.10389610 0.05454545
## 18     0.85 0.48051948 0.40000000
## 19     0.90 0.33766234 0.27272727
## 20     0.95 0.61038961 0.58181818
## 21     1.00 0.03896104 0.05454545
summary(germ_df1_n[c("rownames", "n", "y")])
##     rownames          n                y         
##  Min.   :0.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.25   1st Qu.:0.1558   1st Qu.:0.1455  
##  Median :0.50   Median :0.4545   Median :0.3091  
##  Mean   :0.50   Mean   :0.4620   Mean   :0.3671  
##  3rd Qu.:0.75   3rd Qu.:0.6104   3rd Qu.:0.4727  
##  Max.   :1.00   Max.   :1.0000   Max.   :1.0000
# Convert the seed type to a factor with an explicit level order.
# The codes in the data start with the letter "O" ("O73", "O75"), not the
# digit zero; numeric levels such as c(073, 075) match no value and would turn
# every entry into NA.
germ_df$seed <- factor(germ_df$seed, levels = c("O73", "O75"))

str(germ_df$seed)
##  Factor w/ 2 levels "O73","O75": 2 2 2 2 2 2 2 2 2 2 ...
dim(germ_df$seed)
## NULL
# Manual hold-out split of the normalised features:
# rows 10-13 form the test set, rows 1-9 and 14-21 the training set.
set.seed(1234)
germ_df_test <- germ_df1_n[seq(10, 13), ]
germ_df_train1 <- germ_df1_n[seq_len(9), ]
germ_df_train2 <- germ_df1_n[seq(14, 21), ]
germ_df_train <- rbind(germ_df_train1, germ_df_train2)



#table1 <- data.frame(ID = 1:3, Name = c("John", "Alice", "Bob"))
#table2 <- data.frame(ID = 4:6, Name = c("Charlie", "David", "Emily"))
#combined_table <- rbind(table1, table2)
#print(combined_table)



germ_df <- Germination

# Class labels (the `seed` column) for the same row ranges as the feature
# split: rows 10-13 for testing, rows 1-9 and 14-21 for training.
germ_df_test_labels <- germ_df[seq(10, 13), "seed"]

germ_df_train_labels1 <- germ_df[seq_len(9), "seed"]
germ_df_train_labels2 <- germ_df[seq(14, 21), "seed"]
germ_df_train_labels <- rbind(germ_df_train_labels1, germ_df_train_labels2)

germ_df_test_labels
## # A tibble: 4 × 1
##   seed 
##   <chr>
## 1 O75  
## 2 O75  
## 3 O73  
## 4 O73
unique(germ_df_train_labels)
## # A tibble: 2 × 1
##   seed 
##   <chr>
## 1 O75  
## 2 O73
germ_df_train_labels$seed
##  [1] "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O73" "O73" "O73"
## [13] "O73" "O73" "O73" "O73" "O73"
dim(germ_df_train_labels)
## [1] 17  1
head(germ_df_train_labels)
## # A tibble: 6 × 1
##   seed 
##   <chr>
## 1 O75  
## 2 O75  
## 3 O75  
## 4 O75  
## 5 O75  
## 6 O75
head(germ_df_test_labels)
## # A tibble: 4 × 1
##   seed 
##   <chr>
## 1 O75  
## 2 O75  
## 3 O73  
## 4 O73
germ_df_test_labels
## # A tibble: 4 × 1
##   seed 
##   <chr>
## 1 O75  
## 2 O75  
## 3 O73  
## 4 O73
germ_df[,2]
## # A tibble: 21 × 1
##    seed 
##    <chr>
##  1 O75  
##  2 O75  
##  3 O75  
##  4 O75  
##  5 O75  
##  6 O75  
##  7 O75  
##  8 O75  
##  9 O75  
## 10 O75  
## # ℹ 11 more rows
print(germ_df$seed[1:21])
##  [1] "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O73"
## [13] "O73" "O73" "O73" "O73" "O73" "O73" "O73" "O73" "O73"

Comparing K values

### Explanation

### The team used 4 test cases: 3 were predicted correctly and 1 was not. Both "O73" seeds were classified correctly, but only 1 of the 2 "O75" seeds was, as shown in the confusion matrix (the K = 2 CrossTable).

### For the sake of the analysis we tried K = 1, 2 and 4 and got the same CrossTable each time, but when K was increased to 21, 2 of our 4 test cases passed and 2 failed.

# K = 21: deliberately larger than the training set.
# The training set has only 17 rows, so knn() warns that k exceeds the number
# of patterns, and all four test rows receive the same predicted class.

library(class)
# Classify the 4 held-out rows from the 17 training rows and their seed labels.
germ_df_test_pred <- knn(train=germ_df_train, test=germ_df_test,cl=germ_df_train_labels$seed, k=21)
## Warning in knn(train = germ_df_train, test = germ_df_test, cl =
## germ_df_train_labels$seed, : k = 21 exceeds number 17 of patterns
germ_df_test_pred
## [1] O75 O75 O75 O75
## Levels: O73 O75
dim(germ_df_test_pred)
## NULL
germ_df_train
##    rownames          n          y
## 1      0.00 0.45454545 0.18181818
## 2      0.05 0.75324675 0.41818182
## 3      0.10 1.00000000 0.41818182
## 4      0.15 0.61038961 0.47272727
## 5      0.20 0.45454545 0.30909091
## 6      0.25 0.02597403 0.09090909
## 7      0.30 0.90909091 0.96363636
## 8      0.35 0.88311688 1.00000000
## 9      0.40 0.61038961 0.58181818
## 14     0.65 0.31168831 0.14545455
## 15     0.70 0.53246753 0.41818182
## 16     0.75 0.00000000 0.00000000
## 17     0.80 0.10389610 0.05454545
## 18     0.85 0.48051948 0.40000000
## 19     0.90 0.33766234 0.27272727
## 20     0.95 0.61038961 0.58181818
## 21     1.00 0.03896104 0.05454545
germ_df_test
##    rownames         n         y
## 10     0.45 0.9740260 0.8363636
## 11     0.50 0.1168831 0.1818182
## 12     0.55 0.1558442 0.1454545
## 13     0.60 0.3376623 0.1818182
germ_df_train_labels$seed
##  [1] "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O73" "O73" "O73"
## [13] "O73" "O73" "O73" "O73" "O73"
#Instal package gmodels and import the library(gmodels)

germ_df_test_pred
## [1] O75 O75 O75 O75
## Levels: O73 O75
germ_df_test_labels$seed
## [1] "O75" "O75" "O73" "O73"
library(gmodels)
## Warning: package 'gmodels' was built under R version 4.3.3
CrossTable(x=germ_df_test_labels$seed , y=germ_df_test_pred ,  prop.chisq=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  4 
## 
##  
##                          | germ_df_test_pred 
## germ_df_test_labels$seed |       O75 | Row Total | 
## -------------------------|-----------|-----------|
##                      O73 |         2 |         2 | 
##                          |     0.500 |           | 
## -------------------------|-----------|-----------|
##                      O75 |         2 |         2 | 
##                          |     0.500 |           | 
## -------------------------|-----------|-----------|
##             Column Total |         4 |         4 | 
## -------------------------|-----------|-----------|
## 
## 
# K = 2: each test row is classified from its 2 nearest training rows.
# NOTE(review): with an even k and two classes a 1-1 vote is possible; knn()
# presumably breaks such ties at random, so results may vary between runs
# unless a seed is set right before this call -- confirm.

library(class)
# Same train/test split and labels as above; only k differs.
germ_df_test_pred <- knn(train=germ_df_train, test=germ_df_test,cl=germ_df_train_labels$seed, k=2)

germ_df_test_pred
## [1] O75 O73 O73 O73
## Levels: O73 O75
dim(germ_df_test_pred)
## NULL
germ_df_train
##    rownames          n          y
## 1      0.00 0.45454545 0.18181818
## 2      0.05 0.75324675 0.41818182
## 3      0.10 1.00000000 0.41818182
## 4      0.15 0.61038961 0.47272727
## 5      0.20 0.45454545 0.30909091
## 6      0.25 0.02597403 0.09090909
## 7      0.30 0.90909091 0.96363636
## 8      0.35 0.88311688 1.00000000
## 9      0.40 0.61038961 0.58181818
## 14     0.65 0.31168831 0.14545455
## 15     0.70 0.53246753 0.41818182
## 16     0.75 0.00000000 0.00000000
## 17     0.80 0.10389610 0.05454545
## 18     0.85 0.48051948 0.40000000
## 19     0.90 0.33766234 0.27272727
## 20     0.95 0.61038961 0.58181818
## 21     1.00 0.03896104 0.05454545
germ_df_test
##    rownames         n         y
## 10     0.45 0.9740260 0.8363636
## 11     0.50 0.1168831 0.1818182
## 12     0.55 0.1558442 0.1454545
## 13     0.60 0.3376623 0.1818182
germ_df_train_labels$seed
##  [1] "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O75" "O73" "O73" "O73"
## [13] "O73" "O73" "O73" "O73" "O73"
#Instal package gmodels and import the library(gmodels)

germ_df_test_pred
## [1] O75 O73 O73 O73
## Levels: O73 O75
germ_df_test_labels$seed
## [1] "O75" "O75" "O73" "O73"
library(gmodels)

CrossTable(x=germ_df_test_labels$seed , y=germ_df_test_pred ,  prop.chisq=FALSE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  4 
## 
##  
##                          | germ_df_test_pred 
## germ_df_test_labels$seed |       O73 |       O75 | Row Total | 
## -------------------------|-----------|-----------|-----------|
##                      O73 |         2 |         0 |         2 | 
##                          |     1.000 |     0.000 |     0.500 | 
##                          |     0.667 |     0.000 |           | 
##                          |     0.500 |     0.000 |           | 
## -------------------------|-----------|-----------|-----------|
##                      O75 |         1 |         1 |         2 | 
##                          |     0.500 |     0.500 |     0.500 | 
##                          |     0.333 |     1.000 |           | 
##                          |     0.250 |     0.250 |           | 
## -------------------------|-----------|-----------|-----------|
##             Column Total |         3 |         1 |         4 | 
##                          |     0.750 |     0.250 |           | 
## -------------------------|-----------|-----------|-----------|
## 
##