Project 2

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(readxl)
gpa_iq <- read_excel("C:/Users/alyss/OneDrive/Desktop/gpa_iq.xlsx")
View(gpa_iq)

library(ggplot2)

ggplot(data = gpa_iq, mapping = aes(x = iq, y = gpa)) + geom_point(mapping = aes(color = iq))

gpa_iq1<-subset(gpa_iq, gpa_iq$gender == '1')

gpa_iq2<-subset(gpa_iq, gpa_iq$gender == '2')

ggplot(data = gpa_iq1, mapping = aes(x = iq, y = gpa)) + geom_point(mapping = aes(color = iq))

ggplot(data = gpa_iq2, mapping = aes(x = iq, y = gpa)) + geom_point(mapping = aes(color = iq))

gpa_aver1<- mean(gpa_iq1$gpa)

gpa_aver1

## [1] 7.696548

gpa_aver2<- mean(gpa_iq2$gpa)

gpa_aver2

## [1] 7.281638

iq_average1<-mean(gpa_iq1$iq)

iq_average2<-mean(gpa_iq2$iq)

iq_average1

## [1] 105.8387

iq_average2

## [1] 110.9574

data <- data.frame(
  
  Group = c("Gender 1", "Gender 2"),
  
  Average = c(gpa_aver1, gpa_aver2))

ggplot(data, aes(x = Group, y = Average, fill = Group)) +
  
  geom_col() +
  
  theme_minimal() +
  
  labs(title = "Comparison of Average GPA", x = "Gender", y = "Average") +
  
  scale_fill_manual(values = c("Group 1" = "blue", "Group 2" = "red"))

data2 <- data.frame(
  
  Group = c("Gender 1", "Gender 2"),
  
  Average = c(iq_average1, iq_average2))

ggplot(data2, aes(x = Group, y = Average, fill = Group)) +
  
  geom_col() +
  
  theme_minimal() +
  
  labs(title = "Comparison of Average IQ", x = "Gender", y = "Average") +
  
  scale_fill_manual(values = c("Group 1" = "blue", "Group 2" = "red"))

gpa<- gpa_iq[-1]
View(gpa)
table(gpa$gpa)

## 
##  0.53  1.76 2.412 3.408 3.647  3.82 3.936     4 4.643   4.7 4.885 5.062 5.237 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.528 5.936     6 6.057 6.173 6.231 6.419 6.938 7.167 7.295 7.333  7.47 7.571 
##     1     1     1     2     1     1     1     1     2     2     1     2     1 
## 7.585 7.588 7.598 7.643 7.647  7.65 7.822 7.825 7.833 7.882  7.94 7.996 7.998 
##     1     1     1     1     2     1     1     1     1     1     1     1     1 
##     8 8.167 8.175 8.235 8.292 8.333 8.353 8.714 8.833 8.882 8.938 8.998     9 
##     2     2     2     1     1     1     1     1     1     1     1     1     1 
## 9.167 9.333 9.348  9.41 9.429   9.5 9.571 9.585 9.648 9.763 9.999 10.14 10.58 
##     3     1     1     1     1     2     1     1     1     1     1     1     1 
##  10.7 10.76 
##     1     1

table(gpa$iq)

## 
##  72  74  77  79  86  89  90  91  93  96  97  98 100 102 103 104 105 106 107 108 
##   1   1   1   1   1   1   1   1   2   1   2   1   2   2   4   2   3   3   4   1 
## 109 110 111 112 113 114 115 116 118 119 120 123 124 126 127 128 130 132 136 
##   1   4   4   4   3   4   2   1   2   3   2   2   2   1   2   3   1   1   1

table(gpa$gender)

## 
##  1  2 
## 31 47

normaliZe<- function(x) {
  return ((x-min(x))/(max(x)-min(x)))
}
normaliZe(c(1,2,3,4,5))

## [1] 0.00 0.25 0.50 0.75 1.00

gpa_1<-as.data.frame(lapply(gpa[0:4], normaliZe))
summary(gpa_1)

##       obs              gpa               iq             gender      
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.2188   1st Qu.:0.5619   1st Qu.:0.4844   1st Qu.:0.0000  
##  Median :0.4659   Median :0.7135   Median :0.5938   Median :1.0000  
##  Mean   :0.4770   Mean   :0.6761   Mean   :0.5769   Mean   :0.6026  
##  3rd Qu.:0.7017   3rd Qu.:0.8263   3rd Qu.:0.7109   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000

any(is.na(gpa_iq))

## [1] FALSE

gpa<- gpa_iq[-1]
gpa

## # A tibble: 78 × 5
##      obs   gpa    iq gender concept
##    <dbl> <dbl> <dbl>  <dbl>   <dbl>
##  1     1  7.94   111      2      67
##  2     2  8.29   107      2      43
##  3     3  4.64   100      2      52
##  4     4  7.47   107      2      66
##  5     5  8.88   114      1      58
##  6     6  7.58   115      2      51
##  7     7  7.65   111      2      71
##  8     8  2.41    97      2      51
##  9     9  6      100      1      49
## 10    10  8.83   112      2      51
## # ℹ 68 more rows

View(gpa)
table(gpa$gpa)

## 
##  0.53  1.76 2.412 3.408 3.647  3.82 3.936     4 4.643   4.7 4.885 5.062 5.237 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.528 5.936     6 6.057 6.173 6.231 6.419 6.938 7.167 7.295 7.333  7.47 7.571 
##     1     1     1     2     1     1     1     1     2     2     1     2     1 
## 7.585 7.588 7.598 7.643 7.647  7.65 7.822 7.825 7.833 7.882  7.94 7.996 7.998 
##     1     1     1     1     2     1     1     1     1     1     1     1     1 
##     8 8.167 8.175 8.235 8.292 8.333 8.353 8.714 8.833 8.882 8.938 8.998     9 
##     2     2     2     1     1     1     1     1     1     1     1     1     1 
## 9.167 9.333 9.348  9.41 9.429   9.5 9.571 9.585 9.648 9.763 9.999 10.14 10.58 
##     3     1     1     1     1     2     1     1     1     1     1     1     1 
##  10.7 10.76 
##     1     1

gpa$gpa<-factor(gpa$gpa)
str(gpa$gpa)

##  Factor w/ 67 levels "0.53","1.76",..: 37 44 9 25 49 27 32 3 16 48 ...

table(gpa$gpa)

## 
##  0.53  1.76 2.412 3.408 3.647  3.82 3.936     4 4.643   4.7 4.885 5.062 5.237 
##     1     1     1     1     1     1     1     1     1     1     1     1     1 
## 5.528 5.936     6 6.057 6.173 6.231 6.419 6.938 7.167 7.295 7.333  7.47 7.571 
##     1     1     1     2     1     1     1     1     2     2     1     2     1 
## 7.585 7.588 7.598 7.643 7.647  7.65 7.822 7.825 7.833 7.882  7.94 7.996 7.998 
##     1     1     1     1     2     1     1     1     1     1     1     1     1 
##     8 8.167 8.175 8.235 8.292 8.333 8.353 8.714 8.833 8.882 8.938 8.998     9 
##     2     2     2     1     1     1     1     1     1     1     1     1     1 
## 9.167 9.333 9.348  9.41 9.429   9.5 9.571 9.585 9.648 9.763 9.999 10.14 10.58 
##     3     1     1     1     1     2     1     1     1     1     1     1     1 
##  10.7 10.76 
##     1     1

dist_mat<-dist(gpa_1, method='euclidean')
hclust_avg<-hclust(dist_mat, method='average')
hclust_avg

## 
## Call:
## hclust(d = dist_mat, method = "average")
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 78

plot(hclust_avg)
cut_avg<-cutree(hclust_avg, k=2)
rect.hclust(hclust_avg, k=2, border=2.6)
abline(h=0.7,col='red')

library(dendextend)

## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------

## 
## Attaching package: 'dendextend'

## The following object is masked from 'package:stats':
## 
##     cutree

avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

gpa_df_cl<-mutate(gpa,cluster=cut_avg)
count(gpa_df_cl,cluster)

## # A tibble: 2 × 2
##   cluster     n
##     <int> <int>
## 1       1    47
## 2       2    31

library(ggplot2)
ggplot(gpa_df_cl,aes(x=iq, y = gpa,color = factor(cluster))) +geom_point()

library("clValid")

## Loading required package: cluster

library(cluster)
?dunn

## starting httpd help server ...

##  done

dunn(dist_mat,cut_avg)

## [1] 0.8841589

dist_mat<-dist(gpa_1, method='euclidean')
hclust_avg<-hclust(dist_mat, method='average')
hclust_avg

## 
## Call:
## hclust(d = dist_mat, method = "average")
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 78

plot(hclust_avg)
cut_avg<-cutree(hclust_avg, k=3)
rect.hclust(hclust_avg, k=3, border=2.6)
abline(h=0.7,col='red')

library(dendextend)
avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)

library(dplyr)
gpa_df_cl<-mutate(gpa,cluster=cut_avg)
count(gpa_df_cl,cluster)

## # A tibble: 3 × 2
##   cluster     n
##     <int> <int>
## 1       1    37
## 2       2    10
## 3       3    31

library(ggplot2)
ggplot(gpa_df_cl,aes(x=iq, y = gpa,color = factor(cluster))) +geom_point()

library("clValid")
library(cluster)
?dunn
dunn(dist_mat,cut_avg)

## [1] 0.2465046

dist_mat<-dist(gpa_1, method='euclidean')
hclust_avg<-hclust(dist_mat, method='average')
hclust_avg

## 
## Call:
## hclust(d = dist_mat, method = "average")
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 78

plot(hclust_avg)
cut_avg<-cutree(hclust_avg, k=4)
rect.hclust(hclust_avg, k=4, border=2.6)
abline(h=0.7,col='red')

library(dendextend)
avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)

library(dplyr)
gpa_df_cl<-mutate(gpa,cluster=cut_avg)
count(gpa_df_cl,cluster)

## # A tibble: 4 × 2
##   cluster     n
##     <int> <int>
## 1       1    37
## 2       2    10
## 3       3    18
## 4       4    13

library(ggplot2)
ggplot(gpa_df_cl,aes(x=iq, y = gpa,color = factor(cluster))) +geom_point()

library("clValid")
library(cluster)
?dunn
dunn(dist_mat,cut_avg)

## [1] 0.2591645

kmeans(gpa_1, centers = 2, iter.max = 10, nstart = 1)

## K-means clustering with 2 clusters of sizes 47, 31
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.4721954 0.6599842 0.6087101      1
## 2 0.4842375 0.7005424 0.5287298      0
## 
## Clustering vector:
##  [1] 1 1 1 1 2 1 1 1 2 1 2 2 1 2 2 2 2 2 1 2 1 1 2 1 2 1 1 1 1 2 2 2 1 1 2 1 1 2
## [39] 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 2 1 2 1 1 2 2 1 2 1 2 2 2 1
## [77] 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 7.686896 5.329421
##  (between_SS / total_SS =  59.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

set.seed(78)
km.res <- kmeans(gpa_1, 2, nstart = 25)
print(km.res)

## K-means clustering with 2 clusters of sizes 47, 31
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.4721954 0.6599842 0.6087101      1
## 2 0.4842375 0.7005424 0.5287298      0
## 
## Clustering vector:
##  [1] 1 1 1 1 2 1 1 1 2 1 2 2 1 2 2 2 2 2 1 2 1 1 2 1 2 1 1 1 1 2 2 2 1 1 2 1 1 2
## [39] 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 2 2 2 1 1 1 1 1 1 2 1 2 1 1 2 2 1 2 1 2 2 2 1
## [77] 2 1
## 
## Within cluster sum of squares by cluster:
## [1] 7.686896 5.329421
##  (between_SS / total_SS =  59.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

kmeans(gpa_1, centers = 3, iter.max = 10, nstart = 1)

## K-means clustering with 3 clusters of sizes 13, 18, 47
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.8164336 0.6635161 0.4122596      0
## 2 0.2443182 0.7272836 0.6128472      0
## 3 0.4721954 0.6599842 0.6087101      1
## 
## Clustering vector:
##  [1] 3 3 3 3 2 3 3 3 2 3 2 2 3 2 2 2 2 2 3 2 3 3 2 3 2 3 3 3 3 2 2 2 3 3 2 3 3 2
## [39] 2 3 3 3 3 3 3 3 1 3 3 3 3 3 3 1 1 1 3 3 3 3 3 3 1 3 1 3 3 1 1 3 1 3 1 1 1 3
## [77] 1 3
## 
## Within cluster sum of squares by cluster:
## [1] 1.148926 1.375380 7.686896
##  (between_SS / total_SS =  67.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

set.seed(78)
km.res <- kmeans(gpa_1, 3, nstart = 25)
print(km.res)

## K-means clustering with 3 clusters of sizes 27, 20, 31
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.2752525 0.7233047 0.6805556      1
## 2 0.7380682 0.5745015 0.5117188      1
## 3 0.4842375 0.7005424 0.5287298      0
## 
## Clustering vector:
##  [1] 1 1 1 1 3 1 1 1 3 1 3 3 1 3 3 3 3 3 1 3 1 1 3 1 3 1 1 1 1 3 3 3 1 1 3 1 1 3
## [39] 3 1 1 1 1 1 1 2 3 2 2 2 2 2 2 3 3 3 2 2 2 2 2 2 3 2 3 2 2 3 3 2 3 2 3 3 3 2
## [77] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 2.734232 1.909746 5.329421
##  (between_SS / total_SS =  68.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

kmeans(gpa_1, centers = 4, iter.max = 10, nstart = 1)

## K-means clustering with 4 clusters of sizes 11, 13, 18, 36
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.4948347 0.3213365 0.3707386      1
## 2 0.8164336 0.6635161 0.4122596      0
## 3 0.2443182 0.7272836 0.6128472      0
## 4 0.4652778 0.7634599 0.6814236      1
## 
## Clustering vector:
##  [1] 4 4 1 4 3 4 4 1 3 4 3 3 4 3 3 3 3 3 1 3 4 1 3 4 3 4 4 4 4 3 3 3 4 4 3 4 4 3
## [39] 3 4 4 4 4 4 4 1 2 1 4 4 1 4 1 2 2 2 4 1 4 4 4 4 2 4 2 4 4 2 2 1 2 1 2 2 2 4
## [77] 2 4
## 
## Within cluster sum of squares by cluster:
## [1] 1.545389 1.148926 1.375380 3.673906
##  (between_SS / total_SS =  75.7 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

set.seed(78)
km.res <- kmeans(gpa_1, 4, nstart = 25)
print(km.res)

## K-means clustering with 4 clusters of sizes 27, 13, 20, 18
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.2752525 0.7233047 0.6805556      1
## 2 0.8164336 0.6635161 0.4122596      0
## 3 0.7380682 0.5745015 0.5117188      1
## 4 0.2443182 0.7272836 0.6128472      0
## 
## Clustering vector:
##  [1] 1 1 1 1 4 1 1 1 4 1 4 4 1 4 4 4 4 4 1 4 1 1 4 1 4 1 1 1 1 4 4 4 1 1 4 1 1 4
## [39] 4 1 1 1 1 1 1 3 2 3 3 3 3 3 3 2 2 2 3 3 3 3 3 3 2 3 2 3 3 2 2 3 2 3 2 2 2 3
## [77] 2 3
## 
## Within cluster sum of squares by cluster:
## [1] 2.734232 1.148926 1.909746 1.375380
##  (between_SS / total_SS =  77.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

kmeans(gpa_1, centers = 5, iter.max = 10, nstart = 1)

## K-means clustering with 5 clusters of sizes 14, 10, 23, 18, 13
## 
## Cluster means:
##         obs       gpa        iq gender
## 1 0.7500000 0.6901480 0.6015625      1
## 2 0.4806818 0.2873412 0.3750000      1
## 3 0.2994071 0.8036423 0.7146739      1
## 4 0.2443182 0.7272836 0.6128472      0
## 5 0.8164336 0.6635161 0.4122596      0
## 
## Clustering vector:
##  [1] 3 3 2 3 4 3 3 2 4 3 4 4 3 4 4 4 4 4 2 4 3 2 4 3 4 3 3 3 3 4 4 4 3 3 4 3 3 4
## [39] 4 3 3 3 3 3 3 2 5 2 1 1 2 1 1 5 5 5 1 2 1 1 1 1 5 1 5 1 1 5 5 2 5 2 5 5 5 1
## [77] 5 1
## 
## Within cluster sum of squares by cluster:
## [1] 0.4876544 1.3942328 1.3559469 1.3753804 1.1489260
##  (between_SS / total_SS =  81.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

set.seed(78)
km.res <- kmeans(gpa_1, 5, nstart = 25)
print(km.res)

## K-means clustering with 5 clusters of sizes 18, 18, 17, 12, 13
## 
## Cluster means:
##          obs       gpa        iq gender
## 1 0.45770202 0.8327034 0.7760417      1
## 2 0.24431818 0.7272836 0.6128472      0
## 3 0.75133690 0.5460181 0.4678309      1
## 4 0.09848485 0.5623574 0.5572917      1
## 5 0.81643357 0.6635161 0.4122596      0
## 
## Clustering vector:
##  [1] 4 4 4 4 2 4 4 4 2 4 2 2 4 2 2 2 2 2 4 2 4 4 2 1 2 1 1 1 1 2 2 2 1 1 2 1 1 2
## [39] 2 1 1 1 1 1 1 3 5 3 3 1 3 3 3 5 5 5 1 3 1 3 3 3 5 3 5 3 3 5 5 3 5 3 5 5 5 3
## [77] 5 3
## 
## Within cluster sum of squares by cluster:
## [1] 0.5985039 1.3753804 1.5690878 0.7697003 1.1489260
##  (between_SS / total_SS =  82.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

kmeans(gpa_1, centers = 6, iter.max = 10, nstart = 1)

## K-means clustering with 6 clusters of sizes 8, 17, 13, 10, 12, 18
## 
## Cluster means:
##          obs       gpa        iq gender
## 1 0.16335227 0.5776760 0.4414062      0
## 2 0.75133690 0.5460181 0.4678309      1
## 3 0.81643357 0.6635161 0.4122596      0
## 4 0.30909091 0.8469697 0.7500000      0
## 5 0.09848485 0.5623574 0.5572917      1
## 6 0.45770202 0.8327034 0.7760417      1
## 
## Clustering vector:
##  [1] 5 5 5 5 4 5 5 5 1 5 1 1 5 1 1 4 4 1 5 1 5 5 1 6 4 6 6 6 6 4 4 4 6 6 4 6 6 4
## [39] 4 6 6 6 6 6 6 2 3 2 2 6 2 2 2 3 3 3 6 2 6 2 2 2 3 2 3 2 2 3 3 2 3 2 3 3 3 2
## [77] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 0.1750368 1.5690878 1.1489260 0.3603926 0.7697003 0.5985039
##  (between_SS / total_SS =  85.5 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

set.seed(78)
km.res <- kmeans(gpa_1, 6, nstart = 25)
print(km.res)

## K-means clustering with 6 clusters of sizes 6, 18, 15, 13, 14, 12
## 
## Cluster means:
##          obs       gpa        iq gender
## 1 0.71022727 0.3046595 0.3020833      1
## 2 0.24431818 0.7272836 0.6128472      0
## 3 0.41666667 0.8520626 0.7791667      1
## 4 0.81643357 0.6635161 0.4122596      0
## 5 0.75000000 0.6901480 0.6015625      1
## 6 0.09848485 0.5623574 0.5572917      1
## 
## Clustering vector:
##  [1] 6 6 6 6 2 6 6 6 2 6 2 2 6 2 2 2 2 2 6 2 6 6 2 3 2 3 3 3 3 2 2 2 3 3 2 3 3 2
## [39] 2 3 3 3 3 3 3 1 4 1 5 5 1 5 5 4 4 4 5 1 5 5 5 5 4 5 4 5 5 4 4 1 4 1 4 4 4 5
## [77] 4 5
## 
## Within cluster sum of squares by cluster:
## [1] 0.4146329 1.3753804 0.4018837 1.1489260 0.4876544 0.7697003
##  (between_SS / total_SS =  85.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

gpa_dist<- dist(gpa_1, method = "euclidean")
gpaCluster<- hclust(gpa_dist, method = "ward.D")
plot(gpaCluster)

clusterGroup <- cutree(gpaCluster, k = 4)
tapply(gpa_1$gpa, clusterGroup, mean)

##         1         2         3         4 
## 0.7175367 0.7272836 0.5945837 0.6635161

plot(clusterGroup)

library(cluster)
s <- silhouette(clusterGroup, gpa_dist)
plot(s)

Project 2

Alyssa Scott and em

2024-03-26

R Markdown

Including Plots