# Install the packages this analysis depends on, but only those that are not
# already available. Calling install.packages() unconditionally re-downloads
# and re-installs every package each time the script is run or the document
# is knit, which is slow and fails when there is no network access.
required_packages <- c("ggstatsplot", "plotly", "tidyverse", "mlbench")

# requireNamespace() returns TRUE/FALSE instead of raising an error, so it can
# be used as an "is this package installed?" probe.
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
missing_packages <- required_packages[!is_installed]

if (length(missing_packages) > 0) {
  install.packages(missing_packages, repos = "https://cloud.r-project.org")
}
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggstatsplot)
## You can cite this package as:
## Patil, I. (2021). Visualizations with statistical details: The 'ggstatsplot' approach.
## Journal of Open Source Software, 6(61), 3167, doi:10.21105/joss.03167
library(plotly)
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(mlbench)
# Load the BreastCancer data frame. Naming the package makes it explicit where
# the data set comes from; a single call is sufficient.
data("BreastCancer", package = "mlbench")

# Look at the raw data: view() opens it in the data viewer, head() prints the
# first six rows.
view(BreastCancer)
head(BreastCancer)
## Id Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 1000025 5 1 1 1 2
## 2 1002945 5 4 4 5 7
## 3 1015425 3 1 1 1 2
## 4 1016277 6 8 8 1 3
## 5 1017023 4 1 1 3 2
## 6 1017122 8 10 10 8 7
## Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses Class
## 1 1 3 1 1 benign
## 2 10 3 2 1 benign
## 3 2 3 1 1 benign
## 4 4 3 7 1 benign
## 5 1 3 1 1 benign
## 6 10 9 7 1 malignant
# Per-column overview of the raw data. In the output below, Id is reported as
# a character column, the nine score columns and Class are summarised as level
# counts, and Bare.nuclei is the only column that reports missing values
# (16 NA's).
summary(BreastCancer)
## Id Cl.thickness Cell.size Cell.shape Marg.adhesion
## Length:699 1 :145 1 :384 1 :353 1 :407
## Class :character 5 :130 10 : 67 2 : 59 2 : 58
## Mode :character 3 :108 3 : 52 10 : 58 3 : 58
## 4 : 80 2 : 45 3 : 56 10 : 55
## 10 : 69 4 : 40 4 : 44 4 : 33
## 2 : 50 5 : 30 5 : 34 8 : 25
## (Other):117 (Other): 81 (Other): 95 (Other): 63
## Epith.c.size Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses
## 2 :386 1 :402 2 :166 1 :443 1 :579
## 3 : 72 10 :132 3 :165 10 : 61 2 : 35
## 4 : 48 2 : 30 1 :152 3 : 44 3 : 33
## 1 : 47 5 : 30 7 : 73 2 : 36 10 : 14
## 6 : 41 3 : 28 4 : 40 8 : 24 4 : 12
## 5 : 39 (Other): 61 5 : 34 6 : 22 7 : 9
## (Other): 66 NA's : 16 (Other): 69 (Other): 69 (Other): 17
## Class
## benign :458
## malignant:241
##
##
##
##
##
# Build the analysis table from the raw BreastCancer data:
#   1. drop every row that contains a missing value;
#   2. convert the nine score columns to numeric, going through character
#      first so that the level labels are parsed rather than the internal
#      factor codes;
#   3. store Class as a factor.
clean_data <- BreastCancer %>%
  drop_na() %>%
  mutate(
    across(
      .cols = c(
        Cl.thickness, Cell.size, Cell.shape, Marg.adhesion, Epith.c.size,
        Bare.nuclei, Bl.cromatin, Normal.nucleoli, Mitoses
      ),
      .fns = function(score) as.numeric(as.character(score))
    ),
    Class = as.factor(Class)
  )
# Between-group comparison of clump thickness for the two tumor classes.
ggbetweenstats(
  data = clean_data,
  x = Class,
  y = Cl.thickness,
  title = "Comparison of Clump Thickness by Tumor Class",
  xlab = "Tumor Type",
  ylab = "Clump Thickness"
)
# Interpretation: The plot shows that malignant tumors have a much
# higher average clump thickness (mean = 7.19) compared to benign tumors
# (mean = 2.96).
# Between-group comparison of epithelial cell size for the two tumor classes.
ggbetweenstats(
  data = clean_data,
  x = Class,
  y = Epith.c.size,
  title = "Epithelial Cell Size by Tumor Class",
  xlab = "Tumor Type",
  ylab = "Epithelial Cell Size"
)
# Interpretation: The plot compares epithelial cell size between
# benign and malignant tumors; the average for each class is the mean
# annotated on the plot.
# Scatter plot of uniformity of cell size against clump thickness, using the
# cleaned (numeric) score columns.
ggscatterstats(
  data = clean_data,
  x = Cl.thickness,
  y = Cell.size,
  xlab = "Clump Thickness",
  ylab = "Uniformity of Cell Size",
  title = "Relationship Between Clump Thickness and Cell Size"
)
## Registered S3 method overwritten by 'ggside':
## method from
## +.gg ggplot2
## `stat_xsidebin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_ysidebin()` using `bins = 30`. Pick better value with `binwidth`.
# Interactive jitter plot of clump thickness by tumor class.
library(ggplot2)
library(plotly)

# The `text` aesthetic is not drawn by ggplot2; it carries the HTML string that
# ggplotly() shows on hover because of `tooltip = "text"` below.
p_clump <- ggplot(
  clean_data,
  aes(
    x = Class,
    y = Cl.thickness,
    color = Class,
    text = paste("Tumor Type: ", Class, "<br>",
                 "Clump Thickness: ", Cl.thickness)
  )
) +
  # Scores are whole numbers, so jitter spreads out points that would
  # otherwise be drawn on top of each other.
  geom_jitter(width = 0.3, height = 0.1, alpha = 0.7) +
  labs(
    title = "Clump Thickness by Tumor Type",
    x = "Tumor Type",
    y = "Clump Thickness"
  ) +
  theme_minimal()

# Hide the toolbar through config(): mode bar visibility is the plot
# configuration option `displayModeBar`, not an attribute of `layout.modebar`,
# so layout(modebar = list(visible = FALSE)) leaves the toolbar on screen.
fig_clump <- ggplotly(p_clump, tooltip = "text") %>%
  plotly::config(displayModeBar = FALSE)
fig_clump
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.