ggpubr is a package that offers a sophisticated interface to produce nice histograms and violin plots for numeric variables and easily ads significance values to the plots. oetteR::f_plot_hist takes a data_ls list and plots a histogram or a bar plot of frequencies of all variables but the histograms are lacking some of ggpubr features, which are not so easily replicated. Here we want to try whether we can modify oetteR::fplot_hist to use most of ggpubr features
suppressPackageStartupMessages( require(oetteR) )
suppressPackageStartupMessages( require(ggpubr) )
suppressPackageStartupMessages( require(tidyverse) )
data_ls = ISLR::Auto %>%
f_clean_data()
## [1] "Number of excluded observations: 0"
ggdensity( data = data_ls$data
, x = 'displacement'
, y = '..count..'
, color = 'cylinders'
, fill = 'cylinders'
, palette = f_plot_col_vector74()
, rug = T
, add = 'mean'
)
f_plot_hist( data_ls
, variable = 'displacement'
, group = 'cylinders'
, y_axis = 'count'
, graph_type = 'line'
, rug = T )
gghistogram( data = data_ls$data
, x = 'displacement'
, y = '..count..'
, color = 'cylinders'
, fill = 'cylinders'
, palette = f_plot_col_vector74()
, rug = T
, add = 'mean'
)
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
f_plot_hist( data_ls
, variable = 'displacement'
, group = 'cylinders'
, y_axis = 'count'
, graph_type = 'bar'
, rug = T )
v1 = ggviolin( data = data_ls$data
, x = 'cylinders'
, y = 'displacement'
#, color = 'cylinders'
, fill = 'cylinders'
, palette = f_plot_col_vector74()
, rug = T
, add = 'boxplot'
, add.params = list( fill = 'white')
)
v1
v2 = f_plot_hist( data_ls
, variable = 'displacement'
, group = 'cylinders'
, y_axis = 'count'
, graph_type = 'violin'
, rug = T )
v2
lvl = levels(data_ls$data$cylinders)
compare = expand.grid( a = lvl
, b = lvl
, stringsAsFactors = F) %>%
filter( a != b) %>%
mutate( comb = map2( a, b, function(x,y) sort( c(x,y) ) )
, comb = map( comb, paste0, collapse = ',' )
) %>%
unnest( comb ) %>%
group_by( comb ) %>%
summarise() %>%
mutate( comb = stringr::str_split(comb, ',') ) %>%
.$comb
v1_1 = v1 +
stat_compare_means(comparisons = compare, label = "p.signif")
v1_1
ggplot( data_ls$data, aes( cylinders
, displacement
, fill = cylinders) ) +
geom_violin() +
stat_compare_means(comparisons = compare, label = "p.signif")
p = f_plot_pretty_points( df = data_ls$data
, col_y = 'displacement'
, col_x = 'weight'
, col_facet = 'origin') +
geom_smooth() +
ggpubr::stat_cor()
## [1] "Number of excluded observations: 0"
p
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
Make random grouping
data_ls$data$rnd = as.factor( sample( LETTERS[1:5], nrow(data_ls$data), replace = T ) )
filter all pairs that have a t_test p_val over 0.05
f_compare_means = function(comb, data, col_var, col_group){
sym_group = as.name(col_group)
lvl1 = stringr::str_split(comb, ',')[[1]][1]
lvl2 = stringr::str_split(comb, ',')[[1]][2]
x = data %>%
filter( rlang::UQ(sym_group) == lvl1 ) %>%
.[[col_var]]
y = data %>%
filter( rlang::UQ(sym_group) == lvl2 ) %>%
.[[col_var]]
t_test = t.test(x,y)
return( t_test$p.value )
}
lvl = levels(data_ls$data$rnd)
compare_filt = expand.grid( a = lvl
, b = lvl
, stringsAsFactors = F) %>%
filter( a != b) %>%
mutate( comb = map2( a, b, function(x,y) sort( c(x,y) ) )
, comb = map( comb, paste0, collapse = ',' )
) %>%
unnest( comb ) %>%
group_by( comb ) %>%
summarise() %>%
mutate( p_val = map_dbl( comb, f_compare_means
, data = data_ls$data
, col_var = 'displacement'
, col_group = 'rnd' )
, comb = stringr::str_split(comb, ',') ) %>%
filter( p_val <= 0.05 ) %>%
.$comb
ggviolin( data = data_ls$data
, x = 'rnd'
, y = 'displacement'
, color = 'rnd'
, fill = 'rnd'
, palette = f_plot_col_vector74()
, rug = T
, add = 'boxplot'
, add.params = list( fill = 'white')
) +
stat_compare_means(comparisons = compare, label = "p.signif")
## Warning: Computation failed in `stat_signif()`:
## missing value where TRUE/FALSE needed
ggviolin( data = data_ls$data
, x = 'rnd'
, y = 'displacement'
, color = 'rnd'
, fill = 'rnd'
, palette = f_plot_col_vector74()
, rug = T
, add = 'boxplot'
, add.params = list( fill = 'white')
) +
stat_compare_means(comparisons = compare_filt, label = "p.signif")
v1_1 %>%
plotly::ggplotly()
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomSignif() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
v1 %>%
plotly::ggplotly()
v2 %>%
plotly::ggplotly()
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomSignif() has yet to be implemented in plotly.
## If you'd like to see this geom implemented,
## Please open an issue with your example code at
## https://github.com/ropensci/plotly/issues
ggdensity( data = data_ls$data
, x = 'displacement'
, y = '..count..'
, color = 'cylinders'
, fill = 'cylinders'
, palette = f_plot_col_vector74()
, rug = T
, add = 'mean'
) %>%
plotly::ggplotly()
p %>%
plotly::ggplotly()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
ggpubr is a great to add p values to any ggplot object and the histograms do look better than the ones from my default function. Unfortunately the p values dont show in plotly. ggpubr also does not plot frequency distributions of factor variables and does not offer a stat_ function to add a chi square p value.