“Not a training on specific tools”
sum(1,1)
## [1] 2
sum
## function (..., na.rm = FALSE) .Primitive("sum")
colSums(some_data)
## some_variable another_variable
## -2.787727 150.029546
The code behind:
colSums
## function (x, na.rm = FALSE, dims = 1L)
## {
## if (is.data.frame(x))
## x <- as.matrix(x)
## if (!is.array(x) || length(dn <- dim(x)) < 2L)
## stop("'x' must be an array of at least two dimensions")
## if (dims < 1L || dims > length(dn) - 1L)
## stop("invalid 'dims'")
## n <- prod(dn[id <- seq_len(dims)])
## dn <- dn[-id]
## z <- if (is.complex(x))
## .Internal(colSums(Re(x), n, prod(dn), na.rm)) + (0+1i) *
## .Internal(colSums(Im(x), n, prod(dn), na.rm))
## else .Internal(colSums(x, n, prod(dn), na.rm))
## if (length(dn) > 1L) {
## dim(z) <- dn
## dimnames(z) <- dimnames(x)[-id]
## }
## else names(z) <- dimnames(x)[[dims + 1L]]
## z
## }
## <bytecode: 0x000000000ffc2ea8>
## <environment: namespace:base>
THIS CHANGES EVERYTHING!!! WHY?
library(keras)
application_resnet50
## function (include_top = TRUE, weights = "imagenet", input_tensor = NULL,
## input_shape = NULL, pooling = NULL, classes = 1000)
## {
## verify_application_prerequistes()
## keras$applications$ResNet50(include_top = include_top, weights = weights,
## input_tensor = input_tensor, input_shape = input_shape,
## pooling = pooling, classes = as.integer(classes))
## }
## <environment: namespace:keras>
a collection of functions that solve a specific problem
the classics: tidyverse, sf, lubridate, survey, magrittr, rmarkdown
install.packages("UpSetR")
library("UpSetR")
?UpSetR
?upset
upset(movies, nsets = 6)
upset
## function (data, nsets = 5, nintersects = 40, sets = NULL, keep.order = F,
## set.metadata = NULL, intersections = NULL, matrix.color = "gray23",
## main.bar.color = "gray23", mainbar.y.label = "Intersection Size",
## mainbar.y.max = NULL, sets.bar.color = "gray23", sets.x.label = "Set Size",
## point.size = 2.2, line.size = 0.7, mb.ratio = c(0.7, 0.3),
## expression = NULL, att.pos = NULL, att.color = main.bar.color,
## order.by = c("freq", "degree"), decreasing = c(T, F), show.numbers = "yes",
## number.angles = 0, group.by = "degree", cutoff = NULL, queries = NULL,
## query.legend = "none", shade.color = "gray88", shade.alpha = 0.25,
## matrix.dot.alpha = 0.5, empty.intersections = NULL, color.pal = 1,
## boxplot.summary = NULL, attribute.plots = NULL, scale.intersections = "identity",
## scale.sets = "identity", text.scale = 1, set_size.angles = 0)
## {
## startend <- FindStartEnd(data)
## first.col <- startend[1]
## last.col <- startend[2]
## if (color.pal == 1) {
## palette <- c("#1F77B4", "#FF7F0E", "#2CA02C", "#D62728",
## "#9467BD", "#8C564B", "#E377C2", "#7F7F7F", "#BCBD22",
## "#17BECF")
## }
## else {
## palette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442",
## "#0072B2", "#D55E00", "#CC79A7")
## }
## if (is.null(intersections) == F) {
## Set_names <- unique((unlist(intersections)))
## Sets_to_remove <- Remove(data, first.col, last.col, Set_names)
## New_data <- Wanted(data, Sets_to_remove)
## Num_of_set <- Number_of_sets(Set_names)
## if (keep.order == F) {
## Set_names <- order_sets(New_data, Set_names)
## }
## All_Freqs <- specific_intersections(data, first.col,
## last.col, intersections, order.by, group.by, decreasing,
## cutoff, main.bar.color, Set_names)
## }
## else if (is.null(intersections) == T) {
## Set_names <- sets
## if (is.null(Set_names) == T || length(Set_names) == 0) {
## Set_names <- FindMostFreq(data, first.col, last.col,
## nsets)
## }
## Sets_to_remove <- Remove(data, first.col, last.col, Set_names)
## New_data <- Wanted(data, Sets_to_remove)
## Num_of_set <- Number_of_sets(Set_names)
## if (keep.order == F) {
## Set_names <- order_sets(New_data, Set_names)
## }
## All_Freqs <- Counter(New_data, Num_of_set, first.col,
## Set_names, nintersects, main.bar.color, order.by,
## group.by, cutoff, empty.intersections, decreasing)
## }
## Matrix_setup <- Create_matrix(All_Freqs)
## labels <- Make_labels(Matrix_setup)
## att.x <- c()
## att.y <- c()
## if (is.null(attribute.plots) == F) {
## for (i in seq_along(attribute.plots$plots)) {
## if (length(attribute.plots$plots[[i]]$x) != 0) {
## att.x[i] <- attribute.plots$plots[[i]]$x
## }
## else if (length(attribute.plots$plots[[i]]$x) ==
## 0) {
## att.x[i] <- NA
## }
## if (length(attribute.plots$plots[[i]]$y) != 0) {
## att.y[i] <- attribute.plots$plots[[i]]$y
## }
## else if (length(attribute.plots$plots[[i]]$y) ==
## 0) {
## att.y[i] <- NA
## }
## }
## }
## BoxPlots <- NULL
## if (is.null(boxplot.summary) == F) {
## BoxData <- IntersectionBoxPlot(All_Freqs, New_data, first.col,
## Set_names)
## BoxPlots <- list()
## for (i in seq_along(boxplot.summary)) {
## BoxPlots[[i]] <- BoxPlotsPlot(BoxData, boxplot.summary[i],
## att.color)
## }
## }
## customAttDat <- NULL
## customQBar <- NULL
## Intersection <- NULL
## Element <- NULL
## legend <- NULL
## EBar_data <- NULL
## if (is.null(queries) == F) {
## custom.queries <- SeperateQueries(queries, 2, palette)
## customDat <- customQueries(New_data, custom.queries,
## Set_names)
## legend <- GuideGenerator(queries, palette)
## legend <- Make_legend(legend)
## if (is.null(att.x) == F && is.null(customDat) == F) {
## customAttDat <- CustomAttData(customDat, Set_names)
## }
## customQBar <- customQueriesBar(customDat, Set_names,
## All_Freqs, custom.queries)
## }
## if (is.null(queries) == F) {
## Intersection <- SeperateQueries(queries, 1, palette)
## Matrix_col <- intersects(QuerieInterData, Intersection,
## New_data, first.col, Num_of_set, All_Freqs, expression,
## Set_names, palette)
## Element <- SeperateQueries(queries, 1, palette)
## EBar_data <- ElemBarDat(Element, New_data, first.col,
## expression, Set_names, palette, All_Freqs)
## }
## else {
## Matrix_col <- NULL
## }
## Matrix_layout <- Create_layout(Matrix_setup, matrix.color,
## Matrix_col, matrix.dot.alpha)
## Set_sizes <- FindSetFreqs(New_data, first.col, Num_of_set,
## Set_names, keep.order)
## Bar_Q <- NULL
## if (is.null(queries) == F) {
## Bar_Q <- intersects(QuerieInterBar, Intersection, New_data,
## first.col, Num_of_set, All_Freqs, expression, Set_names,
## palette)
## }
## QInter_att_data <- NULL
## QElem_att_data <- NULL
## if ((is.null(queries) == F) & (is.null(att.x) == F)) {
## QInter_att_data <- intersects(QuerieInterAtt, Intersection,
## New_data, first.col, Num_of_set, att.x, att.y, expression,
## Set_names, palette)
## QElem_att_data <- elements(QuerieElemAtt, Element, New_data,
## first.col, expression, Set_names, att.x, att.y, palette)
## }
## AllQueryData <- combineQueriesData(QInter_att_data, QElem_att_data,
## customAttDat, att.x, att.y)
## ShadingData <- NULL
## if (is.null(set.metadata) == F) {
## ShadingData <- get_shade_groups(set.metadata, Set_names,
## Matrix_layout, shade.alpha)
## output <- Make_set_metadata_plot(set.metadata, Set_names)
## set.metadata.plots <- output[[1]]
## set.metadata <- output[[2]]
## if (is.null(ShadingData) == FALSE) {
## shade.alpha <- unique(ShadingData$alpha)
## }
## }
## if (is.null(ShadingData) == TRUE) {
## ShadingData <- MakeShading(Matrix_layout, shade.color)
## }
## Main_bar <- suppressMessages(Make_main_bar(All_Freqs, Bar_Q,
## show.numbers, mb.ratio, customQBar, number.angles, EBar_data,
## mainbar.y.label, mainbar.y.max, scale.intersections,
## text.scale, attribute.plots))
## Matrix <- Make_matrix_plot(Matrix_layout, Set_sizes, All_Freqs,
## point.size, line.size, text.scale, labels, ShadingData,
## shade.alpha)
## Sizes <- Make_size_plot(Set_sizes, sets.bar.color, mb.ratio,
## sets.x.label, scale.sets, text.scale, set_size.angles)
## Make_base_plot(Main_bar, Matrix, Sizes, labels, mb.ratio,
## att.x, att.y, New_data, expression, att.pos, first.col,
## att.color, AllQueryData, attribute.plots, legend, query.legend,
## BoxPlots, Set_names, set.metadata, set.metadata.plots)
## }
## <environment: namespace:UpSetR>
full source code: google “PACKAGENAME site:www.github.com”
example 1: This Presentation
example 2: the 2019 MSNA tool
example 3: issues (ggplot2)
remotes::install_github("ellieallien/cleaninginspectoR",
build_opts = c()
)
build_opts = c(): makes sure to also download the manual (see ?remotes::install_github)
library("cleaninginspectoR")
Overview:
?cleaninginspectoR
Manuals / Help Documents
browseVignettes("cleaninginspectoR")
List all functions
ls("package:cleaninginspectoR")
Type Packagename:: and hit tab to see available functions
| contents |
|---|
| find_duplicates |
| find_duplicates_uuid |
| find_other_responses |
| find_outliers |
| inspect_all |
Use!
find_outliers(some_data)
## [1] index value variable has_issue issue_type
## <0 rows> (or 0-length row.names)
… and feedback?
solution: read.csv("mydata.csv") %>% data_cleaning_checks %>% write.csv
Tools need to..
Field teams to lead on
best way?
adding: brainstorming session!
- outcomes: list of problems + solution proposals/ideas + priorities
- no big time commitments, but clear plan forward
adding: session on structuring self-contained projects
Mitigation strategies 1/2:
Mitigation strategies 2/2: Adjusting the critical p-value - Bonferroni correction (p_critical_eff = p_critical / num_tests) - False Discovery Rate - **problem with this???**
=> “Analysis Case” => Appropriate statistics, visualisations, hypothesis tests
.. Good data = more accurate results but more importantly .. Good data = results you TRUST
Enable people to think more about why they do the analysis and what exactly they want to find out and report on, instead of having to know about or work on how to choose and apply the appropriate summary statistics, hypothesis tests and visualisations.
If each combination of data types, hypothesis types and sampling strategy can be associated uniquely with an appropriate summary statistic, hypothesis test and visualisation we can…