1 Functions

2 Summary statistics

#' Descriptive statistics for the numeric columns of a data frame
#'
#' Keeps only the numeric columns of `df`, passes them to
#' `psych::describe()` (NAs removed, skew statistics switched off, with the
#' 1/5/10/25/50/75/90/95/99 percent quantiles requested) and moves the row
#' names of that result into a leading `var.name` column.
#'
#' @param df A data frame; non-numeric columns are ignored.
#' @return A data frame with a `var.name` column followed by the columns
#'   produced by `psych::describe()`, with default (reset) row names.
summ.df <- function(df) {
  numeric_cols <- sapply(df, is.numeric)
  numeric_df <- df[c(numeric_cols)]

  probs <- c(0.01, 0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99)
  stats <- psych::describe(numeric_df, na.rm = TRUE, skew = FALSE,
                           quant = probs)

  # The row names of the describe() result become an ordinary column so they
  # survive the row-name reset below.
  labels <- as.data.frame(row.names(stats))
  names(labels) <- c("var.name")

  result <- cbind(labels, stats)
  rownames(result) <- NULL
  result
}

2.1 Test runs and checks

3 Inspect numeric variables

#' Tally positive, negative, zero, missing and infinite values per column
#'
#' For every numeric column of `df` the function counts strictly positive
#' finite values (`psts`), strictly negative finite values (`negs`), zeros,
#' `NaN`s (`nans`), `NA`s that are not `NaN` (`only.nas`), `Inf` (`pst.Inf`)
#' and `-Inf` (`neg.Inf`). `finite` is psts + negs + zeros, `non.finite` is
#' the sum of the remaining categories, and `n` is their total. Each count is
#' also reported as a share of `n` in a column prefixed with `p.`.
#'
#' @param df A data frame; non-numeric columns are ignored.
#' @param opt Column selection: `1` returns `var`, `n` and the `p.*` shares;
#'   `2` returns `var`, `n` and the raw counts; any other value (the default)
#'   returns everything.
#' @return A data frame with one row per numeric column, or `NULL` when `df`
#'   has no numeric columns. Counts are stored as doubles.
inspect <- function(df, opt = 0) {
  # vapply() always yields a logical vector, so a zero-column data frame no
  # longer fails with an invalid-subscript error as it did with sapply().
  df <- df[vapply(df, is.numeric, logical(1))]
  if (ncol(df) == 0L) {
    # Nothing to inspect; rbind-ing an empty list of results also gave NULL.
    return(NULL)
  }

  # Number of elements per column for which `pred` is TRUE. Comparisons that
  # evaluate to NA (from NA/NaN inputs) are dropped by na.rm.
  tally <- function(pred) {
    vapply(df, function(x) as.numeric(sum(pred(x), na.rm = TRUE)),
           numeric(1), USE.NAMES = FALSE)
  }

  psts <- tally(function(x) x > 0 & x != Inf)
  negs <- tally(function(x) x < 0 & x != -Inf)
  zeros <- tally(function(x) x == 0)
  pst.Inf <- tally(function(x) x == Inf)
  neg.Inf <- tally(function(x) x == -Inf)
  nans <- tally(is.nan)
  # is.na() is TRUE for NaN as well, so subtract the NaNs to get plain NAs.
  only.nas <- tally(is.na) - nans

  finite <- psts + negs + zeros
  non.finite <- only.nas + nans + pst.Inf + neg.Inf
  n <- finite + non.finite

  counts <- data.frame(var = names(df), n = n, psts = psts, negs = negs,
                       zeros = zeros, finite = finite,
                       non.finite = non.finite, only.nas = only.nas,
                       nans = nans, pst.Inf = pst.Inf, neg.Inf = neg.Inf)

  # Shares: every count column (all but var and n) divided by the column
  # total, in the same order and with a "p." prefix.
  shares <- counts[-(1:2)] / n
  names(shares) <- paste0("p.", names(shares))

  if (opt == 1) {
    cbind(counts[c("var", "n")], shares)
  } else if (opt == 2) {
    counts
  } else {
    cbind(counts, shares)
  }
}

3.1 Test runs and checks

4 Adding * to variables

#' Format a numeric column and append a significance marker
#'
#' Copies the columns identified by `vr` and `pval` into new columns named
#' `vr` and `pval`, rounds `vr` to three decimals, formats it with three
#' decimal places and appends a marker chosen from the p-value:
#' `"*"` for p <= 0.01, `"#"` for 0.01 < p <= 0.05, `"~"` for
#' 0.05 < p <= 0.10 and a single space otherwise. The helper `pval` column
#' is removed before returning.
#'
#' Missing p-values get the blank marker; previously they matched no case
#' and the text "NA" was pasted onto the formatted value. The function uses
#' base R only, so it does not require dplyr/magrittr to be attached, and it
#' no longer overwrites and drops a column of `df` that happens to be named
#' `sign`.
#'
#' @param df A data frame.
#' @param vr Name (or index) of the numeric column to format.
#' @param pval Name (or index) of the column holding the p-values.
#' @return A plain data frame: the columns of `df` plus a character column
#'   `vr` holding the formatted values with their markers.
sign.clean <- function(df, vr, pval) {
  values <- df[[vr]]
  p_values <- df[[pval]]
  if (is.null(values) || is.null(p_values)) {
    stop("`vr` and `pval` must identify existing columns of `df`",
         call. = FALSE)
  }

  out <- data.frame(df, vr = values, pval = p_values)

  # Start from the blank marker (also used for NA/NaN p-values) and overwrite
  # from the widest threshold to the tightest so the strictest match wins.
  p <- out[["pval"]]
  marker <- rep(" ", length(p))
  marker[!is.na(p) & p <= 0.10] <- "~"
  marker[!is.na(p) & p <= 0.05] <- "#"
  marker[!is.na(p) & p <= 0.01] <- "*"

  out[["vr"]] <- paste0(format(round(out[["vr"]], 3), nsmall = 3), marker)
  out[setdiff(names(out), "pval")]
}

5 Formatting data

#' Format every numeric column to a fixed number of decimals
#'
#' Each numeric column is rounded to `n` digits and converted to text with
#' `format(..., nsmall = n)`. Non-numeric columns are left untouched. In the
#' result the non-numeric columns come first, followed by the formatted
#' numeric columns, each group in its original order.
#'
#' @param df A data frame.
#' @param n Number of decimal places to round to and to display.
#' @return A data frame with the non-numeric columns of `df` followed by its
#'   numeric columns converted to formatted character vectors.
frmt.clean <- function(df, n) {
  # vapply() returns logical(0) for a zero-column data frame, whereas
  # sapply() returned list() and made the negation below fail.
  is_num <- vapply(df, is.numeric, logical(1))
  other_cols <- df[!is_num]
  num_cols <- df[is_num]

  for (j in seq_along(num_cols)) {
    num_cols[[j]] <- format(round(num_cols[[j]], digits = n), nsmall = n)
  }

  cbind(other_cols, num_cols)
}
# ## Test runs
# X = win.df(x,.01); Y = win.df(x,.01)
# tmp = summ.df(x)
# tmp = frmt.clean(x,4)
# tmp = sign.clean(y,vr="x1",pval="x2")
# tmp1 = diff.tests.df(x,y,0)
# tmp2 = group.diff.tests.dfs(x,y,grp ="grp", 0)
# tmp3 = as.data.frame(bind_rows(tmp1,tmp2))
# tmp3 = tmp3 %>%
#        arrange(var.name, grp.name) %>%
#        mutate(grp.name = grp.name)