What I am looking for is a fucntion doint this - working out cases missing from a table.
library(tidyverse)
## -- Attaching packages --------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.8.0 v stringr 1.3.0
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
set.seed(1)
bb <-data.frame(X = sample(c("a","b"), 10, replace = TRUE ),
Y = sample(c("x","y","z"), 10, replace=TRUE ),
A = sample(c("u","v","w"),10, replace=TRUE ),
B = sample(c(1,2),10, replace=TRUE),
Intensity = rnorm(10),
SN = rnorm(10), stringsAsFactors = FALSE
)
x <- dcast(bb, X + Y ~ A + B, fun.aggregate = mean , value.var = "Intensity")
mx <- melt(x)
## Using X, Y as id variables
separate(mx, variable, c("A","B"), sep="_")
## X Y A B value
## 1 a x u 1 NaN
## 2 a z u 1 NaN
## 3 b y u 1 -1.98935170
## 4 b z u 1 NaN
## 5 a x u 2 0.78213630
## 6 a z u 2 0.61982575
## 7 b y u 2 NaN
## 8 b z u 2 -0.15579551
## 9 a x v 1 NaN
## 10 a z v 1 0.41794156
## 11 b y v 1 NaN
## 12 b z v 1 -0.69809370
## 13 a x v 2 NaN
## 14 a z v 2 NaN
## 15 b y v 2 -0.05612874
## 16 b z v 2 NaN
## 17 a x w 1 0.91897737
## 18 a z w 1 NaN
## 19 b y w 1 NaN
## 20 b z w 1 NaN
## 21 a x w 2 NaN
## 22 a z w 2 NaN
## 23 b y w 2 -0.47815006
## 24 b z w 2 NaN
But this implementation has a few drawbacks. a) should work with vectors of row IDs, and column IDs. i.e. c(“X”,“Y”), c(“A”,“B”) b) dcast merges the column IDs with “_" but variables A and B in practice contain “_" so it is difficult to seperate back. c) other “dependent” columns i.e. SN are dropeed
rowKeys = c("X","Y")
colKeys = c("A","B")
bb1<-bb %>% unite("rowKey",rowKeys, sep="~",remove = TRUE) %>% unite("colKey" ,colKeys, sep="~", remove=TRUE)
bb2 <- bb1 %>% select(c("rowKey","colKey")) %>% distinct() %>% mutate(auxilary = 1:nrow(.)) %>% spread("colKey",auxilary)
bb3 <- bb2 %>% gather(key="colKey", "auxilary", -rowKey)
bb4 <- bb3 %>% separate("colKey", colKeys) %>% separate("rowKey", rowKeys) %>% select(-auxilary)
sapply(bb, class)
## X Y A B Intensity SN
## "character" "character" "character" "numeric" "numeric" "numeric"
class(bb$B) <- "numeric"
sapply(bb4, class)
## X Y A B
## "character" "character" "character" "character"
class(bb4$B) <- "numeric"
left_join(bb4, bb)
## Joining, by = c("X", "Y", "A", "B")
## X Y A B Intensity SN
## 1 a x u 1 NA NA
## 2 a z u 1 NA NA
## 3 b y u 1 -1.98935170 -0.05380504
## 4 b z u 1 NA NA
## 5 a x u 2 0.78213630 -0.10278773
## 6 a z u 2 0.61982575 -1.37705956
## 7 b y u 2 NA NA
## 8 b z u 2 -0.15579551 -0.39428995
## 9 a x v 1 NA NA
## 10 a z v 1 0.41794156 0.76317575
## 11 b y v 1 NA NA
## 12 b z v 1 0.07456498 0.38767161
## 13 b z v 1 -1.47075238 -0.05931340
## 14 a x v 2 NA NA
## 15 a z v 2 NA NA
## 16 b y v 2 -0.05612874 -0.41499456
## 17 b z v 2 NA NA
## 18 a x w 1 0.91897737 1.35867955
## 19 a z w 1 NA NA
## 20 b y w 1 NA NA
## 21 b z w 1 NA NA
## 22 a x w 2 NA NA
## 23 a z w 2 NA NA
## 24 b y w 2 -0.47815006 1.10002537
## 25 b z w 2 NA NA
head(bb1)
## rowKey colKey Intensity SN
## 1 a~x w~1 0.91897737 1.35867955
## 2 a~x u~2 0.78213630 -0.10278773
## 3 b~z v~1 0.07456498 0.38767161
## 4 b~y u~1 -1.98935170 -0.05380504
## 5 a~z u~2 0.61982575 -1.37705956
## 6 b~y v~2 -0.05612874 -0.41499456
bb1<-bb %>% unite("rowKey",rowKeys, sep="~",remove = FALSE) %>% unite("colKey" ,colKeys, sep="~", remove=FALSE)
Drawbacks:
tidyr::complete(bb, nesting(A,B) ,nesting(X,Y))
## # A tibble: 25 x 6
## A B X Y Intensity SN
## <chr> <dbl> <chr> <chr> <dbl> <dbl>
## 1 u 1. a x NA NA
## 2 u 1. a z NA NA
## 3 u 1. b y -1.99 -0.0538
## 4 u 1. b z NA NA
## 5 u 2. a x 0.782 -0.103
## 6 u 2. a z 0.620 -1.38
## 7 u 2. b y NA NA
## 8 u 2. b z -0.156 -0.394
## 9 v 1. a x NA NA
## 10 v 1. a z 0.418 0.763
## # ... with 15 more rows