This is the solution make for me to an user of R Community blog.
He said this:
Example: A2589200014000006001 which accounts for
Project: A25
Center: 892
Patient: 00014
Disease: 00
Visit: 000 (baseline) / 701 (1 year visit)
Sample: 60
Specimen: 01
What I am trying to accomplish is to operate RQ values based on 1-year follow-up according to 701 code between follow-ups
link: https://community.rstudio.com/t/filtering-by-string-and-operating-with-rows/141899
database <- structure(list(sample_name = c("A2571900003007016001", "A2592000026007016001",
"A2592000036007016001", "A2542900002007016001", "A2552500037000006001",
"A2582200025000006001", "A2555600056007016001", "A2537700008000006001",
"A2537700009007016001", "A2564800002000006001", "A2589600037000006001",
"A2552500020007016001", "A2589600034000006001", "A2552500020000006001",
"A2571900001007016001", "A2582200032007016001", "A2582200020000006001",
"A2537700011000006001", "A2582200012007016001", "A2589800008007016001",
"A2591900088007016001", "A2541700027000006001", "A2537700030007016001",
"A2561700001000006001", "A2561900011007016001", "A2537700029000006001",
"A2537700010000006001", "A2537700032000006001", "A2552500026007016001",
"A2591800088000006001", "A2537600099007016001", "A2537700008007016001",
"A2582200021000006001", "A2537700013007016001", "A2555600055007016001",
"A2589600034000006001", "A2561900012007016001", "A2582200012007016001",
"A2555600056007016001", "A2582200019000006001", "A2543900001007016001",
"A2564800001000006001", "A2555100021000006001", "A2592000033000006001",
"A2552500027000006001", "A2537700031000006001", "A2552500026000006001",
"A2541700026007016001", "A2576000026007016001", "A2571200024000006001",
"A2552500026007016001", "A2571900005007016001", "A2556300014000006001",
"A2592000028007016001", "A2582200035000006001", "A2592000023007016001",
"A2552500034000006001", "A2555600055007016001", "A2582200019007016001",
"A2582200022000006001", "A2576000006007016001", "A2555300047000006001",
"A2589200008007016001", "A2582200035007016001", "A2592000028000006001",
"A2576000006007016001", "A2571200024007016001", "A2589400004007016001",
"A2589500045007016001", "A2592000038000006001", "A2555100024007016001",
"A2537700031007016001", "A2582200034000006001", "A2537600099007016001",
"A2537700016007016001", "A2589600038000006001", "A2554700034007016001",
"A2537700032000006001", "A2552500030000006001", "A2535500019000006001",
"A2576000006000006001", "A2571900001000006001", "A2582200030000006001",
"A2552500037000006001", "A2589200014007016001", "A2571200028007016001",
"A2561700001007016001", "A2592000024007016001", "A2582200026007016001",
"A2540900001007016001", "A2576100007000006001", "A2582200021000006001",
"A2544800001007016001", "A2576000007000006001", "A2582200020007016001",
"A2582200033000006001", "A2564800003007016001", "A2537700029000006001",
"A2555300047000006001", "A2555100024007016001", "A2555300047007016001",
"A2582200026007016001", "A2589200012007016001", "A2571900003007016001",
"A2537700015000006001", "A2589800009000006001", "A2592000029000006001",
"A2582200030000006001", "A2535500019000006001", "A2589200013007016001",
"A2574500061007016001", "A2582200020007016001", "A2591800099007016001",
"A2591900092000006001", "A2537700006007016001", "A2576000017000006001",
"A2561900013007016001", "A2576000017000006001", "A2582200012000006001",
"A2582200029000006001", "A2576000017000006001", "A2555100022007016001",
"A2582200033007016001", "A2537700029000006001", "A2582200011000006001",
"A2582200030007016001", "A2591900088000006001", "A2537700031007016001",
"A2576000006000006001", "A2555300047007016001", "A2582200019007016001",
"A2571200030007016001", "A2592000036007016001", "A2592000036000006001",
"A2589600038007016001", "A2589200008007016001", "A2571200027000006001",
"A2582200029007016001", "A2571900001000006001", "A2589200008007016001",
"A2592000036007016001", "A2589200010000006001", "A2571900007000006001",
"A2552500021000006001", "A2592000027000006001", "A2537700009000006001",
"A2582200037000006001", "A2576000014007016001", "A2571200023007016001",
"A2537700017007016001", "A2537700011000006001", "A2589600034007016001",
"A2552500027000006001", "A2552500033000006001", "A2537700011000006001",
"A2592000038000006001", "A2535500019007016001", "A2591800088007016001",
"A2556300014007016001", "A2556300014007016001", "A2592000030000006001",
"A2540900001007016001", "A2589800008000006001", "A2576000014000006001",
"A2571900008007016001", "A2561500002000006001", "A2582200032007016001",
"A2591900095000006001", "A2591800088007016001", "A2592000023000006001",
"A2564800003000006001", "A2582200012000006001", "A2552500021007016001",
"A2540900001000006001", "A2576100015000006001", "A2537700027007016001",
"A2576000007007016001", "A2537700017000006001", "A2552500034007016001",
"A2592000035000006001"), RQ = c(1.08197462642302, 1.49002169146583,
2.2626289256918, 1.59364836099312, 1.16958766405195, 1.15722281085647,
1.90395581699507, 1.16070391438372, 1.42207741058728, NA, 2.01623900584052,
1.32715174233857, 0.937137946702273, 0.379016702964743, 1.19471513515602,
1.45969471059855, 1.83062125071651, 1.19803218514883, 2.57933650131177,
0.971083101781619, 0.844400887423782, 1.21644126855065, 1.23342215585474,
1.33299065527035, 1.15535269687227, 1.01091848210074, 1.36225803464049,
1.70133432190171, 0.179451596914852, 2.44697608226373, 2.76702050165719,
0.854409740889734, 1.60250997083131, 1.02266478905362, 0.943874312681693,
0.885972198499773, 0.839537496184133, 0.99723125135207, 0.638164384414478,
1.87081469574624, 1.13053056712459, 0.726146896129157, 1.29115899476399,
0.775214073170476, 1.00393555807285, 1.43561277531882, 1.12635895425256,
1.2466011942751, 0.584118641779783, 0.770571108358409, 4.80210406012398,
0.768437590644007, 1.10114159809796, 1.9683680440676, 1, 0.851453707748945,
0.1666623346364, 1.24372425877751, 2.32462821503624, 0.523405140987288,
1.03622215405832, 1.13393137629285, 0.970634469769548, 2.42054737903178,
NA, 0.709233866728796, 1.31281776506912, 2.04864025497385, 0.493002431338097,
0.600540452021357, 0.493800430722687, 1.88905756659272, 1.17609125029097,
2.95535888117926, 0.751754411118803, 1.12375951661894, 0.976708528962229,
0.858367088615479, 1.06191380396236, 1.28075986132977, 0.868942930406155,
0.816203046150902, 1.21307324843306, 0.472701058375372, 0.951757980304535,
0.694798558820258, 5.23189407871883, 1.2938466778861, 1.0614232089498,
1.90131820245963, NA, 0.574481895667812, 2.846784593394, 1.26927088601981,
1.88121751127258, 0.610332223355819, 2.46285780193722, 1.86951839513,
0.523889093650146, 0.968618189226629, 0.880869374126979, 1.15508578455358,
0.763305945229095, 1.59955063929534, 0.799221149722627, 0.70907001783973,
1.14419530791605, 1.2593389772436, 3.37760354969513, 0.579280229789139,
1.52837652076517, 0.963039358974067, 0.35675362642947, 0.335643125695067,
0.84109072602697, 0.900001929793514, 1.09378814701508, 1.45598954899909,
1.18701102375694, 0.855792732948346, 1.54221082540794, 0.952637998043937,
0.730522189272839, 1.0707833911119, 0.96638278943318, 1.35441200570982,
0.856188284546456, 2.09604060687005, 0.578878842457425, 1.14076371586843,
2.26106113424919, 9.77886256349568, 1.8881848383005, NA, 0.57700937588178,
2.10672207190967, 0.989656656415207, 2.64695438549274, 1.29863860272989,
1.51676754537444, 1.87384289374485, 1.2637110854608, 0.72547610391709,
0.479964630519455, 1.63052135171292, 0.749672992109442, NA, 1.02526723788859,
2.24026075844427, 0.935839697736695, 0.320560077029036, 0.135246828302878,
1.03646159987396, 1.59364836099312, 0.722298293338962, 0.272879010419556,
1.96337171049351, 1.65748980897599, 1.52414483033977, 1.11265012058483,
NA, 1.47324768581648, 1.04198415065663, 0.86294070314942, 0.553376518901261,
1.7021206869689, 5.49708453933297, 0.917215940605446, 1.65748980897599,
1.32439487523883, 0.310643836121484, 1.17473336089929, 0.768437590644006,
1.4382688051156, 0.940608755636287, 0.798851916449149, 1.17175152030837,
0.603182578991095, 1.18153852202942, 0.89089871814034)), row.names = c(NA,
-180L), class = c("tbl_df", "tbl", "data.frame"))
head(database)
## sample_name RQ
## 1 A2571900003007016001 1.081975
## 2 A2592000026007016001 1.490022
## 3 A2592000036007016001 2.262629
## 4 A2542900002007016001 1.593648
## 5 A2552500037000006001 1.169588
## 6 A2582200025000006001 1.157223
The solution found was to use the ‘library(stringr)’ to divide the values of each cell according to the needs. Afterwards, it was necessary to count the digits of the numbers by dividing them as requested in the question. Because all the information were is one column.
library(stringr)
library(tidyverse)
options(digits=19) # permit show 19 digits of each number.
str(database)
## tibble [180 x 2] (S3: tbl_df/tbl/data.frame)
## $ sample_name: chr [1:180] "A2571900003007016001" "A2592000026007016001" "A2592000036007016001" "A2542900002007016001" ...
## $ RQ : num [1:180] 1.08 1.49 2.26 1.59 1.17 ...
database %>%
mutate(P_letter = str_extract(sample_name, 'A'),
number = as.numeric(parse_number(sample_name)),
Project=str_sub(number,1,2),
Center= str_sub(number,3,5),
Patient= str_sub(number,6,10),
Disease = str_sub(number,11,12),
Visit= str_sub(number,13,15),
Sample= str_sub(number,16,17), # this digits changes
Specimen=str_sub(number,18,19), # this digits changes
Sample = replace(Sample, Sample == 59, 60), # this digits changes
Sample = replace(Sample, Sample == 61, 60), # this digits changes
Specimen = replace(Specimen, Specimen == 36, 01), # this digits changes
Specimen = replace(Specimen, Specimen == 44, 01)) # this digits changes
## # A tibble: 180 x 11
## sample_name RQ P_letter number Project Center Patient Disease Visit
## <chr> <dbl> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 A257190000300701~ 1.08 A 2.57e18 25 719 00003 00 701
## 2 A259200002600701~ 1.49 A 2.59e18 25 920 00026 00 701
## 3 A259200003600701~ 2.26 A 2.59e18 25 920 00036 00 701
## 4 A254290000200701~ 1.59 A 2.54e18 25 429 00002 00 701
## 5 A255250003700000~ 1.17 A 2.55e18 25 525 00037 00 000
## 6 A258220002500000~ 1.16 A 2.58e18 25 822 00025 00 000
## 7 A255560005600701~ 1.90 A 2.56e18 25 556 00056 00 701
## 8 A253770000800000~ 1.16 A 2.54e18 25 377 00008 00 000
## 9 A253770000900701~ 1.42 A 2.54e18 25 377 00009 00 701
## 10 A256480000200000~ NA A 2.56e18 25 648 00002 00 000
## # ... with 170 more rows, and 2 more variables: Sample <chr>, Specimen <chr>