Objetivo

Carregar o dataset e filtrar apenas as observacoes da coluna COMMAND que estao completamente entre colchetes, usando expressao regular com a funcao grep().

Carregando o dataset

# Read the remote CSV into a data frame; the address is named on its own line
# so the read call itself stays short.
url_dataset <- "https://www.dropbox.com/s/kiut6raev4z1e2u/compdataset_V2.csv?dl=1"
ds <- read.csv(file = url_dataset)

# Report the size of the data frame, then preview its first rows.
dimensoes <- dim(ds)
cat(
  "Dimensoes do dataset:", dimensoes[1], "linhas e",
  dimensoes[2], "colunas\n"
)
## Dimensoes do dataset: 140 linhas e 6 colunas
head(ds, n = 6L)
##     USER  PID  VSZ  RSS X.MEM
## 1   root  397 2896 1912   0.2
## 2 nobody  342 4320 2156   0.2
## 3   root  320 7944 2348   0.2
## 4   root  449 5620 2384   0.2
## 5     pi  557 8576 2648   0.2
## 6     pi 4063 9944 2680   0.2
##                                                                                                                COMMAND
## 1                                                                                                     /sbin/dhcpcd-q-b
## 2 /usr/sbin/thd--triggers/etc/triggerhappy/triggers.d/--socket/run/thd.socket--usernobody--deviceglob/dev/input/event*
## 3                                                                                                     /usr/sbin/cron-f
## 4                                                                                                         /bin/login-f
## 5                                                                                                                -bash
## 6                                                                        ps-eouser,pid,vsz,rss,%mem,command--sort=%mem

Extraindo a coluna COMMAND

# Pull the COMMAND column out of the data frame as a standalone vector.
x <- ds[["COMMAND"]]

# Show how many entries there are, followed by a preview of the first ten.
total_comandos <- length(x)
cat("Total de observacoes em COMMAND:", total_comandos, "\n\n")
## Total de observacoes em COMMAND: 140
cat("Primeiras 10 observacoes:\n")
## Primeiras 10 observacoes:
primeiras <- head(x, n = 10L)
print(primeiras)
##  [1] "/sbin/dhcpcd-q-b"                                                                                                    
##  [2] "/usr/sbin/thd--triggers/etc/triggerhappy/triggers.d/--socket/run/thd.socket--usernobody--deviceglob/dev/input/event*"
##  [3] "/usr/sbin/cron-f"                                                                                                    
##  [4] "/bin/login-f"                                                                                                        
##  [5] "-bash"                                                                                                               
##  [6] "ps-eouser,pid,vsz,rss,%mem,command--sort=%mem"                                                                       
##  [7] "/lib/systemd/systemd-hostnamed"                                                                                      
##  [8] "[kthreadd]"                                                                                                          
##  [9] "[rcu_gp]"                                                                                                            
## [10] "[rcu_par_gp]"

Aplicando a expressao regular

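A expressao ^\[.*\]$ garante que o texto esteja completamente entre colchetes, isto e, que comece com [ e termine com ]: ^ ancora o inicio do texto, \[ casa um colchete de abertura literal, .* aceita qualquer sequencia de caracteres, \] casa o colchete de fechamento literal e $ ancora o fim do texto. Dentro de uma string do R, cada barra invertida precisa ser duplicada (\\[ e \\]):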

# Keep only the entries that begin with "[" and end with "]"; value = TRUE
# makes grep() return the matching strings themselves rather than indices.
resultado <- grep("^\\[.*\\]$", x, value = TRUE)

# Report the number of bracketed entries and list every one of them.
total_colchetes <- length(resultado)
cat("Total de observacoes entre colchetes:", total_colchetes, "\n\n")
## Total de observacoes entre colchetes: 80
print(resultado)
##  [1] "[kthreadd]"                          
##  [2] "[rcu_gp]"                            
##  [3] "[rcu_par_gp]"                        
##  [4] "[mm_percpu_wq]"                      
##  [5] "[ksoftirqd/2]"                       
##  [6] "[cpuhp/3]"                           
##  [7] "[migration/3]"                       
##  [8] "[ksoftirqd/3]"                       
##  [9] "[kdevtmpfs]"                         
## [10] "[netns]"                             
## [11] "[khungtaskd]"                        
## [12] "[oom_reaper]"                        
## [13] "[writeback]"                         
## [14] "[kcompactd0]"                        
## [15] "[crypto]"                            
## [16] "[kblockd]"                           
## [17] "[watchdogd]"                         
## [18] "[rpciod]"                            
## [19] "[kworker/u9:0-hci0]"                 
## [20] "[xprtiod]"                           
## [21] "[kswapd0]"                           
## [22] "[nfsiod]"                            
## [23] "[kthrotld]"                          
## [24] "[iscsi_eh]"                          
## [25] "[dwc_otg]"                           
## [26] "[DWCNotificatio]"                    
## [27] "[vchiq-slot/0]"                      
## [28] "[vchiq-recy/0]"                      
## [29] "[vchiq-sync/0]"                      
## [30] "[vchiq-keep/0]"                      
## [31] "[SMIO]"                              
## [32] "[irq/86-mmc1]"                       
## [33] "[mmc_complete]"                      
## [34] "[kworker/2:1H+kblockd]"              
## [35] "[kworker/1:1H-kblockd]"              
## [36] "[jbd2/mmcblk0p7-]"                   
## [37] "[ext4-rsv-conver]"                   
## [38] "[ipv6_addrconf]"                     
## [39] "[kworker/3:1H-kblockd]"              
## [40] "[kworker/0:2H-mmc_complete]"         
## [41] "[SMIO]"                              
## [42] "[mmal-vchiq]"                        
## [43] "[mmal-vchiq]"                        
## [44] "[mmal-vchiq]"                        
## [45] "[cfg80211]"                          
## [46] "[brcmf_wq/mmc1:0]"                   
## [47] "[brcmf_wdog/mmc1]"                   
## [48] "[kworker/0:2-events]"                
## [49] "[kworker/u9:1-hci0]"                 
## [50] "[krfcommd]"                          
## [51] "[kworker/1:0-mm_percpu_wq]"          
## [52] "[kworker/3:2-events]"                
## [53] "[kworker/0:0]"                       
## [54] "[kworker/3:0]"                       
## [55] "[kworker/1:0H]"                      
## [56] "[kworker/u8:0-cfg80211]"             
## [57] "[kworker/3:0H]"                      
## [58] "[kworker/2:2-events]"                
## [59] "[kworker/1:1-mm_percpu_wq]"          
## [60] "[kworker/1:2H]"                      
## [61] "[kworker/3:2H]"                      
## [62] "[kworker/2:2H]"                      
## [63] "[kworker/0:1H]"                      
## [64] "[kworker/u8:2+flush-179:0]"          
## [65] "[kworker/2:0-events]"                
## [66] "[kworker/2:1-events]"                
## [67] "[kworker/1:2-events_power_efficient]"
## [68] "[kworker/2:0H]"                      
## [69] "[kworker/0:0H]"                      
## [70] "[kworker/u8:1]"                      
## [71] "[ksoftirqd/0]"                       
## [72] "[rcu_sched]"                         
## [73] "[rcu_bh]"                            
## [74] "[migration/0]"                       
## [75] "[cpuhp/0]"                           
## [76] "[cpuhp/1]"                           
## [77] "[migration/1]"                       
## [78] "[ksoftirqd/1]"                       
## [79] "[cpuhp/2]"                           
## [80] "[migration/2]"

Verificacao

# Sanity check: list a few entries rejected by the same bracket pattern.
cat("Exemplos que NAO devem aparecer (texto fora dos colchetes):\n")
## Exemplos que NAO devem aparecer (texto fora dos colchetes):
# invert = TRUE flips the selection, returning the entries that do NOT match.
nao_colchetes <- grep("^\\[.*\\]$", x, value = TRUE, invert = TRUE)
print(head(nao_colchetes, n = 5L))
## [1] "/sbin/dhcpcd-q-b"                                                                                                    
## [2] "/usr/sbin/thd--triggers/etc/triggerhappy/triggers.d/--socket/run/thd.socket--usernobody--deviceglob/dev/input/event*"
## [3] "/usr/sbin/cron-f"                                                                                                    
## [4] "/bin/login-f"                                                                                                        
## [5] "-bash"