Description

错误数据是从数据库中导出的,记录了测试过程中发生的各种错误;本文的目的是统计每一类错误出现的次数。

Data

library(iotools)
# Read the exported error log. The preview printed by head() shows the result
# is a one-column character matrix.
errdata <- input.file("G:/err-db_a.log")
# Preview the first six rows (the default for head()).
head(errdata, n = 6L)
##      [,1]                                                                                                                                                                                                                                                                                          
## [1,] "(('ba347e97-1549-49c6-bfde-989be0eb1180',"                                                                                                                                                                                                                                                   
## [2,] "  'Build of instance ba347e97-1549-49c6-bfde-989be0eb1180 was re-scheduled: invalid argument: could not find capabilities for domaintype=kvm '),"                                                                                                                                            
## [3,] " ('31ed1225-7850-4689-8642-e02657533644',"                                                                                                                                                                                                                                                   
## [4,] "  'unsupported operand type(s) for *: \\'NoneType\\' and \\'int\\'\\nTraceback (most recent call last):\\n\\n  File \"/usr/lib/python2.7/site-packages/oslo_messaging/rpc/dispatcher.py\", line 142, in _dispatch_and_reply\\n    executor_callback))\\n\\n  File \"/usr/lib/python2.7/si'),"
## [5,] " ('601d007c-277f-466c-819f-8e07fbcd25df',"                                                                                                                                                                                                                                                   
## [6,] "  'Build of instance 601d007c-277f-466c-819f-8e07fbcd25df was re-scheduled: Insufficient compute resources: Free CPUs 0.00 VCPUs < requested 1 VCPUs.'),"
# Show the last rows of errdata (tail() defaults to six).
tail(errdata)
##          [,1]                                                                                           
## [53922,] " ('61ec1e23-389b-41d3-a2bc-ab2af57a3e6a',"                                                    
## [53923,] "  \"(pymysql.err.OperationalError) (2013, 'Lost connection to MySQL server during query')\"),"
## [53924,] " ('b645ef17-b0d4-4b8f-b025-68f254c1f0b5',"                                                    
## [53925,] "  \"(pymysql.err.OperationalError) (2013, 'Lost connection to MySQL server during query')\"),"
## [53926,] " ('786c3874-fa0a-4295-96ec-043820dd5e37',"                                                    
## [53927,] "  \"(pymysql.err.OperationalError) (2013, 'Lost connection to MySQL server during query')\"))"
# Count the entries held in errdata.
length(errdata)
## [1] 53927

数据由于经过 Python 处理,因此格式变成了 Python 元组的打印形式 (('instance_id', '错误内容'), …)。读入之后每条记录占两行:一行是 instance_id,下一行是对应的错误内容。

第一步,需要对错误做分类。instance_id 字段对分类没有帮助(错误内容里嵌入的 instance_id 还会让同一种错误被算成不同的类别),因此先把 instance_id 从数据中去掉:

library(tm)
## Loading required package: NLP
library(SnowballC)
# Wrap the log entries in a tm corpus; VectorSource() treats every element of
# errdata as one document.
err.corp <- Corpus(VectorSource(errdata))

# Matches the 8-4-4-4-12 layout of an instance id (lower-case letters/digits).
pattern.instance <- "[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}"

# Blank out every instance id. The function is wrapped in content_transformer()
# so that tm_map() changes only the document content and the documents keep
# their tm class, instead of being replaced by whatever gsub() returns.
err.remove.instance <- tm_map(
  err.corp,
  content_transformer(function(x) gsub(pattern.instance, "", x))
)

# Preview up to five cleaned documents. A plain loop is used because cat() is
# called only for its side effect, and the bound guards against short corpora.
for (i in seq_len(min(5L, length(err.remove.instance)))) {
  cat(as.character(err.remove.instance[[i]]), "\n")
}
## (('', 
##   'Build of instance  was re-scheduled: invalid argument: could not find capabilities for domaintype=kvm '), 
##  ('', 
##   'unsupported operand type(s) for *: \'NoneType\' and \'int\'\nTraceback (most recent call last):\n\n  File "/usr/lib/python2.7/site-packages/oslo_messaging/rpc/dispatcher.py", line 142, in _dispatch_and_reply\n    executor_callback))\n\n  File "/usr/lib/python2.7/si'), 
##  ('',

把数据中的标点符号去掉。注意 removePunctuation 是直接删除标点,而不是把标点替换成空格,原本由标点分隔的词会被粘在一起,因此这里可能会损失一些信息(坑1):

# Delete every punctuation character from each document.
err.remove.punc <- tm_map(err.remove.instance, removePunctuation)
# Print documents 1 to 5, each followed by a space and a newline.
for (i in seq_len(5L)) {
  cat(as.character(err.remove.punc[[i]]), "\n")
}
##  
##   Build of instance  was rescheduled invalid argument could not find capabilities for domaintypekvm  
##   
##   unsupported operand types for  NoneType and intnTraceback most recent call lastnn  File usrlibpython27sitepackagesoslomessagingrpcdispatcherpy line 142 in dispatchandreplyn    executorcallbacknn  File usrlibpython27si 
## 

原本只包含 instance_id 和标点符号的行,现在只剩下空格或空字符串;同时也可以看到,源码路径中的各级目录被拼接成了一个词:

"/usr/lib/python2.7/site-packages/oslo_messaging/rpc/dispatcher.py" -> "usrlibpython27sitepackagesoslomessagingrpcdispatcherpy"

另外,数字也有可能会造成干扰(例如同一种错误中不同的 CPU 数量或行号会被当成不同的类别),因此将数据中的数字去掉,再把连续的空白压缩成一个空格,最后统计每种错误信息出现的次数:

# Remove digits so that messages differing only in numbers collapse into one
# category.
err.remove.num <- tm_map(err.remove.punc, removeNumbers)
# Collapse runs of whitespace left behind by the previous removals.
err.remove.white <- tm_map(err.remove.num, stripWhitespace)
# Count occurrences of each distinct cleaned message. maxsum = Inf disables
# the default cap of 100 levels, beyond which summary() would re-sort the
# counts and fold the rarest messages into a single "(Other)" entry.
summary(as.factor(unlist(err.remove.white)), maxsum = Inf)
##                                                                                                                                                                                                                  
##                                                                                                                                                                                                                1 
##                                                                                                                                                                                                                  
##                                                                                                                                                                                                            26930 
##                                                                                                                                                                                          Authentication required 
##                                                                                                                                                                                                                1 
##                                                                                                                                       Build of instance aborted Could not clean up failed build not rescheduling 
##                                                                                                                                                                                                            14510 
##                                                                                                                                       Build of instance aborted Failed to allocate the networks not rescheduling 
##                                                                                                                                                                                                                1 
##                                                                                Build of instance aborted Failed to allocate the networks with error No fixed IP addresses available for network not rescheduling 
##                                                                                                                                                                                                              383 
##                                                                                                                                                          Build of instance aborted Failure prepping block device 
##                                                                                                                                                                                                                4 
##                                                                                                                                                        Build of instance was rescheduled Authentication required 
##                                                                                                                                                                                                               26 
##                                                                                                         Build of instance was rescheduled Binding failed for port please check neutron logs for more information 
##                                                                                                                                                                                                              290 
##                                                                                                                                                Build of instance was rescheduled Compute host could not be found 
##                                                                                                                                                                                                                9 
##                                                                                                                       Build of instance was rescheduled compute is not a valid node managed by this compute host 
##                                                                                                                                                                                                              154 
##                                                Build of instance was rescheduled Connection to glance host failed Service Unavailable The server is currently unavailable Please try again at a later time HTTP  
##                                                                                                                                                                                                               40 
##                                                                                                          Build of instance was rescheduled htmlbodyh Gateway TimeouthnThe server didnt respond in timenbodyhtmln 
##                                                                                                                                                                                                             1140 
##                                                                                         Build of instance was rescheduled htmlbodyh Service UnavailablehnNo server is available to handle this requestnbodyhtmln 
##                                                                                                                                                                                                              175 
##                                                                                                                                                       Build of instance was rescheduled Image could not be found 
##                                                                                                                                                                                                                1 
##                                                                                                                 Build of instance was rescheduled Insufficient compute resources Free CPUs VCPUs requested VCPUs 
##                                                                                                                                                                                                             2257 
##                                                                                                                Build of instance was rescheduled invalid argument could not find capabilities for domaintypekvm  
##                                                                                                                                                                                                              358 
##                                                                                                                                                      Build of instance was rescheduled Not authorized for image  
##                                                                                                                                                                                                                9 
##                                                                                                                  Build of instance was rescheduled operation failed domain instancebde already exists with uuid  
##                                                                                                                                                                                                                1 
##                                                                                                                    Build of instance was rescheduled operation failed domain instancec already exists with uuid  
##                                                                                                                                                                                                                1 
##                                                                                                                                                                            Build of instance was rescheduled pid 
##                                                                                                                                                                                                                2 
##                                                                                                  Build of instance was rescheduled pymysqlerrOperationalError Cant connect to MySQL server on Errno ECONNREFUSED 
##                                                                                                                                                                                                                3 
##                                                                                                             Build of instance was rescheduled Request Failed internal server error while processing your request 
##                                                                                                                                                                                                               90 
##                                                                                                                                                  Build of instance was rescheduled Security group does not exist 
##                                                                                                                                                                                                                1 
##                                                                                   Build of instance was rescheduled Service UnavailablennThe server is currently unavailable Please try again at a later timenn  
##                                                                                                                                                                                                              138 
##                                                                                                                          Build of instance was rescheduled Unable to establish connection to httpvextensionsjson 
##                                                                                                                                                                                                               40 
##                                                                                                                          Build of instance was rescheduled Unable to establish connection to httpvnetworksjsonid 
##                                                                                                                                                                                                               14 
##                                                                                                                               Build of instance was rescheduled Unable to establish connection to httpvportsjson 
##                                                                                                                                                                                                                7 
##                                                                                                  Build of instance was rescheduled Unable to establish connection to httpvsecuritygroupsjsontenantidfbacedbbaaae 
##                                                                                                                                                                                                               28 
##                                                                                                                                                                                  Compute host could not be found 
##                                                                                                                                                                                                                2 
##  global name hoststate is not definednTraceback most recent call lastnn File usrlibpythonsitepackagesoslomessagingrpcdispatcherpy line in dispatchandreplyn executorcallbacknn File usrlibpythonsitepackagesoslo 
##                                                                                                                                                                                                               11 
##                                                                                                                                            htmlbodyh Gateway TimeouthnThe server didnt respond in timenbodyhtmln 
##                                                                                                                                                                                                             3058 
##                                                                                                                          htmlbodyh Request TimeouthnYour browser didnt send a complete request in timenbodyhtmln 
##                                                                                                                                                                                                              308 
##                                                                                                                           htmlbodyh Service UnavailablehnNo server is available to handle this requestnbodyhtmln 
##                                                                                                                                                                                                                1 
##                                                                                                                                                     No valid host was found There are not enough hosts available 
##                                                                                                                                                                                                             3064 
##                                                                                                                                                           Object action destroy failed because already destroyed 
##                                                                                                                                                                                                                2 
##                                                                                                                                                                                                              pid 
##                                                                                                                                                                                                               58 
##              pymysqlerrInternalError uLock wait timeout exceeded try restarting transaction SQL uUPDATE instanceinfocaches SET updatedats networkinfos WHERE instanceinfocachesid s parameters datetimedatetime  
##                                                                                                                                                                                                               17 
##                                                                                                                   pymysqlerrInternalError uUser nova already has more than maxuserconnections active connections 
##                                                                                                                                                                                                              416 
##                                                                                                                                    pymysqlerrOperationalError Cant connect to MySQL server on Errno ECONNREFUSED 
##                                                                                                                                                                                                                5 
##                                                                                                                                          pymysqlerrOperationalError Lost connection to MySQL server during query 
##                                                                                                                                                                                                              246 
##                                                                                                                                               Request Failed internal server error while processing your request 
##                                                                                                                                                                                                                2 
##                                                                                                                                                                                            Request Timeout HTTP  
##                                                                                                                                                                                                                6 
##                                                                                                                                                            Unable to establish connection to httpvextensionsjson 
##                                                                                                                                                                                                                3 
##                                                                                                                                                                 Unable to establish connection to httpvportsjson 
##                                                                                                                                                                                                               44 
##                                                                                                                                                         Unable to establish connection to httpvportsjsondeviceid 
##                                                                                                                                                                                                                4 
##                                                                                                                                                                    Unable to establish connection to httpvtokens 
##                                                                                                                                                                                                                2 
##      unsupported operand types for NoneType and intnTraceback most recent call lastnn File usrlibpythonsitepackagesoslomessagingrpcdispatcherpy line in dispatchandreplyn executorcallbacknn File usrlibpythonsi 
##                                                                                                                                                                                                               63 
##             upidnTraceback most recent call lastnn File usrlibpythonsitepackagesoslomessagingrpcdispatcherpy line in dispatchandreplyn executorcallbacknn File usrlibpythonsitepackagesoslomessagingrpcdispatche 
##                                                                                                                                                                                                                1

可以看到有两种特殊情况:空字符串和单个空格。这两类实际上是同一类,即原来只包含 instance_id 的行,被我们处理完之后就变成空的了。因此需要把这两类从统计结果中去掉,然后按出现次数从高到低排序并画出条形图:

# Tally every cleaned message. maxsum = Inf keeps summary() from folding
# levels beyond the 100th into "(Other)" and re-sorting the result.
err.summary <- summary(as.factor(unlist(err.remove.white)), maxsum = Inf)

# Drop the empty / whitespace-only messages (the former instance-id lines).
# They are selected by name rather than by position, so the filter stays
# correct even if the blank levels are not the first two entries.
err.summary <- err.summary[grepl("[^[:space:]]", names(err.summary))]

# Most frequent error first.
err.summary <- err.summary[order(err.summary, decreasing = TRUE)]

# Horizontal bar chart. axisnames = FALSE is the documented way to leave the
# category axis unlabelled; the messages are written next to the bars instead,
# using the bar midpoints that barplot() returns.
bar.mid <- barplot(err.summary, horiz = TRUE, col = "cyan",
                   xlab = "err_number", ylab = "err_type", axisnames = FALSE)
text(0, bar.mid, names(err.summary), pos = 4, cex = 0.5)

相关代码

library(iotools)
library(tm)
library(SnowballC)

# Read the exported error log.
errdata <- input.file("G:/err-db_a.log")

# One corpus document per element of errdata.
err.corp <- Corpus(VectorSource(errdata))

# Matches the 8-4-4-4-12 layout of an instance id (lower-case letters/digits).
pattern.instance <- "[0-9a-z]{8}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{4}-[0-9a-z]{12}"

# Blank out instance ids. content_transformer() makes tm_map() modify only the
# document content, so the documents keep their tm class.
err.remove.instance <- tm_map(
  err.corp,
  content_transformer(function(x) gsub(pattern.instance, "", x))
)

# Normalise the text: delete punctuation, delete digits, collapse whitespace.
err.remove.punc <- tm_map(err.remove.instance, removePunctuation)
err.remove.num <- tm_map(err.remove.punc, removeNumbers)
err.remove.white <- tm_map(err.remove.num, stripWhitespace)

# Tally every cleaned message. maxsum = Inf keeps summary() from folding
# levels beyond the 100th into "(Other)" and re-sorting the result.
err.summary <- summary(as.factor(unlist(err.remove.white)), maxsum = Inf)

# Drop the empty / whitespace-only messages (the former instance-id lines) by
# name rather than by position.
err.summary <- err.summary[grepl("[^[:space:]]", names(err.summary))]

# Most frequent error first.
err.summary <- err.summary[order(err.summary, decreasing = TRUE)]

# Horizontal bar chart with the messages written next to the bars.
# axisnames = FALSE is the documented way to leave the category axis unlabelled.
bar.mid <- barplot(err.summary, horiz = TRUE, col = "cyan",
                   xlab = "err_number", ylab = "err_type", axisnames = FALSE)
text(0, bar.mid, names(err.summary), pos = 4, cex = 0.5)