Type of Analysis: Descriptive analysis - Univariate description

Intention of Analysis: 1- Understand how the Core WordPress Community co-produces code. 2- Identify potential indicators for coherence analysis.

General Question: Which are the attributes of co-production (columns of the data frame, i.e. variables)? Specific Questions: Which types of developer groups exist in WC? Is it possible to build indicators from this data?

Source: Data come from WordPress Report Trac System. URL Source Dataframe: GitHub Repository

Date of collection: 04/07/2019.

1.GENERAL ANALYSIS

#READ DATA
library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(tidyr)

# Load the Trac ticket export; readr guesses each column type and prints the resulting column specification.
TicketW <- read_csv(file = "~/PhD Analysis/1. PhD escriptive exploratory analysis/TicketW.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   Summary = col_character(),
##   Status = col_character(),
##   Version = col_logical(),
##   Owner = col_character(),
##   Type = col_character(),
##   Priority = col_character(),
##   Milestone = col_character(),
##   Component = col_character(),
##   Severity = col_character(),
##   Resolution = col_character(),
##   Created = col_character(),
##   Modified = col_character(),
##   Focuses = col_character(),
##   Reporter = col_character(),
##   Keywords = col_character()
## )
# Open the spreadsheet-style viewer only in an interactive session: View() is an
# interactive tool and is of no use (and may fail) when the script is run in
# batch mode or knitted.
if (interactive()) {
  View(TicketW)
}
dim(TicketW) # dimensions: number of rows, number of columns
## [1] 2333   16
# First 5 rows; head() returns fewer rows instead of NA-padded ones if the data is shorter.
head(TicketW, 5)
## # A tibble: 5 x 16
##      id Summary Status Version Owner Type  Priority Milestone Component
##   <dbl> <chr>   <chr>  <lgl>   <chr> <chr> <chr>    <chr>     <chr>    
## 1 24579 Add Dr~ new    NA      <NA>  enha~ high     Future R~ Upgrade/~
## 2 30361 Correc~ assig~ NA      pento task~ high     <NA>      General  
## 3 32502 Cannot~ new    NA      <NA>  defe~ high     <NA>      Administ~
## 4 36441 Custom~ new    NA      <NA>  defe~ high     Future R~ Customize
## 5 40439 Save p~ assig~ NA      mike~ enha~ high     5.3       Media    
## # ... with 7 more variables: Severity <chr>, Resolution <chr>,
## #   Created <chr>, Modified <chr>, Focuses <chr>, Reporter <chr>,
## #   Keywords <chr>
# Per-column summary: quartiles and mean for the numeric id, length/class/mode for character columns
summary(TicketW)
##        id          Summary             Status          Version       
##  Min.   : 5235   Length:2333        Length:2333        Mode:logical  
##  1st Qu.:34555   Class :character   Class :character   NA's:2333     
##  Median :40511   Mode  :character   Mode  :character                 
##  Mean   :38029                                                       
##  3rd Qu.:44485                                                       
##  Max.   :47640                                                       
##     Owner               Type             Priority        
##  Length:2333        Length:2333        Length:2333       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##   Milestone          Component           Severity        
##  Length:2333        Length:2333        Length:2333       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##   Resolution          Created            Modified        
##  Length:2333        Length:2333        Length:2333       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
##                                                          
##    Focuses            Reporter           Keywords        
##  Length:2333        Length:2333        Length:2333       
##  Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character  
##                                                          
##                                                          
## 
# Transposed overview: one line per column showing its type and first values
glimpse(TicketW)
## Observations: 2,333
## Variables: 16
## $ id         <dbl> 24579, 30361, 32502, 36441, 40439, 41292, 41886, 41...
## $ Summary    <chr> "Add Drag'n'Drop UI to plugin and theme manual uplo...
## $ Status     <chr> "new", "assigned", "new", "new", "assigned", "reope...
## $ Version    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Owner      <chr> NA, "pento", NA, NA, "mikeschroder", "jnylen0", "me...
## $ Type       <chr> "enhancement", "task (blessed)", "defect (bug)", "d...
## $ Priority   <chr> "high", "high", "high", "high", "high", "high", "hi...
## $ Milestone  <chr> "Future Release", NA, NA, "Future Release", "5.3", ...
## $ Component  <chr> "Upgrade/Install", "General", "Administration", "Cu...
## $ Severity   <chr> "normal", "normal", "major", "normal", "normal", "n...
## $ Resolution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Created    <chr> "06/14/2013 05:03:38 PM", "11/17/2014 12:10:55 PM",...
## $ Modified   <chr> "04/12/2019 11:04:54 AM", "06/04/2019 07:42:28 PM",...
## $ Focuses    <chr> NA, "ui, administration", NA, NA, "ui", NA, NA, NA,...
## $ Reporter   <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder"...
## $ Keywords   <chr> "ui-feedback ux-feedback needs-patch shiny-updates"...

Function .Unianalysis() - Transforms a vector into a data frame with the frequency of each level and its proportion (in percent)

# Transform a vector into a frequency table, most frequent level first.
#
# Args:
#   x: vector (character, factor, ...) whose distinct values are counted.
#      Missing values are not counted, because table() drops NA by default.
#
# Returns:
#   A data frame with one row per distinct value and three columns:
#     x          - the value (factor),
#     Freq       - number of occurrences (integer),
#     proportion - share of the non-missing observations, in percent.
#   Rows are sorted by decreasing Freq; tied rows keep the level order
#   produced by table(). Row names are renumbered 1..n.
.Unianalysis <- function(x) {
  # Name the table dimension explicitly so the value column is always called
  # "x"; the plotting helpers and the printed reports read it as `$x`.
  counts <- as.data.frame(table(x, dnn = "x"), responseName = "Freq")
  counts$proportion <- prop.table(counts$Freq) * 100
  # order() on the negated counts gives a stable descending sort.
  counts <- counts[order(-counts$Freq), , drop = FALSE]
  rownames(counts) <- NULL
  counts
}

1.1 Variables related to members. Analysis Goal: Find which variables have high variability and choose a cut-off for each, so that they can be used in the Bivariate Analysis.

# Var1: tickets per status
Status <- .Unianalysis(TicketW$Status)

# Var2: tickets per type
TType <- .Unianalysis(TicketW$Type)

# Var3: tickets per priority
Priority <- .Unianalysis(TicketW$Priority)

# Var4: tickets per milestone
Milestone <- .Unianalysis(TicketW$Milestone)

# Var5: tickets per component, keeping only components with more than 90 tickets
Component <- .Unianalysis(TicketW$Component)
Component <- filter(Component, Freq > 90)
sum(Component$Freq) # total tickets in the most frequent components (more than 90 tickets)
## [1] 738
# Var6: tickets per severity
Severity <- .Unianalysis(TicketW$Severity)

# Var7: tickets per focus value, keeping only values with more than 10 tickets
# NOTE(review): Focuses holds comma-separated lists (e.g. "ui, administration"),
# so every distinct combination is counted as its own level - confirm this is intended.
Focuses <- .Unianalysis(TicketW$Focuses)
Focuses <- filter(Focuses, Freq > 10)
Focuses <- Focuses[order(Focuses$Freq, decreasing = TRUE), ]
sum(Focuses$Freq) # total tickets in the most frequent focus values (more than 10 tickets)
## [1] 832
# Var8: tickets per keyword string, keeping only strings with more than 8 tickets
# NOTE(review): Keywords holds space-separated tags, and the same tags in a
# different order ("has-screenshots has-patch" vs "has-patch has-screenshots")
# are counted as different levels - confirm this is intended.
Keywords <- .Unianalysis(TicketW$Keywords)
Keywords <- filter(Keywords, Freq > 8)
Keywords <- Keywords[order(Keywords$Freq, decreasing = TRUE), ]
sum(Keywords$Freq) # total tickets in the most frequent keyword strings
## [1] 1082

Function for establishing the groups of active, median, or less active (alien) members: .Grouping() keeps the members whose ticket count lies strictly between two cut-offs.

# 1. Filter a group of agents by ticket count.
#
# Args:
#   x:    data frame with a numeric column Freq (as returned by .Unianalysis()).
#   less: exclusive upper bound; a row is kept only if Freq < less.
#   more: exclusive lower bound; a row is kept only if Freq > more.
#
# Returns:
#   The rows of x with more < Freq < less, sorted by decreasing Freq.
#   Rows whose Freq is NA are dropped. Both bounds are exclusive, so a row
#   whose Freq equals either bound is not returned.
.Grouping <- function(x, less, more) {
  # Subset with base R rather than an unqualified filter(): that name is
  # masked between stats, dplyr and plotly, so which function runs would
  # depend on the package attach order. which() discards NA comparisons.
  in_band <- which(x$Freq < less & x$Freq > more)
  kept <- x[in_band, , drop = FALSE]
  rownames(kept) <- NULL
  kept[order(kept$Freq, decreasing = TRUE), , drop = FALSE]
}

1.2 Variable Members Analysis - Reporters: Reporters are WordPress Community members who find a problem in the WP Platform and report it in a Ticket.

Goal: Find the types of reporter groups (active, median, and alien) and a cut-off between them, for use in the Bivariate Analysis.

# Frequency table of reporters and the number of distinct reporters
Reporter <- .Unianalysis(TicketW$Reporter)
totalReporter <- nrow(Reporter)

# Split the reporters into three activity bands by number of tickets reported.
# .Grouping() excludes both of its bounds, so each cut-off is placed one step
# outside the band; sharing 10 and 4 as bounds between neighbouring bands
# dropped every reporter with exactly 10 or exactly 4 tickets from all groups.
# NOTE(review): the median-group listing and plot shown later in this report
# were produced with the old 5-9 band and need regenerating.

# Active: more than 10 tickets
ActiveReporters <- .Grouping(Reporter, Inf, 10)
totalAR <- nrow(ActiveReporters)

# Median: 4 to 10 tickets
MedianReporters <- .Grouping(Reporter, 11, 3)
totalMR <- nrow(MedianReporters)

# Less active ("alien"): 1 to 3 tickets
LessReporters <- .Grouping(Reporter, 4, 0)
totalLR <- nrow(LessReporters)

1.3 Variable Member Analysis - Owners: Owners are WordPress Community members who pick up a ticket from the WP Platform (sent by a reporter) in order to solve it.

Goal: Find the types of owner groups (active, median, and alien) and a cut-off between them, for use in the Bivariate Analysis.

# Frequency table of ticket owners and the number of distinct owners
Owner <- .Unianalysis(TicketW$Owner)
totalOwner <- nrow(Owner)

# Split the owners into three activity bands by number of tickets owned.
# .Grouping() excludes both of its bounds, so each cut-off is placed one step
# outside the band; sharing 10 and 4 as bounds between neighbouring bands
# dropped every owner with exactly 10 or exactly 4 tickets from all groups.

# Active: more than 10 tickets
ActiveOwner <- .Grouping(Owner, Inf, 10)
totalAO <- nrow(ActiveOwner)

# Median: 4 to 10 tickets
MedianOwner <- .Grouping(Owner, 11, 3)
totalMO <- nrow(MedianOwner)

# Less active ("alien"): 1 to 3 tickets
LessOwner <- .Grouping(Owner, 4, 0)
totalLO <- nrow(LessOwner)

1.4 Sum of groups by Reporters and Owners:

# Summary tables: number of members per group.
# The labels are built with c(); rbind() on bare strings creates a one-column
# character matrix rather than a plain label vector.

# Members per reporter activity group
MembersTotal <- c(totalAR, totalMR, totalLR)
CoreGroup <- c("Active Reporters", "Median Reporters", "Alien Reporters")
WPCGroupR <- data.frame(CoreGroup, MembersTotal)

# Members per owner activity group
MembersTotal <- c(totalAO, totalMO, totalLO)
CoreGroup <- c("Active Owners", "Median Owners", "Alien Owners")
WPCGroupO <- data.frame(CoreGroup, MembersTotal)

# Distinct members per role (labels and counts are in the same order)
MembersTotal <- c(totalOwner, totalReporter)
CoreGroup <- c("Owners", "Reporters")
WPCGroups <- data.frame(CoreGroup, MembersTotal)

2. DESCRIPTION ANALYSIS REPORT :

2.1 Variables related with members Report: Variables selected for the bivariate analysis are Component (more than 90 tickets per level), Focuses (more than 10 tickets per level), Keywords (more than 8 tickets per level), Type, and Status.

library(plotrix)
library(plotly)
library(ggplot2)
library(wordcloud)
## Loading required package: RColorBrewer
# Graphic helpers
# Fan plot (fan.plot) of a frequency table.
#   x: data frame with columns x (level) and Freq (count); each sector is
#      labelled "<level>: <count>"
#   y: plot title
.Plot_FunPlot <- function(x, y) {
  sector_labels <- paste(x$x, x$Freq, sep = ": ")
  fan.plot(x$Freq, max.span = pi, labels = sector_labels, main = y, ticks = 360)
}
# Word cloud of a frequency table: words come from x$x, weights from x$Freq.
#   num1: passed to wordcloud() as min.freq
#   num2: passed to wordcloud() as max.words
.Plot_word <- function(x, num1, num2) {
  cloud_colors <- brewer.pal(8, "Dark2")
  wordcloud(
    words = x$x,
    freq = x$Freq,
    min.freq = num1,
    max.words = num2,
    random.order = FALSE,
    rot.per = 0.35,
    colors = cloud_colors
  )
}

# Each table below lists a level (x), its ticket count (Freq) and its share in
# percent (proportion), sorted by decreasing count.
# Tickets per status
Status#Var1
##           x Freq proportion
## 1       new 1844  79.039863
## 2  assigned  215   9.215602
## 3  reopened  116   4.972139
## 4 reviewing  105   4.500643
## 5  accepted   53   2.271753
# Fan plot of the status counts
.Plot_FunPlot(Status, "Tickets per Status")

# Tickets per ticket type
TType# Var2
##                 x Freq proportion
## 1     enhancement 1109  47.535362
## 2    defect (bug) 1012  43.377625
## 3 feature request  168   7.201029
## 4  task (blessed)   44   1.885984
# Fan plot of the type counts
.Plot_FunPlot(TType, "Tickets per Type")

# Tickets per priority
Priority# Var3
##        x Freq proportion
## 1 normal 2265 97.0852979
## 2    low   49  2.1003000
## 3   high   12  0.5143592
## 4 lowest    7  0.3000429
# Fan plot of the priority counts
.Plot_FunPlot(Priority, "Tickets per Priority")

# Tickets per milestone; percentages are relative to the tickets that have a
# milestone, because .Unianalysis() does not count missing values
Milestone#Var4
##                 x Freq  proportion
## 1 Awaiting Review 1104 60.19629226
## 2  Future Release  544 29.66194111
## 3             5.3  172  9.37840785
## 4   WordPress.org   13  0.70883315
## 5           5.2.3    1  0.05452563
# Fan plot of the milestone counts
.Plot_FunPlot(Milestone, "Tickets per Milestone")

# Tickets per component, restricted earlier to components with more than 90 tickets
Component#Var5
##                   x Freq proportion
## 1           General  205   8.786970
## 2             Media  166   7.115302
## 3    Administration  162   6.943849
## 4 Posts, Post Types  114   4.886412
## 5             Users   91   3.900557
# Fan plot of the most frequent components
.Plot_FunPlot(Component, "Tickets per Component (more than 90 tickets)")

# Tickets per severity
Severity #Var6
##          x Freq  proportion
## 1   normal 2186 93.69909987
## 2    minor   91  3.90055722
## 3    major   27  1.15730819
## 4  trivial   19  0.81440206
## 5 critical    9  0.38576940
## 6  blocker    1  0.04286327
# Fan plot of the severity counts
.Plot_FunPlot(Severity, "Tickets per Severity")

# Tickets per focus value (values with more than 10 tickets)
Focuses # Var7
##                     x Freq proportion
## 1      administration  192  19.512195
## 2                  ui  103  10.467480
## 3  ui, administration  101  10.264228
## 4           multisite   95   9.654472
## 5                docs   58   5.894309
## 6         performance   55   5.589431
## 7   ui, accessibility   51   5.182927
## 8          javascript   49   4.979675
## 9            template   48   4.878049
## 10           rest-api   30   3.048780
## 11   coding-standards   27   2.743902
## 12      accessibility   23   2.337398
# Pie chart, one slice per focus value labelled "<value>: <count>".
# One colour per slice: a fixed 7-colour palette is recycled by pie(), which
# gives different slices the same colour.
pie(
  Focuses$Freq,
  main = "Focuses",
  labels = paste(Focuses$x, Focuses$Freq, sep = ": "),
  col = rainbow(nrow(Focuses))
)

# Word cloud of the focus values with at least 20 tickets (at most 200 words)
.Plot_word(Focuses, 20, 200)

# Tickets per keyword string (strings with more than 8 tickets)
Keywords # Var8
##                               x Freq proportion
## 1                   needs-patch  266 15.2873563
## 2                     has-patch  246 14.1379310
## 3             reporter-feedback   57  3.2758621
## 4                   2nd-opinion   55  3.1609195
## 5       has-patch needs-refresh   47  2.7011494
## 6  needs-patch needs-unit-tests   46  2.6436782
## 7       has-patch needs-testing   44  2.5287356
## 8        has-patch dev-feedback   40  2.2988506
## 9      has-patch has-unit-tests   31  1.7816092
## 10        has-patch 2nd-opinion   29  1.6666667
## 11                 dev-feedback   28  1.6091954
## 12                        close   27  1.5517241
## 13   has-patch needs-unit-tests   24  1.3793103
## 14      needs-patch 2nd-opinion   19  1.0919540
## 15     good-first-bug has-patch   18  1.0344828
## 16  has-patch reporter-feedback   17  0.9770115
## 17              has-screenshots   15  0.8620690
## 18    has-screenshots has-patch   15  0.8620690
## 19    has-patch has-screenshots   14  0.8045977
## 20     dev-feedback needs-patch   13  0.7471264
## 21     needs-patch dev-feedback   13  0.7471264
## 22      2nd-opinion needs-patch    9  0.5172414
## 23       dev-feedback has-patch    9  0.5172414
# Pie chart, one slice per keyword string labelled "<keywords>: <count>".
# The title previously read "Focuses" (copied from the Focuses chart).
# One colour per slice: a fixed 7-colour palette is recycled by pie(), which
# gives different slices the same colour.
pie(
  Keywords$Freq,
  main = "Keywords",
  labels = paste(Keywords$x, Keywords$Freq, sep = ": "),
  col = rainbow(nrow(Keywords))
)

# Word cloud of the keyword strings with at least 4 tickets (at most 300 words)
.Plot_word(Keywords, 4, 300)
## Warning in wordcloud(words = x$x, freq = x$Freq, min.freq = num1, max.words
## = num2, : has-patch could not be fit on page. It will not be plotted.

2.2 Reporter Members Analysis Report. The groups of reporters:

# Print the active-reporter group; .Grouping() returned it sorted by decreasing ticket count
ActiveReporters #Ranking the most active reporters
##                   x Freq proportion
## 1       johnbillion   72  3.0861552
## 2           afercia   53  2.2717531
## 3   johnjamesjacoby   47  2.0145735
## 4  sebastian.pisula   43  1.8431204
## 5          flixos90   42  1.8002572
## 6   danielbachhuber   39  1.6716674
## 7        karmatosed   33  1.4144878
## 8            rmccue   27  1.1573082
## 9  subrataemfluence   25  1.0715817
## 10      westonruter   24  1.0287184
## 11            nacin   23  0.9858551
## 12          desrosj   22  0.9429919
## 13  garrett-eclipse   22  0.9429919
## 14           pbiron   22  0.9429919
## 15           azaozz   21  0.9001286
## 16             dd32   21  0.9001286
## 17     boonebgorges   19  0.8144021
## 18            pento   17  0.7286755
## 19            ramiy   17  0.7286755
## 20           scribu   17  0.7286755
## 21          anevins   16  0.6858123
## 22         dshanske   15  0.6429490
## 23            helen   15  0.6429490
## 24          iseulde   15  0.6429490
## 25      markjaquith   15  0.6429490
## 26        melchoyce   15  0.6429490
## 27   SergeyBiryukov   15  0.6429490
## 28       swissspidy   15  0.6429490
## 29        tazotodua   15  0.6429490
## 30        Presskopp   14  0.6000857
## 31     henry.wright   13  0.5572225
## 32           mark-k   12  0.5143592
## 33         mukesh27   12  0.5143592
## 34    peterwilsoncc   11  0.4714959
# Print the median-activity reporter group, sorted by decreasing ticket count
MedianReporters #Ranking the Median active reporters
##                 x Freq proportion
## 1       ericlewis    8  0.3429061
## 2         iandunn    8  0.3429061
## 3      jeremyfelt    8  0.3429061
## 4     joostdevalk    8  0.3429061
## 5          kjellr    8  0.3429061
## 6          netweb    8  0.3429061
## 7       pavelevap    8  0.3429061
## 8      programmin    8  0.3429061
## 9       alexvorn2    7  0.3000429
## 10           andy    7  0.3000429
## 11         eclare    7  0.3000429
## 12    Frank Klein    7  0.3000429
## 13       jdgrimes    7  0.3000429
## 14        pbearne    7  0.3000429
## 15     soulseekah    7  0.3000429
## 16 wonderboymusic    7  0.3000429
## 17           xkon    7  0.3000429
## 18            dlh    6  0.2571796
## 19        Ipstenu    6  0.2571796
## 20 jonoaldersonwp    6  0.2571796
## 21         jorbin    6  0.2571796
## 22        kraftbj    6  0.2571796
## 23          mor10    6  0.2571796
## 24      smerriman    6  0.2571796
## 25    WraithKenny    6  0.2571796
## 26          yoavf    6  0.2571796
## 27      allancole    5  0.2143163
## 28         BjornW    5  0.2143163
## 29   ComputerGuru    5  0.2143163
## 30          imath    5  0.2143163
## 31       jipmoors    5  0.2143163
## 32      joemcgill    5  0.2143163
## 33     mikejolley    5  0.2143163
## 34     milana_cap    5  0.2143163
## 35          Rarst    5  0.2143163
## 36          rnaby    5  0.2143163
# First 30 rows of the less-active group, name and ticket count only.
# The group is sorted by decreasing Freq, so these are its 30 MOST active
# members. head() returns fewer rows, instead of NA-padded ones, when the group
# has fewer than 30 members.
head(LessReporters[, c("x", "Freq")], 30)
##                    x Freq
## 1           allendav    3
## 2              arena    3
## 3       chetan200891    3
## 4  chinteshprajapati    3
## 5           danieltj    3
## 6  Denis-de-Bernardy    3
## 7          diddledan    3
## 8            dimadin    3
## 9         dotancohen    3
## 10             duck_    3
## 11           fliespl    3
## 12             GaryJ    3
## 13       hlashbrooke    3
## 14          ishitaka    3
## 15   jason_the_adams    3
## 16         javorszky    3
## 17          joehoyle    3
## 18          joyously    3
## 19       jtsternberg    3
## 20           keraweb    3
## 21  ketanumretiya030    3
## 22            knutsp    3
## 23         krogsgard    3
## 24         mehulwpos    3
## 25      MikeHansenMe    3
## 26         Mista-Flo    3
## 27          mnelson4    3
## 28         monikarao    3
## 29            mrwweb    3
## 30           mt8.biz    3
# Fan plots of the five most active members of each reporter group.
# head() is used instead of [1:5, ] so that a group with fewer than five
# members is plotted as-is rather than padded with NA rows.
.Plot_FunPlot(head(ActiveReporters[, 1:2], 5), "5 Top Active Reporters")

.Plot_FunPlot(head(MedianReporters[, 1:2], 5), "5 Top Median Reporters")

.Plot_FunPlot(head(LessReporters[, 1:2], 5), "5 Top Less Active Reporters")