Type of Analysis: Descriptive analysis - Bivariate description

Source: The data come from the WordPress Trac ticket-reporting system. Source URL of the data frame: GitHub repository.

Date collection: 04/07/2019.

1. EXPLORATORY ANALYSIS - BIVARIATE ANALYSIS

library(readr)
install.packages("dplyr")
## Installing package into 'C:/Users/Jacob/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Jacob\AppData\Local\Temp\Rtmp0OoOT2\downloaded_packages
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(markdown)
library(ggplot2)
library(lattice)
library(tidyr)

# READ DATA
# Load the WordPress Trac ticket export. readr guesses each column's type and
# echoes the resulting column specification (recorded below).
TicketW <- read_csv('~/PhD Analysis/1. PhD escriptive exploratory analysis/TicketW.csv')
## Parsed with column specification:
## cols(
##   id = col_double(),
##   Summary = col_character(),
##   Status = col_character(),
##   Version = col_logical(),
##   Owner = col_character(),
##   Type = col_character(),
##   Priority = col_character(),
##   Milestone = col_character(),
##   Component = col_character(),
##   Severity = col_character(),
##   Resolution = col_character(),
##   Created = col_character(),
##   Modified = col_character(),
##   Focuses = col_character(),
##   Reporter = col_character(),
##   Keywords = col_character()
## )
# View() opens a GUI data viewer, so it is only useful (and only safe) in an
# interactive session; skip it when the script is run with Rscript or rendered
# on a machine without a display.
if (interactive()) {
  View(TicketW)
}
# Compact overview of every column: type plus the first few values.
# NOTE(review): Created and Modified are read as character, not date-time;
# parse them before doing any date arithmetic.
glimpse(TicketW)
## Observations: 2,333
## Variables: 16
## $ id         <dbl> 24579, 30361, 32502, 36441, 40439, 41292, 41886, 41...
## $ Summary    <chr> "Add Drag'n'Drop UI to plugin and theme manual uplo...
## $ Status     <chr> "new", "assigned", "new", "new", "assigned", "reope...
## $ Version    <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Owner      <chr> NA, "pento", NA, NA, "mikeschroder", "jnylen0", "me...
## $ Type       <chr> "enhancement", "task (blessed)", "defect (bug)", "d...
## $ Priority   <chr> "high", "high", "high", "high", "high", "high", "hi...
## $ Milestone  <chr> "Future Release", NA, NA, "Future Release", "5.3", ...
## $ Component  <chr> "Upgrade/Install", "General", "Administration", "Cu...
## $ Severity   <chr> "normal", "normal", "major", "normal", "normal", "n...
## $ Resolution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Created    <chr> "06/14/2013 05:03:38 PM", "11/17/2014 12:10:55 PM",...
## $ Modified   <chr> "04/12/2019 11:04:54 AM", "06/04/2019 07:42:28 PM",...
## $ Focuses    <chr> NA, "ui, administration", NA, NA, "ui", NA, NA, NA,...
## $ Reporter   <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder"...
## $ Keywords   <chr> "ui-feedback ux-feedback needs-patch shiny-updates"...

1 REPORTERS AND KEYWORDS ANALYSIS: Reporters are WordPress community members who find a problem in the WordPress platform and report it in a ticket.

Goal: Identify the keywords most frequently used by each group of reporters (active, median, and alien).

# Rebuild the per-reporter ticket counts used in the first (univariate)
# analysis: one row per reporter, with the name in the first column and the
# number of tickets in Freq.
Reporter <- as.data.frame(as.table(table(TicketW$Reporter)))

# Split reporters into three groups by how many tickets they opened.
# NOTE(review): reporters with exactly 10 or exactly 4 tickets match none of
# the three conditions below and are therefore left out of every group;
# confirm whether one side of each cut-off should be inclusive.
ActiveReporters <- Reporter %>% filter(Freq > 10)
MedianReporters <- Reporter %>% filter(Freq > 4, Freq < 10)
LessReporters <- Reporter %>% filter(Freq < 4)

# 1.1 Analyse keywords used by active reporters
# Keep only the two columns needed for the reporter/keyword cross-tabulation.
Reporter_Keywords <- TicketW %>% select(Reporter, Keywords)
summary(Reporter_Keywords)
##    Reporter           Keywords        
##  Length:2333        Length:2333       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character
dim(Reporter_Keywords)
## [1] 2333    2
glimpse(Reporter_Keywords)
## Observations: 2,333
## Variables: 2
## $ Reporter <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder", ...
## $ Keywords <chr> "ui-feedback ux-feedback needs-patch shiny-updates", ...
nrow(ActiveReporters) # Number of active reporters
## [1] 34
# Names of all active reporters, as a character vector to filter on.
# The whole first column is taken instead of a hard-coded 1:34 row range, so
# the selection stays complete if the number of active reporters changes.
AR <- as.character(ActiveReporters[[1]])

# Tickets opened by active reporters, cross-tabulated keyword string x reporter.
# Each distinct Keywords value (the full combination stored on a ticket) is one
# row of the table; rows with a missing Keywords value are dropped by table().
ActiveR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% AR)
ActiveR_Key <- table(ActiveR_Key$Keywords, ActiveR_Key$Reporter)
dim(ActiveR_Key) # Keyword strings x active reporters
## [1] 229  34
# Total uses of each keyword string across all active reporters. Summing the
# whole table avoids the previous hard-coded [1:229, 1:34] slice.
ActiveR_Sum_Keys <- rowSums(ActiveR_Key)
ActiveR_Sum_Keys <- as.data.frame(as.table(ActiveR_Sum_Keys)) # Columns: Var1, Freq
ActiveR_Sum_Keys <- ActiveR_Sum_Keys[order(ActiveR_Sum_Keys$Freq, decreasing = TRUE), ] # Rank keywords

# Keep the keyword strings used more than 10 times.
Most_Key_AR <- filter(ActiveR_Sum_Keys, Freq > 10)
Most_Key_AR
##                           Var1 Freq
## 1                  needs-patch  114
## 2                    has-patch   76
## 3                  2nd-opinion   30
## 4      has-patch needs-refresh   20
## 5 needs-patch needs-unit-tests   17
## 6        has-patch 2nd-opinion   14
## 7              has-screenshots   11
# 1.2 Analyse keywords used by median reporters
nr <- nrow(MedianReporters) # Number of median reporters
# Names of all median reporters, as a character vector to filter on.
# Taking the whole first column replaces MedianReporters[1:nr, 1], which would
# index rows 1 and 0 (and yield NA) if the group were ever empty.
MR <- as.character(MedianReporters[[1]])

# Tickets opened by median reporters, cross-tabulated keyword string x reporter.
MedianR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% MR)
MedianR_Key <- table(MedianR_Key$Keywords, MedianR_Key$Reporter)
dim(MedianR_Key) # Keyword strings x median reporters
## [1] 76 36
# Total uses of each keyword string across all median reporters. Summing the
# whole table avoids the previous hard-coded [1:76, 1:36] slice.
MedianR_Sum_Keys <- rowSums(MedianR_Key)
MedianR_Sum_Keys <- as.data.frame(as.table(MedianR_Sum_Keys)) # Columns: Var1, Freq
MedianR_Sum_Keys <- MedianR_Sum_Keys[order(MedianR_Sum_Keys$Freq, decreasing = TRUE), ] # Rank keywords

# Keep the keyword strings used more than 2 times.
Most_Key_MR <- filter(MedianR_Sum_Keys, Freq > 2)
Most_Key_MR
##                            Var1 Freq
## 1                     has-patch   30
## 2                   needs-patch   23
## 3        has-patch dev-feedback    9
## 4       has-patch needs-testing    8
## 5       has-patch needs-refresh    7
## 6      has-patch has-unit-tests    6
## 7                   2nd-opinion    5
## 8  needs-patch needs-unit-tests    5
## 9             reporter-feedback    4
## 10     good-first-bug has-patch    3
# 1.3 Analyse keywords used by alien reporters
nr <- nrow(LessReporters) # Number of alien reporters
# Names of all alien reporters, as a character vector to filter on.
# Taking the whole first column replaces LessReporters[1:nr, 1], which would
# index rows 1 and 0 (and yield NA) if the group were ever empty.
LR <- as.character(LessReporters[[1]])

# Tickets opened by alien reporters, cross-tabulated keyword string x reporter.
LessR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% LR)
LessR_Key <- table(LessR_Key$Keywords, LessR_Key$Reporter)
# Report the dimensions of the alien-reporter table itself. The earlier version
# printed dim(MedianR_Key) here, so the recorded "[1] 76 36" described the
# median table; re-run to capture the real dimensions of LessR_Key.
dim(LessR_Key)
# Total uses of each keyword string across ALL alien reporters. The earlier
# version summed only LessR_Key[1:76, 1:36] (the median table's dimensions,
# copied over), which discarded every keyword row past 76 and every reporter
# column past 36.
LessR_Sum_Keys <- rowSums(LessR_Key)
LessR_Sum_Keys <- as.data.frame(as.table(LessR_Sum_Keys)) # Columns: Var1, Freq
LessR_Sum_Keys <- LessR_Sum_Keys[order(LessR_Sum_Keys$Freq, decreasing = TRUE), ] # Rank keywords

# Keep the keyword strings used at least once.
Most_Key_LR <- filter(LessR_Sum_Keys, Freq >= 1)
Most_Key_LR
# NOTE(review): the output below was recorded from the truncated 76 x 36 slice
# and is kept for reference only; re-run to refresh it.
##                                  Var1 Freq
## 1                           has-patch    8
## 2                   2nd-opinion early    1
## 3                               close    1
## 4                   close 2nd-opinion    1
## 5 close 2nd-opinion reporter-feedback    1
## 6                        dev-feedback    1
## 7 has-patch 2nd-opinion needs-testing    1
## 8  has-patch 4.8-early has-unit-tests    1
## 9              has-patch dev-feedback    1
# Draw the top keyword strings of the three reporter groups side by side
# (one row, three panels). las = 2 turns the axis labels perpendicular to the
# axis so the long keyword strings stay readable.
par(mfrow = c(1, 3))
barplot(Most_Key_AR$Freq, las = 2, names.arg = Most_Key_AR$Var1,
        col = "lightblue", main = "Freq. words of Active Reporters",
        ylab = "Word frequencies")
barplot(Most_Key_MR$Freq, las = 2, names.arg = Most_Key_MR$Var1,
        col = "lightgreen", main = "Freq. words of Median Reporters",
        ylab = "Word frequencies")
# Third panel shows the alien-reporter keywords (Most_Key_LR); its title
# previously repeated "Median Reporters" from the panel above.
barplot(Most_Key_LR$Freq, las = 2, names.arg = Most_Key_LR$Var1,
        col = "lightgreen", main = "Freq. words of Alien Reporters",
        ylab = "Word frequencies")