Type of Analysis: Descriptive analysis - Bivariate description
Source: Data come from the WordPress Trac ticket-reporting system. Source data frame URL: GitHub repository.
Date of collection: 04/07/2019.
1. EXPLORATORY ANALYSIS - BIVARIATE ANALYSIS
library(readr)
install.packages("dplyr")
## Installing package into 'C:/Users/Jacob/Documents/R/win-library/3.5'
## (as 'lib' is unspecified)
## package 'dplyr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Jacob\AppData\Local\Temp\Rtmp0OoOT2\downloaded_packages
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(markdown)
library(ggplot2)
library(lattice)
library(tidyr)
# Read data: load the ticket export into TicketW. No column types are
# supplied, so readr guesses them from the file contents.
TicketW <- read_csv(
  file = "~/PhD Analysis/1. PhD escriptive exploratory analysis/TicketW.csv"
)
## Parsed with column specification:
## cols(
## id = col_double(),
## Summary = col_character(),
## Status = col_character(),
## Version = col_logical(),
## Owner = col_character(),
## Type = col_character(),
## Priority = col_character(),
## Milestone = col_character(),
## Component = col_character(),
## Severity = col_character(),
## Resolution = col_character(),
## Created = col_character(),
## Modified = col_character(),
## Focuses = col_character(),
## Reporter = col_character(),
## Keywords = col_character()
## )
# Open the spreadsheet-style data viewer only in an interactive session;
# View() needs a display and serves no purpose when the script is run in
# batch mode (e.g. Rscript or knitting).
if (interactive()) {
  View(TicketW)
}
# Print a compact overview: one line per column with its type and the first
# few values.
glimpse(TicketW)
## Observations: 2,333
## Variables: 16
## $ id <dbl> 24579, 30361, 32502, 36441, 40439, 41292, 41886, 41...
## $ Summary <chr> "Add Drag'n'Drop UI to plugin and theme manual uplo...
## $ Status <chr> "new", "assigned", "new", "new", "assigned", "reope...
## $ Version <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Owner <chr> NA, "pento", NA, NA, "mikeschroder", "jnylen0", "me...
## $ Type <chr> "enhancement", "task (blessed)", "defect (bug)", "d...
## $ Priority <chr> "high", "high", "high", "high", "high", "high", "hi...
## $ Milestone <chr> "Future Release", NA, NA, "Future Release", "5.3", ...
## $ Component <chr> "Upgrade/Install", "General", "Administration", "Cu...
## $ Severity <chr> "normal", "normal", "major", "normal", "normal", "n...
## $ Resolution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Created <chr> "06/14/2013 05:03:38 PM", "11/17/2014 12:10:55 PM",...
## $ Modified <chr> "04/12/2019 11:04:54 AM", "06/04/2019 07:42:28 PM",...
## $ Focuses <chr> NA, "ui, administration", NA, NA, "ui", NA, NA, NA,...
## $ Reporter <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder"...
## $ Keywords <chr> "ui-feedback ux-feedback needs-patch shiny-updates"...
1 REPORTERS AND KEYWORDS ANALYSIS: Reporters are WordPress community members who find a problem in the WordPress platform and report it in a ticket.
Goal: Identify the keywords most frequently used by each group of reporters (active, median, and alien).
# Rebuild the reporter frequency table from the first (univariate) analysis:
# one row per distinct reporter (Var1) with the number of TicketW rows that
# name that reporter (Freq).
Reporter <- as.data.frame(table(TicketW$Reporter))
# Split reporters into three groups by ticket count.
# NOTE(review): the cut-offs are strict on both sides, so reporters with
# exactly 10 or exactly 4 tickets belong to none of the three groups --
# confirm whether that is intended.
ActiveReporters <- Reporter %>% filter(Freq > 10)
MedianReporters <- Reporter %>% filter(Freq > 4, Freq < 10)
LessReporters <- Reporter %>% filter(Freq < 4)
# 1.1 Analyse keywords used by active reporters
# Keep only the two columns needed for the reporter/keyword cross-tabulation.
Reporter_Keywords <- select(TicketW, Reporter, Keywords)
# Sanity checks on the reduced table: per-column summary, size, and preview.
summary(Reporter_Keywords)
## Reporter Keywords
## Length:2333 Length:2333
## Class :character Class :character
## Mode :character Mode :character
dim(Reporter_Keywords)
## [1] 2333 2
glimpse(Reporter_Keywords)
## Observations: 2,333
## Variables: 2
## $ Reporter <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder", ...
## $ Keywords <chr> "ui-feedback ux-feedback needs-patch shiny-updates", ...
nrow(ActiveReporters) # Number of active reporters
## [1] 34
# Take every reporter name from the first column rather than a hard-coded
# 1:34 row range, so the selection stays correct if the data or the
# activity threshold changes.
AR <- as.character(ActiveReporters[[1]])
# Tickets opened by an active reporter.
ActiveR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% AR)
# Cross-tabulate keyword strings (rows) against reporters (columns); rows
# whose Keywords or Reporter is NA are dropped by table().
ActiveR_Key <- table(ActiveR_Key$Keywords, ActiveR_Key$Reporter)
dim(ActiveR_Key) # Dimensions: keyword strings x reporters
## [1] 229 34
# Sum over the whole table instead of a hard-coded [1:229, 1:34] window,
# which would silently truncate (or fail) once the dimensions differ.
ActiveR_Sum_Keys <- rowSums(ActiveR_Key)
# Convert the named totals to a data frame with columns Var1 and Freq.
ActiveR_Sum_Keys <- as.data.frame(as.table(ActiveR_Sum_Keys))
# Rank keyword strings from most to least used.
ActiveR_Sum_Keys <- ActiveR_Sum_Keys[
  order(ActiveR_Sum_Keys$Freq, decreasing = TRUE),
]
Most_Key_AR <- filter(ActiveR_Sum_Keys, Freq > 10)
Most_Key_AR # Keyword strings used more than 10 times
## Var1 Freq
## 1 needs-patch 114
## 2 has-patch 76
## 3 2nd-opinion 30
## 4 has-patch needs-refresh 20
## 5 needs-patch needs-unit-tests 17
## 6 has-patch 2nd-opinion 14
## 7 has-screenshots 11
# 1.2 Analyse keywords used by median reporters
nr <- nrow(MedianReporters) # Number of median reporters
# Take every reporter name from the first column; indexing with 1:nr would
# select the wrong rows when the group is empty (1:0 is c(1, 0)).
MR <- as.character(MedianReporters[[1]])
# Tickets opened by a median reporter.
MedianR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% MR)
# Cross-tabulate keyword strings (rows) against reporters (columns); rows
# whose Keywords or Reporter is NA are dropped by table().
MedianR_Key <- table(MedianR_Key$Keywords, MedianR_Key$Reporter)
dim(MedianR_Key) # Dimensions: keyword strings x reporters
## [1] 76 36
# Sum over the whole table instead of a hard-coded [1:76, 1:36] window,
# which would silently truncate (or fail) once the dimensions differ.
MedianR_Sum_Keys <- rowSums(MedianR_Key)
# Convert the named totals to a data frame with columns Var1 and Freq.
MedianR_Sum_Keys <- as.data.frame(as.table(MedianR_Sum_Keys))
# Rank keyword strings from most to least used.
MedianR_Sum_Keys <- MedianR_Sum_Keys[
  order(MedianR_Sum_Keys$Freq, decreasing = TRUE),
]
Most_Key_MR <- filter(MedianR_Sum_Keys, Freq > 2)
Most_Key_MR # Keyword strings used more than 2 times
## Var1 Freq
## 1 has-patch 30
## 2 needs-patch 23
## 3 has-patch dev-feedback 9
## 4 has-patch needs-testing 8
## 5 has-patch needs-refresh 7
## 6 has-patch has-unit-tests 6
## 7 2nd-opinion 5
## 8 needs-patch needs-unit-tests 5
## 9 reporter-feedback 4
## 10 good-first-bug has-patch 3
# 1.3 Analyse keywords used by alien reporters
nr <- nrow(LessReporters) # Number of alien reporters
# Take every alien reporter name from the first column; indexing with 1:nr
# would select the wrong rows when the group is empty (1:0 is c(1, 0)).
LR <- as.character(LessReporters[[1]])
# Tickets opened by an alien reporter.
LessR_Key <- subset(Reporter_Keywords, Reporter_Keywords$Reporter %in% LR)
# Cross-tabulate keyword strings (rows) against reporters (columns); rows
# whose Keywords or Reporter is NA are dropped by table().
LessR_Key <- table(LessR_Key$Keywords, LessR_Key$Reporter)
dim(LessR_Key) # Dimensions of the alien-reporter table (not MedianR_Key)
## NOTE: the "[1] 76 36" previously recorded here was dim(MedianR_Key);
## re-run to obtain the dimensions of LessR_Key.
# Sum over the whole alien-reporter table. The earlier [1:76, 1:36] window
# was copied from the median-reporter section and discarded every keyword
# row past 76 and every reporter column past 36.
LessR_Sum_Keys <- rowSums(LessR_Key)
# Convert the named totals to a data frame with columns Var1 and Freq.
LessR_Sum_Keys <- as.data.frame(as.table(LessR_Sum_Keys))
# Rank keyword strings from most to least used.
LessR_Sum_Keys <- LessR_Sum_Keys[
  order(LessR_Sum_Keys$Freq, decreasing = TRUE),
]
Most_Key_LR <- filter(LessR_Sum_Keys, Freq >= 1)
# NOTE(review): output recorded for this table before the fix came from the
# truncated 76 x 36 window and must be regenerated.
Most_Key_LR # Keyword strings used at least once
## Var1 Freq
## 1 has-patch 8
## 2 2nd-opinion early 1
## 3 close 1
## 4 close 2nd-opinion 1
## 5 close 2nd-opinion reporter-feedback 1
## 6 dev-feedback 1
## 7 has-patch 2nd-opinion needs-testing 1
## 8 has-patch 4.8-early has-unit-tests 1
## 9 has-patch dev-feedback 1
# Draw the three keyword rankings side by side (1 row x 3 columns), one
# panel per reporter group; las = 2 rotates the axis labels so the long
# keyword strings stay readable.
par(mfrow = c(1, 3))
barplot(Most_Key_AR$Freq,
  las = 2, names.arg = Most_Key_AR$Var1,
  col = "lightblue", main = "Freq. words of Active Reporters",
  ylab = "Word frequencies"
)
barplot(Most_Key_MR$Freq,
  las = 2, names.arg = Most_Key_MR$Var1,
  col = "lightgreen", main = "Freq. words of Median Reporters",
  ylab = "Word frequencies"
)
# Third panel shows the alien-reporter ranking; its title previously
# repeated "Median Reporters" from the panel above.
barplot(Most_Key_LR$Freq,
  las = 2, names.arg = Most_Key_LR$Var1,
  col = "lightgreen", main = "Freq. words of Alien Reporters",
  ylab = "Word frequencies"
)