Type of Analysis: Descriptive analysis - Bivariate description
Source: Data come from WordPress Report Trac System. URL Source Dataframe: GitHub Repository
Date collection: 04/07/2019.
1. EXPLORATORY ANALYSIS -BIVARIABLE ANALYSIS
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#READ DATA
TicketW <- read_csv('~/PhD Analysis/1. PhD escriptive exploratory analysis/TicketW.csv')
## Parsed with column specification:
## cols(
## id = col_double(),
## Summary = col_character(),
## Status = col_character(),
## Version = col_logical(),
## Owner = col_character(),
## Type = col_character(),
## Priority = col_character(),
## Milestone = col_character(),
## Component = col_character(),
## Severity = col_character(),
## Resolution = col_character(),
## Created = col_character(),
## Modified = col_character(),
## Focuses = col_character(),
## Reporter = col_character(),
## Keywords = col_character()
## )
View(TicketW) # Read
glimpse(TicketW)
## Observations: 2,333
## Variables: 16
## $ id <dbl> 24579, 30361, 32502, 36441, 40439, 41292, 41886, 41...
## $ Summary <chr> "Add Drag'n'Drop UI to plugin and theme manual uplo...
## $ Status <chr> "new", "assigned", "new", "new", "assigned", "reope...
## $ Version <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Owner <chr> NA, "pento", NA, NA, "mikeschroder", "jnylen0", "me...
## $ Type <chr> "enhancement", "task (blessed)", "defect (bug)", "d...
## $ Priority <chr> "high", "high", "high", "high", "high", "high", "hi...
## $ Milestone <chr> "Future Release", NA, NA, "Future Release", "5.3", ...
## $ Component <chr> "Upgrade/Install", "General", "Administration", "Cu...
## $ Severity <chr> "normal", "normal", "major", "normal", "normal", "n...
## $ Resolution <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ Created <chr> "06/14/2013 05:03:38 PM", "11/17/2014 12:10:55 PM",...
## $ Modified <chr> "04/12/2019 11:04:54 AM", "06/04/2019 07:42:28 PM",...
## $ Focuses <chr> NA, "ui, administration", NA, NA, "ui", NA, NA, NA,...
## $ Reporter <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder"...
## $ Keywords <chr> "ui-feedback ux-feedback needs-patch shiny-updates"...
Functions
#Bivariable analysis
#Functions
#Function .List.f.filter() make variable set that will be filtered
.List.f.filter = function(x){
nr<-nrow(x)#Quantity of members
y<-x[1:nr,1:1] #Select array of members names to filter
z<-as.character(y) #Transform names into caracter array
return(z)
}
# Function .List.w.Group() Select two key valiables for each group and return a list of inter-ralation of it
# Where: x is dataframe with 2 colunms of variables
# y is a variable yo want to extract a list in order to filter a group
# f is a list of the group achieve from Function .List.f.filter()
.Top.15.Group = function(x,y,f){
z = subset(x, y %in% f)
z<-table(z$Keywords,z$Reporter)
d<-dim(z)
w<-rowSums(z[1:d[1],1:d[2]])
w<-as.data.frame(as.table(w)) # Transform to a data frame
w<- w[order(w$Freq, decreasing = TRUE),] #Ranking
w<-w[1:15,1:2] #Filter top 10
return(w)
}
1 REPORTERS AND KEYWORDS ANALYSIS: Reporters are WordPress Community members who find and report a problem from WP Platform, into a Ticket.
Goal: Identify the most frequent used to each report member groups (actives, medians, and aliens).
#1.Analyse Keywords used for Active reporters
Reporter_Keywords<- TicketW %>% select(Reporter,Keywords) #Table of Reporters and Keywords
summary(Reporter_Keywords)
## Reporter Keywords
## Length:2333 Length:2333
## Class :character Class :character
## Mode :character Mode :character
dim(Reporter_Keywords)
## [1] 2333 2
glimpse(Reporter_Keywords)
## Observations: 2,333
## Variables: 2
## $ Reporter <chr> "tw2113", "pento", "ryan", "azaozz", "mikeschroder", ...
## $ Keywords <chr> "ui-feedback ux-feedback needs-patch shiny-updates", ...
#1.1 Analyse Keywords used for Active reporters
AR<-.List.f.filter(ActiveReporters)
# 10 top Keywords used by Active Group
Most_Key_AR<-.Top.15.Group(Reporter_Keywords,Reporter_Keywords$Reporter,AR)
#1.2 Analyse Keywords used for Median reporters
MR<-.List.f.filter(MedianReporters)
# 10 top Keywords used by Median Group
Most_Key_MR<-.Top.15.Group(Reporter_Keywords,Reporter_Keywords$Reporter,MR)
#1.3 Analyse Keywords used for Alien reporters
LR<-.List.f.filter(LessReporters)
# 10 top Keywords used by Alien Group
Most_Key_LR<-.Top.15.Group(Reporter_Keywords,Reporter_Keywords$Reporter,LR)
library(SnowballC)
library(wordcloud)
## Loading required package: RColorBrewer
library(RColorBrewer)
#Graphics of most frequent group words
#Active Group
barplot(Most_Key_AR$Freq, las = 2, names.arg = Most_Key_AR$Var1,
col ="lightblue", main ="Freq. words of Active Reporters",
ylab = "Word frequencies")
set.seed(1)
wordcloud(words = Most_Key_AR$Var1, freq = Most_Key_AR$Freq, min.freq = 1,
max.words=15, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))
#Median Group
barplot(Most_Key_MR$Freq, las = 2, names.arg = Most_Key_MR$Var1,
col ="lightgreen", main ="Freq. words of Median Reporters",
ylab = "Word frequencies")
set.seed(1)
wordcloud(words = Most_Key_MR$Var1, freq = Most_Key_MR$Freq, min.freq = 1,
max.words=15, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))
#Alien Group
barplot(Most_Key_LR$Freq, las = 2, names.arg = Most_Key_LR$Var1,
col ="deeppink1", main ="Freq. words of Median Reporters",
ylab = "Word frequencies")
set.seed(1)
wordcloud(words = Most_Key_LR$Var1, freq = Most_Key_LR$Freq, min.freq = 1,
max.words=14, random.order=FALSE, rot.per=0.35,
colors=brewer.pal(8, "Dark2"))