## Load the R packages
# For text mining
library(tm)
## Loading required package: NLP
# For the pipe operator (%>%)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# For plot
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:NLP':
##
## annotate
# For word clouds
library(wordcloud)
## Loading required package: RColorBrewer
## Warning: package 'RColorBrewer' was built under R version 4.1.2
# For pyramid plot
library(plotrix)
# For word network
library(qdap)
## Loading required package: qdapDictionaries
## Loading required package: qdapRegex
## Warning: package 'qdapRegex' was built under R version 4.1.2
##
## Attaching package: 'qdapRegex'
## The following object is masked from 'package:ggplot2':
##
## %+%
## The following object is masked from 'package:dplyr':
##
## explain
## Loading required package: qdapTools
##
## Attaching package: 'qdapTools'
## The following object is masked from 'package:dplyr':
##
## id
##
## Attaching package: 'qdap'
## The following objects are masked from 'package:tm':
##
## as.DocumentTermMatrix, as.TermDocumentMatrix
## The following object is masked from 'package:NLP':
##
## ngrams
## The following objects are masked from 'package:base':
##
## Filter, proportions
# For pyramid plot
library(plotrix)
## Load the data
library(readxl)
## Warning: package 'readxl' was built under R version 4.1.2
# Read the coded article table from the "Data Coding for EQTA" sheet.
MM_Data <- read_excel(
  "MM Dissertation Coding Table(AutoRecovered).xlsx",
  sheet = "Data Coding for EQTA"
)

# Number of articles per publication year.
# count(Year) yields the same Year / n columns as group_by(Year) %>% count(),
# but returns an ungrouped tibble, so no grouping leaks into later steps.
MM_article_by_year <- MM_Data %>%
  count(Year)

# Line chart of yearly article counts.
p <- ggplot(MM_article_by_year, aes(x = Year, y = n)) +
  geom_line() +
  ggtitle("Article Frequency by Year") +
  ylab("Frequency")

# White panel with thin grey grid lines.
p + theme(
  panel.background = element_rect(
    fill = "White", colour = "White", size = 0.5, linetype = "solid"
  ),
  panel.grid.major = element_line(
    size = 0.25, linetype = "solid", colour = "Grey"
  ),
  panel.grid.minor = element_line(
    size = 0.25, linetype = "solid", colour = "Grey"
  )
)
# Summary of the `Author N` column (presumably authors per article - confirm).
# Total across all rows
sum(MM_Data[["Author N"]])
## [1] 294
# Mean per row
mean(MM_Data[["Author N"]])
## [1] 2.60177
# Standard deviation
sd(MM_Data[["Author N"]])
## [1] 1.567185
Takeaways:
1. It looks like every few years there is a surge of interest in Mind Map research.
# How many countries are represented? (113 studies from 31 countries)
unique(MM_Data[["Country of Authorship"]])
## [1] "United States" "United Kingdom" "Finland"
## [4] "Sweden" "Turkey" "Switzerland"
## [7] "French" "Canada" "China"
## [10] "Romania" "Austria" "Taiwan"
## [13] "Malaysia" "Sri Lanka" "Iran"
## [16] "Australia" "Bangladesh" "Thailand"
## [19] "Indonesia" "Belgium" "Czech Republic"
## [22] "Russia" "India" "Egypt"
## [25] "United Arab Emirates" "Pakistan" "Netherlands"
## [28] "Perú" "Japan" "Uzbekistan"
## [31] "Brazil"
# Five most frequent values of `Country of Authorship`, largest first
head(
  sort(table(MM_Data[["Country of Authorship"]]), decreasing = TRUE),
  n = 5
)
##
## United States China Turkey Czech Republic Indonesia
## 28 14 8 6 6
Countries with the most Mind Map studies.
United States: 28 Studies
China: 14 Studies
Turkey: 8 Studies
Czech Republic: 6 Studies
Indonesia: 6 Studies
#' Clean a tm corpus before building a term-document matrix
#'
#' Applies, in order: punctuation removal, lower-casing, stop-word removal
#' and whitespace collapsing.
#'
#' @param corpus A corpus accepted by `tm_map()` (this script passes
#'   `VCorpus` objects).
#' @param extra_stopwords Character vector of words to remove in addition to
#'   `stopwords("en")`. `NULL` (the default) uses the built-in list of filler
#'   words below, so existing one-argument calls behave exactly as before.
#' @return The transformed corpus.
clean_corpus <- function(corpus, extra_stopwords = NULL) {
  if (is.null(extra_stopwords)) {
    # Default filler words that carry no topical meaning in these abstracts
    extra_stopwords <- c(
      "using", "used", "two", "can", "will", "article", "paper", "study",
      "provide", "provides", "provided", "across", "based", "within",
      "found", "data", "three", "set", "use", "results", "variables",
      "single", "first", "one", "second", "third", "number", "students",
      "however", "also", "well", "may", "purpose", "showed"
    )
  }

  # Remove punctuation
  # NOTE(review): this runs before stop-word removal, so stop words that
  # contain an apostrophe presumably no longer match - confirm the order.
  corpus <- tm_map(corpus, removePunctuation)
  # Transform to lower case so the lower-case stop words match
  corpus <- tm_map(corpus, content_transformer(tolower))
  # Drop English stop words plus the extra list
  corpus <- tm_map(
    corpus, removeWords,
    words = c(stopwords("en"), extra_stopwords)
  )
  # Collapse the whitespace runs left behind by the removals
  corpus <- tm_map(corpus, stripWhitespace)
  corpus
}
# Merge the plural "maps" into "map" so both forms count as one term.
# The match is case-insensitive: the earlier pattern "Maps" only caught the
# capitalised form, so lower-case "maps" stayed a separate term.
# NOTE: the recorded output after this chunk predates this fix; re-knit to
# refresh it.
MM_Data$`Article Topic` <- gsub(
  "maps", "map", MM_Data$`Article Topic`,
  ignore.case = TRUE
)
# Make a vector source from titles
Title_source <- VectorSource(MM_Data$`Article Topic`)
# Make a volatile corpus from Title_source
Title_corpus <- VCorpus(Title_source)
# Clean the corpus
clean_Title <- clean_corpus(Title_corpus)
# Create a term-document matrix from the corpus
Title_tdm <- TermDocumentMatrix(clean_Title)
# Convert Title_tdm to a matrix
Title_m <- as.matrix(Title_tdm)
# Total count of each term across all titles, most frequent first
term_frequency <- sort(rowSums(Title_m), decreasing = TRUE)
# View the 15 most common words (head() also copes with fewer than 15 terms)
head(term_frequency, 15)
## mind map learning mapping education research
## 96 59 29 29 12 11
## teaching application knowledge thinking tool analysis
## 11 8 8 8 8 7
## concept maps strategy
## 7 7 7
# Bar chart of the 15 most common title terms
barplot(term_frequency[seq_len(15)], col = "tan", las = 2)
# Terms in the same order as their counts
terms_vec <- names(term_frequency)
# Word cloud of up to 80 title terms, most frequent placed first
wordcloud(
  words = terms_vec,
  freq = term_frequency,
  max.words = 80,
  rot.per = 0.35,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
# Build a cleaned volatile corpus from the Abstract column
Abstract_source <- VectorSource(MM_Data[["Abstract"]])
Abstract_corpus <- VCorpus(Abstract_source)
clean_Abstract <- clean_corpus(Abstract_corpus)
# Term-document matrix, then a plain matrix (terms in rows)
Abstract_tdm <- TermDocumentMatrix(clean_Abstract)
Abstract_m <- as.matrix(Abstract_tdm)
# Total count of each term across all abstracts, most frequent first
term_frequency <- sort(rowSums(Abstract_m), decreasing = TRUE)
# Show the 15 most common words
term_frequency[seq_len(15)]
## mind learning mapping maps map knowledge
## 403 198 147 139 135 91
## research information group thinking teaching technique
## 85 76 72 71 63 61
## analysis skills groups
## 50 50 44
# Bar chart of the 15 most common abstract terms, with each count printed
# just above its bar.
# barplot() returns the bar midpoints, which are used as label x positions.
x1 <- barplot(
  term_frequency[1:15],
  col = "tan", las = 2, main = "Top 15 Most Frequent Terms"
)
y1 <- as.matrix(term_frequency[1:15])
# `labels` was misspelt as `labes`, so text() ignored it with a warning and
# the counts were never drawn. text() returns nothing useful, so its result
# is no longer stored.
text(x1, y1 + 2, labels = as.character(y1))
# Terms in the same order as their counts
terms_vec <- names(term_frequency)
# Word cloud of up to 80 abstract terms, coloured with the Dark2 palette
wordcloud(
  words = terms_vec,
  freq = term_frequency,
  max.words = 80,
  rot.per = 0.35,
  random.order = FALSE,
  colors = brewer.pal(8, "Dark2")
)
# Same cloud with the default colour
wordcloud(
  words = terms_vec,
  freq = term_frequency,
  max.words = 80,
  rot.per = 0.35,
  random.order = FALSE
)
# Number of articles per value of the Subject column
table(MM_Data[["Subject"]])
##
## Arts Biology Business Computer Science
## 1 2 3 17
## e-learning Economics Education Engineering
## 6 3 16 4
## General Language Learning Marketing Medical
## 13 11 2 20
## Physical Science Social Science Teacher Education
## 4 2 2 1
## Writing
## 6
# Number of articles per value of the Target column
table(MM_Data[["Target"]])
##
## Concept Design Cooperative Learning Creatical Thinking
## 1 3 1
## Creative Thinking General Improving Learning
## 7 42 46
## Improving Teaching Problem Solving Researching
## 1 1 11
Mind Map Usage
# Number of articles per value of the `MindMap Usage` column
table(MM_Data[["MindMap Usage"]])
##
## Both Hand Written Not Report Software
## 4 32 18 59
Mind Map Duration
# Number of articles per value of the `MindMap Duration` column
table(MM_Data[["MindMap Duration"]])
##
## Long Term Median Term Not Report Short term
## 9 8 42 54
Mind Map Naming (primary term used in the paper)
# Number of articles per value of the `Primary-Calling` column
table(MM_Data[["Primary-Calling"]])
##
## Interchangeably Mind Map Mind Mapping Others
## 53 50 6 4
Quantitative or Qualitative
# Number of articles per value of the `quali or quant` column
table(MM_Data[["quali or quant"]])
##
## Mixed-Method Qualitative Quantitative
## 5 55 53
Research Design
# Number of articles per value of the `Research Design` column
table(MM_Data[["Research Design"]])
##
## Case Study Correlational
## 1 2
## Descriptive Embeded Design
## 20 1
## Experimental Explanatory Sequential Design
## 8 2
## Exploratory Sequential Design Focus group
## 2 14
## Grounded Theory Literature Review
## 11 4
## Meta-Analysis Narrative
## 1 25
## Quasi-experimental
## 22
Quality of Evidence
# Number of articles per value of the `Quality of the Evidence` column
table(MM_Data[["Quality of the Evidence"]])
##
## LV1-Meta LV2-RCT LV3-CT
## 1 12 24
## LV4-Case_Study LV5-Descriptive LV6-Single_Qualitative
## 18 28 30
## Compare abstracts of software-based and hand-written mind map papers
## Pull out all papers whose mind maps were made with software
MM_software <- MM_Data[which(MM_Data$`MindMap Usage` == "Software"), ]
## Join their abstracts into one document. The space separator keeps the last
## word of one abstract from fusing with the first word of the next, which
## collapse = "" caused.
All_software <- paste(MM_software$Abstract, collapse = " ")
## Pull out all papers whose mind maps were hand written
MM_hand <- MM_Data[which(MM_Data$`MindMap Usage` == "Hand Written"), ]
## Join their abstracts into one document
All_hand <- paste(MM_hand$Abstract, collapse = " ")
All_papers <- c(All_software, All_hand)
## Clean both documents and build a two-column term-document matrix
All_papers <- VectorSource(All_papers)
All_corpus <- VCorpus(All_papers)
All_clean <- clean_corpus(All_corpus)
All_tdm <- TermDocumentMatrix(All_clean)
colnames(All_tdm) <- c("Mind Map Use Software", "Mind Map by hand")
All_m <- as.matrix(All_tdm)
### Which terms distinguish software papers from hand-written papers? (comparison cloud)
comparison.cloud(
  All_m,
  max.words = 30, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(max(3, ncol(All_m)), "Dark2")
)
### How do software and hand-written papers differ on the terms they share? (pyramid plot)
## Keep only terms that occur in both documents
common_words <- subset(All_m, All_m[, 1] > 0 & All_m[, 2] > 0)
## Rank shared terms by the absolute gap between the two counts
difference <- abs(common_words[, 1] - common_words[, 2])
common_words <- cbind(common_words, difference)
common_words <- common_words[
  order(common_words[, 3], decreasing = TRUE), ,
  drop = FALSE
]
## Take the 25 shared terms with the largest gap. head() also works when
## fewer than 25 terms are shared. The earlier `common_words[1:25],1` had a
## misplaced bracket that added a stray constant column to the data frame.
top25 <- head(common_words, 25)
top25_df <- data.frame(
  x = top25[, 1],
  y = top25[, 2],
  labels = rownames(top25)
)
top25_df <- top25_df[order(top25_df$labels, decreasing = TRUE), ]
## Make pyramid plot
## NOTE: the recorded output after this chunk predates the separator fix;
## re-knit to refresh it.
pyramid.plot(
  top25_df$x, top25_df$y,
  labels = top25_df$labels, main = "Words in Common", gap = 80,
  raxlab = NULL, unit = NULL,
  top.labels = c("Mind Map by software", "word", "Mind Map by hand")
)
## 199 199
## [1] 5.1 4.1 4.1 2.1
## Compare abstracts of short-term and medium/long-term interventions
## Pull out all short-term mind map interventions
MM_short <- MM_Data[which(MM_Data$`MindMap Duration` == "Short term"), ]
## Join their abstracts into one document. The space separator keeps the last
## word of one abstract from fusing with the first word of the next, which
## collapse = "" caused.
All_short <- paste(MM_short$Abstract, collapse = " ")
## Pull out all medium-term and long-term mind map interventions
MM_Median <- MM_Data[which(MM_Data$`MindMap Duration` == "Median Term"), ]
MM_long <- MM_Data[which(MM_Data$`MindMap Duration` == "Long Term"), ]
MM_MedianNlong <- rbind(MM_Median, MM_long)
## Join the medium- and long-term abstracts into one document
All_MedianNlong <- paste(MM_MedianNlong$Abstract, collapse = " ")
All_papers <- c(All_short, All_MedianNlong)
## Clean both documents and build a two-column term-document matrix
All_papers <- VectorSource(All_papers)
All_corpus <- VCorpus(All_papers)
All_clean <- clean_corpus(All_corpus)
All_tdm <- TermDocumentMatrix(All_clean)
colnames(All_tdm) <- c(
  "Short Term Intervention",
  "Medium & Long Term Intervention"
)
All_m <- as.matrix(All_tdm)
### Which terms distinguish short-term from medium/long-term interventions? (comparison cloud)
## NOTE: the recorded output after this chunk predates the separator fix;
## re-knit to refresh it.
comparison.cloud(
  All_m,
  max.words = 30, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(max(3, ncol(All_m)), "Dark2")
)
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## metacognitive could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## traditional could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## significant could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## anatomy could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## research could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## cooperative could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## higher could not be fit on page. It will not be plotted.
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, : mean
## could not be fit on page. It will not be plotted.
### How do short-term and medium/long-term interventions differ on the terms they share? (pyramid plot)
## Keep only terms that occur in both documents
common_words <- subset(All_m, All_m[, 1] > 0 & All_m[, 2] > 0)
## Rank shared terms by the absolute gap between the two counts
difference <- abs(common_words[, 1] - common_words[, 2])
common_words <- cbind(common_words, difference)
common_words <- common_words[
  order(common_words[, 3], decreasing = TRUE), ,
  drop = FALSE
]
## Take the 25 shared terms with the largest gap. head() also works when
## fewer than 25 terms are shared. The earlier `common_words[1:25],1` had a
## misplaced bracket that added a stray constant column to the data frame.
top25 <- head(common_words, 25)
top25_df <- data.frame(
  x = top25[, 1],
  y = top25[, 2],
  labels = rownames(top25)
)
top25_df <- top25_df[order(top25_df$labels, decreasing = TRUE), ]
## Make pyramid plot
## NOTE(review): the right-hand counts are multiplied by 3, presumably to
## offset the smaller medium/long-term group (17 papers vs 54 short-term).
## Confirm this is intended: the plotted values are not raw counts.
pyramid.plot(
  top25_df$x, top25_df$y * 3,
  labels = top25_df$labels, main = "Words in Common", gap = 80,
  raxlab = NULL, unit = NULL,
  top.labels = c(
    "Short Term Intervention", "word", "Medium & Long Term Intervention"
  )
)
## 198 198
## [1] 5.1 4.1 4.1 2.1
## Compare abstracts of qualitative and quantitative mind map papers
## Pull out all qualitative papers
MM_Quali <- MM_Data[which(MM_Data$`quali or quant` == "Qualitative"), ]
## Join their abstracts into one document. The space separator keeps the last
## word of one abstract from fusing with the first word of the next, which
## collapse = "" caused.
All_Quali <- paste(MM_Quali$Abstract, collapse = " ")
## Pull out all quantitative papers
MM_Quant <- MM_Data[which(MM_Data$`quali or quant` == "Quantitative"), ]
## Join their abstracts into one document
All_Quant <- paste(MM_Quant$Abstract, collapse = " ")
All_papers <- c(All_Quali, All_Quant)
## Clean both documents and build a two-column term-document matrix
All_papers <- VectorSource(All_papers)
All_corpus <- VCorpus(All_papers)
All_clean <- clean_corpus(All_corpus)
All_tdm <- TermDocumentMatrix(All_clean)
colnames(All_tdm) <- c("Qualitative paper", "Quantitative paper")
All_m <- as.matrix(All_tdm)
### Which terms distinguish qualitative from quantitative papers? (comparison cloud)
## NOTE: the recorded output after this chunk predates the separator fix;
## re-knit to refresh it.
comparison.cloud(
  All_m,
  max.words = 30, random.order = FALSE, rot.per = 0.35,
  colors = brewer.pal(max(3, ncol(All_m)), "Dark2")
)
## Warning in comparison.cloud(All_m, max.words = 30, random.order = FALSE, :
## qualitative could not be fit on page. It will not be plotted.
### How do qualitative and quantitative papers differ on the terms they share? (pyramid plot)
## Keep only terms that occur in both documents
common_words <- subset(All_m, All_m[, 1] > 0 & All_m[, 2] > 0)
## Rank shared terms by the absolute gap between the two counts
difference <- abs(common_words[, 1] - common_words[, 2])
common_words <- cbind(common_words, difference)
common_words <- common_words[
  order(common_words[, 3], decreasing = TRUE), ,
  drop = FALSE
]
## Take the 25 shared terms with the largest gap. head() also works when
## fewer than 25 terms are shared. The earlier `common_words[1:25],1` had a
## misplaced bracket that added a stray constant column to the data frame.
top25 <- head(common_words, 25)
top25_df <- data.frame(
  x = top25[, 1],
  y = top25[, 2],
  labels = rownames(top25)
)
top25_df <- top25_df[order(top25_df$labels, decreasing = TRUE), ]
## Make pyramid plot
pyramid.plot(
  top25_df$x, top25_df$y,
  labels = top25_df$labels, main = "Words in Common", gap = 80,
  raxlab = NULL, unit = NULL,
  top.labels = c("Qualitative Paper", "word", "Quantitative Paper")
)
## 210 210
## [1] 5.1 4.1 4.1 2.1
# Q1: research approach by mind map medium.
# Columns are selected by name, not position (11, 14), so a reordered
# spreadsheet cannot silently pick the wrong columns.
Q1 <- MM_Data[, c("MindMap Usage", "quali or quant")]
colnames(Q1) <- c("Usage", "QUA")
Q1_soft <- subset(Q1, Usage == "Software")
# Share of each approach among software papers. The denominator is the
# subset's row count instead of a hard-coded 59.
table(Q1_soft$QUA) / nrow(Q1_soft)
##
## Mixed-Method Qualitative Quantitative
## 0.03389831 0.62711864 0.33898305
Q1_hand <- subset(Q1, Usage == "Hand Written")
# Share of each approach among hand-written papers (was a hard-coded 32)
table(Q1_hand$QUA) / nrow(Q1_hand)
##
## Mixed-Method Qualitative Quantitative
## 0.09375 0.18750 0.71875
# Q2: intervention duration by mind map medium.
# Columns are selected by name, not position (11, 12), so a reordered
# spreadsheet cannot silently pick the wrong columns.
Q2 <- MM_Data[, c("MindMap Usage", "MindMap Duration")]
colnames(Q2) <- c("Usage", "Duration")
Q2_soft <- subset(Q2, Usage == "Software")
# Share of each duration among software papers. The denominator is the
# subset's row count instead of a hard-coded 59.
table(Q2_soft$Duration) / nrow(Q2_soft)
##
## Long Term Median Term Not Report Short term
## 0.05084746 0.06779661 0.44067797 0.44067797
Q2_hand <- subset(Q2, Usage == "Hand Written")
# Share of each duration among hand-written papers (was a hard-coded 32)
table(Q2_hand$Duration) / nrow(Q2_hand)
##
## Long Term Median Term Not Report Short term
## 0.18750 0.09375 0.03125 0.68750
# Q3: mind map medium before and after 2009.
# Columns are selected by name, not position (7, 11), so a reordered
# spreadsheet cannot silently pick the wrong columns.
Q3 <- MM_Data[, c("Year", "MindMap Usage")]
Q3_first <- subset(Q3, Year <= 2009)
Q3_Second <- subset(Q3, Year > 2009)
# Share of each medium up to 2009. The denominator is the subset's row
# count instead of a hard-coded 30.
table(Q3_first$`MindMap Usage`) / nrow(Q3_first)
##
## Both Hand Written Not Report Software
## 0.06666667 0.30000000 0.10000000 0.53333333
# Share of each medium after 2009 (was a hard-coded 83)
table(Q3_Second$`MindMap Usage`) / nrow(Q3_Second)
##
## Both Hand Written Not Report Software
## 0.02409639 0.27710843 0.18072289 0.51807229
# Q4: intervention duration before and after 2009.
# Columns are selected by name, not position (7, 12), so a reordered
# spreadsheet cannot silently pick the wrong columns.
Q4 <- MM_Data[, c("Year", "MindMap Duration")]
Q4_first <- subset(Q4, Year <= 2009)
Q4_Second <- subset(Q4, Year > 2009)
# Share of each duration up to 2009. The denominator is the subset's row
# count instead of a hard-coded 30.
table(Q4_first$`MindMap Duration`) / nrow(Q4_first)
##
## Median Term Not Report Short term
## 0.1 0.4 0.5
# Share of each duration after 2009 (was a hard-coded 83)
table(Q4_Second$`MindMap Duration`) / nrow(Q4_Second)
##
## Long Term Median Term Not Report Short term
## 0.10843373 0.06024096 0.36144578 0.46987952
# Q5: research approach before and after 2009.
# Columns are selected by name, not position (7, 14), so a reordered
# spreadsheet cannot silently pick the wrong columns.
Q5 <- MM_Data[, c("Year", "quali or quant")]
Q5_first <- subset(Q5, Year <= 2009)
Q5_Second <- subset(Q5, Year > 2009)
# Share of each approach up to 2009. The denominator is the subset's row
# count instead of a hard-coded 30.
table(Q5_first$`quali or quant`) / nrow(Q5_first)
##
## Qualitative Quantitative
## 0.5666667 0.4333333
# Share of each approach after 2009 (was a hard-coded 83)
table(Q5_Second$`quali or quant`) / nrow(Q5_Second)
##
## Mixed-Method Qualitative Quantitative
## 0.06024096 0.45783133 0.48192771