「データ駆動型教育」なるものの,海外の研究動向を概観したい。
興味はないけど。
- https://rpubs.com/koyo/dde03 では拾い切れていない文献がある。
- 追加して(というより,本来こっちが先だと思うが)文献を検索する。
# ERIC
全てのフィールド
("data driven education") OR ("data driven instruction") OR ("data driven classroom")
査読付き,学術誌,英語
教育レベルは指定しない
# PsycInfo
全てのフィールド
("data driven education") OR ("data driven instruction") OR ("data driven classroom")
査読付き,学術誌,英語
年齢層は指定しない
ERICは19件
PsycInfoは10件
(検索日: 2023/08/31)
これらの結果をbibファイルで取得
# Read each downloaded .bib file in as one single string
e2 <- readr::read_file(file = "../bib/230902_ERIC.bib")
p2 <- readr::read_file(file = "../bib/230902_PsycInfo.bib")
# Split into one record (article) per row. The two exports are split on
# different separators: "\n\n" for ERIC, "\r\n\r\n\r\n" for PsycInfo.
e21 <- data.frame(base::strsplit(e2, split = "\n\n")[[1]])
p21 <- data.frame(base::strsplit(p2, split = "\r\n\r\n\r\n")[[1]])
# Row indices for each database.
# seq_len() gives an empty vector when a table has no rows, whereas
# 1:nrow(x) would give c(1, 0) and make the later loops visit rows that
# do not exist.
e2.row <- seq_len(nrow(e21))
p2.row <- seq_len(nrow(p21))
# Save each record as its own CSV file, one BibTeX field per row.
# The target path is built with file.path() instead of calling setwd() inside
# the loop, so the working directory of the session is never changed as a
# side effect and the relative paths used elsewhere keep their meaning.
for (i in e2.row) {
  # ERIC fields are separated by ",\n"
  fields <- data.frame(base::strsplit(e21[i, ], split = ",\n"))
  write.csv(fields, file.path("..", "ERIC", paste0("e2_", i, ".csv")))
}
for (i in p2.row) {
  # PsycInfo fields are separated by ",\r\n"
  fields <- data.frame(base::strsplit(p21[i, ], split = ",\r\n"))
  write.csv(fields, file.path("..", "PsycInfo", paste0("p2_", i, ".csv")))
}
# Read the per-record CSV files back in, one object per record:
# e2.csv.<i> for ERIC and p2.csv.<i> for PsycInfo.
for (i in e2.row) {
  csv_path <- paste0("../ERIC/", "e2_", i, ".csv")
  assign(paste0("e2.csv.", i), read.csv(csv_path))
}
for (i in p2.row) {
  csv_path <- paste0("../PsycInfo/", "p2_", i, ".csv")
  assign(paste0("p2.csv.", i), read.csv(csv_path))
}
# Number of rows (one row per BibTeX field) in each ERIC record.
# The counts are collected for every index in e2.row rather than by writing
# out e2.csv.1 ... e2.csv.19 by hand, so the table stays complete when the
# number of search hits changes. Both columns are stored as doubles, as before.
eric2.nrow <- data.frame(
  seq = as.numeric(e2.row),
  nrow = vapply(
    e2.row,
    function(i) as.numeric(nrow(get(paste0("e2.csv.", i)))),
    numeric(1)
  )
)
# Number of rows (one row per BibTeX field) in each PsycInfo record.
# The counts are collected for every index in p2.row rather than by writing
# out p2.csv.1 ... p2.csv.10 by hand, so the table stays complete when the
# number of search hits changes. Both columns are stored as doubles, as before.
psyc2.nrow <- data.frame(
  seq = as.numeric(p2.row),
  nrow = vapply(
    p2.row,
    function(i) as.numeric(nrow(get(paste0("p2.csv.", i)))),
    numeric(1)
  )
)
# How many ERIC records have each field count
table(eric2.nrow[, "nrow", drop = FALSE])
## nrow
## 13 14 15
## 4 8 7
# How many PsycInfo records have each field count
table(psyc2.nrow[, "nrow", drop = FALSE])
## nrow
## 13
## 10
# ERIC records with 15 fields: keep the subset, then show its sequence numbers
eric2.nrow.15.seq <- eric2.nrow[which(eric2.nrow$nrow == 15), , drop = FALSE]
eric2.nrow.15.seq["seq"]
## seq
## 1 1
## 2 2
## 3 3
## 5 5
## 9 9
## 17 17
## 19 19
# ERIC records with 14 fields
eric2.nrow.14.seq <- eric2.nrow[which(eric2.nrow$nrow == 14), , drop = FALSE]
eric2.nrow.14.seq["seq"]
## seq
## 4 4
## 7 7
## 8 8
## 12 12
## 13 13
## 15 15
## 16 16
## 18 18
# ERIC records with 13 fields
eric2.nrow.13.seq <- eric2.nrow[which(eric2.nrow$nrow == 13), , drop = FALSE]
eric2.nrow.13.seq["seq"]
## seq
## 6 6
## 10 10
## 11 11
## 14 14
library(openxlsx)

# Column-bind the second column of the per-record objects <prefix><seq>,
# giving one column per record. With the CSV files as written by write.csv()
# and read back by read.csv(), column 1 holds the row names and column 2 the
# field text.
bind_record_fields <- function(prefix, seqs) {
  do.call(cbind, lapply(seqs, function(i) get(paste0(prefix, i))[, 2]))
}

# Group the ERIC records by field count. The record numbers come from the
# eric2.nrow.*.seq tables instead of being copied by hand from their printed
# output, so no record can be left out or placed in the wrong group.
eric2.nrow.15_ <- bind_record_fields("e2.csv.", eric2.nrow.15.seq$seq)
eric2.nrow.14_ <- bind_record_fields("e2.csv.", eric2.nrow.14.seq$seq)
eric2.nrow.13_ <- bind_record_fields("e2.csv.", eric2.nrow.13.seq$seq)

# Save: transpose so that each record becomes one row, then write one
# workbook per field count
eric2.nrow.15 <- data.frame(t(eric2.nrow.15_))
eric2.nrow.14 <- data.frame(t(eric2.nrow.14_))
eric2.nrow.13 <- data.frame(t(eric2.nrow.13_))
write.xlsx(eric2.nrow.15, "../Data/eric2_nrow_15.xlsx")
write.xlsx(eric2.nrow.14, "../Data/eric2_nrow_14.xlsx")
write.xlsx(eric2.nrow.13, "../Data/eric2_nrow_13.xlsx")
# PsycInfo records with 13 fields: keep the subset, then show its sequence numbers
psyc2.nrow.13.seq <- psyc2.nrow[which(psyc2.nrow$nrow == 13), , drop = FALSE]
psyc2.nrow.13.seq["seq"]
## seq
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 7 7
## 8 8
## 9 9
## 10 10
library(openxlsx)

# Column-bind the second column (the field text) of every PsycInfo record
# that has 13 fields, one column per record.
# The record numbers are taken from psyc2.nrow.13.seq. The earlier
# hand-written list skipped p2.csv.7 even though that record also has 13
# fields, so one article was silently dropped.
# NOTE(review): including record 7 shifts the row position of records 8-10
# in the exported sheet; re-check the hand-edited psyc2_nrow_13r.xlsx and any
# hard-coded psyc2.<n> ids used later for de-duplication.
psyc2.nrow.13_ <- do.call(
  cbind,
  lapply(psyc2.nrow.13.seq$seq, function(i) get(paste0("p2.csv.", i))[, 2])
)
# Save: transpose so that each record becomes one row
psyc2.nrow.13 <- data.frame(t(psyc2.nrow.13_))
write.xlsx(psyc2.nrow.13, "../Data/psyc2_nrow_13.xlsx")
行の内容が一致しないケースがある
出力したxlsxファイルを手作業で加工する必要がある
# ERIC
15列
c("author", "year", "month", "title", "journal", "volume", "number", "pages", "note", "abstract", "keywords", "issn", "language", "url", "no.use")
14列,13列
整形して15列にして以下の通りにする
c("author", "year", "month", "title", "journal", "volume", "number", "pages", "note", "abstract", "keywords", "isbn", "language", "url", "no.use")
12列,11列
isbnやjournalのタイトルがないものは対象外
# PsycInfo
13列
VolumeとURLが入れ替わっているデータがあるので注意
c("no.use.1", "abstract", "author", "issn", "journal", "keywords", "number", "pages", "title", "url", "volume", "year", "no.use2")
library(openxlsx)
library(dplyr)

# Field names, in column order, for the hand-edited sheets
eric.col <- c("author", "year", "month", "title", "journal", "volume", "number", "pages", "note", "abstract", "keywords", "issn", "language", "url", "no.use")
psyc.col <- c("no.use.1", "abstract", "author", "issn", "journal", "keywords", "number", "pages", "title", "url", "volume", "year", "no.use2")

# Read the first sheet of each hand-edited workbook and label its columns
eric2.nrow.15r <- setNames(read.xlsx("../Data/eric2_nrow_15r.xlsx", sheet = 1), eric.col)
eric2.nrow.14r <- setNames(read.xlsx("../Data/eric2_nrow_14r.xlsx", sheet = 1), eric.col)
eric2.nrow.13r <- setNames(read.xlsx("../Data/eric2_nrow_13r.xlsx", sheet = 1), eric.col)
psyc2.nrow.13r <- setNames(read.xlsx("../Data/psyc2_nrow_13r.xlsx", sheet = 1), psyc.col)

# Convert month and number of the 13-field ERIC table to character before
# stacking; the original notes say this is because columns were read in as
# numeric.
# NOTE(review): those notes named `number` and `pages`, not `month` and
# `number` - confirm which columns actually need the conversion.
eric2.nrow.13r[c("month", "number")] <- lapply(
  eric2.nrow.13r[c("month", "number")],
  as.character
)

# Stack the three ERIC tables; PsycInfo has a single table
eric2.raw <- dplyr::bind_rows(eric2.nrow.15r, eric2.nrow.14r, eric2.nrow.13r)
psyc2.raw <- psyc2.nrow.13r
library(stringr)

# Patterns for the BibTeX wrapping in the ERIC export.
# The author cell starts with the entry header, noted in the original as
# "@article { \r\nauthor={"; carriage returns are removed first so that the
# header pattern "@article{\nauthor={" can match.
author.rm2 <- "\\r"
author.rm <- "@article\\{\nauthor=\\{"
# Closing brace of every field
rm.2 <- "\\}"
year.rm <- "year=\\{"
month.rm <- "month=\\{"
title.rm <- "title=\\{"
journal.rm <- "journal=\\{"
volume.rm <- "volume=\\{"
number.rm <- "number=\\{"
pages.rm <- "pages=\\{"
abstract.rm <- "abstract=\\{"
keywords.rm <- "keywords=\\{"
# The issn column is stripped of an "isbn={" prefix
issn.rm <- "isbn=\\{"
url.rm <- "url=\\{"

# Carriage returns only need removing from the author column
eric2.raw$author <- str_replace_all(eric2.raw$author, pattern = author.rm2, replacement = "")

# For each column: drop its field prefix, then drop every closing brace
eric_prefixes <- c(
  author = author.rm, year = year.rm, month = month.rm, title = title.rm,
  journal = journal.rm, volume = volume.rm, number = number.rm,
  pages = pages.rm, abstract = abstract.rm, keywords = keywords.rm,
  issn = issn.rm, url = url.rm
)
for (field in names(eric_prefixes)) {
  no_prefix <- str_replace_all(
    eric2.raw[[field]],
    pattern = eric_prefixes[[field]],
    replacement = ""
  )
  eric2.raw[[field]] <- str_replace_all(no_prefix, rm.2, "")
}

# Titles in sentence case: first letter upper case, the rest lower case
eric2.raw$title <- str_to_sentence(eric2.raw$title)
library(stringr)

# Field prefixes as written in the PsycInfo export ("Name = {"), plus the
# closing brace of every field
abstract.rm <- "Abstract = \\{"
author.rm <- "Author = \\{"
issn.rm <- "ISSN = \\{"
journal.rm <- "Journal = \\{"
keywords.rm <- "Keywords = \\{"
number.rm <- "Number = \\{"
pages.rm <- "Pages = \\{"
title.rm <- "Title = \\{"
url.rm <- "URL = \\{"
volume.rm <- "Volume = \\{"
year.rm <- "Year = \\{"
rm.2 <- "\\}"

# For each column: drop its field prefix, then drop every closing brace
psyc_prefixes <- c(
  abstract = abstract.rm, author = author.rm, issn = issn.rm,
  journal = journal.rm, keywords = keywords.rm, number = number.rm,
  pages = pages.rm, title = title.rm, url = url.rm, volume = volume.rm,
  year = year.rm
)
for (field in names(psyc_prefixes)) {
  no_prefix <- str_replace_all(
    psyc2.raw[[field]],
    pattern = psyc_prefixes[[field]],
    replacement = ""
  )
  psyc2.raw[[field]] <- str_replace_all(no_prefix, rm.2, "")
}

# Use semicolons as the keyword separator
psyc2.raw$keywords <- str_replace_all(psyc2.raw$keywords, pattern = ",", replacement = ";")
# Remove spaces from the page numbers
psyc2.raw$pages <- str_replace_all(psyc2.raw$pages, " ", "")
# Titles in sentence case: first letter upper case, the rest lower case
psyc2.raw$title <- str_to_sentence(psyc2.raw$title)
library(dplyr)
library(openxlsx)

# Columns carried into the combined table, in output order
output_cols <- c(
  "db", "seq", "db.seq", "author", "year", "title", "journal",
  "volume", "number", "pages", "keywords", "issn", "abstract"
)

# ERIC: tag every row with its database, a running number, and "<db>.<seq>"
eric2.raw$db <- "eric2"
eric2.raw$seq <- seq_len(nrow(eric2.raw))
eric2.raw$db.seq <- paste0(eric2.raw$db, ".", eric2.raw$seq)
eric2 <- eric2.raw[output_cols]

# PsycInfo: same tagging
psyc2.raw$db <- "psyc2"
psyc2.raw$seq <- seq_len(nrow(psyc2.raw))
psyc2.raw$db.seq <- paste0(psyc2.raw$db, ".", psyc2.raw$seq)
psyc2 <- psyc2.raw[output_cols]

# Combine both databases
eric2.psyc2 <- dplyr::bind_rows(eric2, psyc2)
# Save a copy at this point
write.xlsx(eric2.psyc2, "../Data/ERIC2_Psyc2.xlsx")
library(dplyr)
library(openxlsx)

# Check for duplicates by title: keep every row whose title occurs more than
# once, sorted by title in descending order, and export id + title for review
duplicate <- eric2.psyc2 %>%
  group_by(title) %>%
  filter(n() > 1) %>%
  ungroup()
duplicate <- duplicate[order(duplicate$title, decreasing = TRUE), ]
duplicate.sec.title <- duplicate[c("db.seq", "title")]
write.xlsx(duplicate.sec.title, "../Data/Duplicate_2.xlsx")

# One duplicate was found; remove it.
# db.seq has the form "<db>.<seq>" with db = "eric2" or "psyc2", so the id to
# drop is "eric2.4". The previous literal "eric.2.4" could never match any
# row, which left the duplicate in the data.
eric2.psyc2.nodp <- eric2.psyc2 %>%
  dplyr::filter(db.seq != "eric2.4")
write.xlsx(eric2.psyc2.nodp, "../Data/eric2_psyc2_nodp.xlsx")
library(openxlsx)
library(dplyr)

# Load the two de-duplicated tables
eric.psyc.nodp <- read.xlsx("../Data/eric_psyc_nodp.xlsx")
eric2.psyc2.nodp <- read.xlsx("../Data/eric2_psyc2_nodp.xlsx")

# Check for duplicates across both tables: stack them, keep every row whose
# title occurs more than once, and sort by title in descending order
e.e2.p.p2 <- dplyr::bind_rows(eric.psyc.nodp, eric2.psyc2.nodp)
duplicate <- e.e2.p.p2 %>%
  group_by(title) %>%
  filter(n() > 1)
title_order <- order(duplicate$title, decreasing = T)
duplicate <- duplicate[title_order, ]

# Export id + title for review
duplicate.sec.title <- duplicate[c("db.seq", "title")]
write.xlsx(duplicate.sec.title, "../Data/Duplicate_3.xlsx")
以下の論文は先に読むべき論文と思われる
eric.205, eric.175, psyc.31, eric.57
eric.169, eric.25, eric.128, eric.85
eric.176,
psyc.68
library(openxlsx)

# 11 duplicates were found:
# eric2.8, eric2.9, eric2.4, eric2.3, eric2.10, eric2.5, eric2.6,
# eric2.7, eric2.12, psyc2.9, psyc2.8
duplicate_ids <- c(
  "eric2.8", "eric2.9", "eric2.4", "eric2.3", "eric2.10", "eric2.5",
  "eric2.6", "eric2.7", "eric2.12", "psyc2.9", "psyc2.8"
)
# Drop the listed ids. The is.na() condition mirrors the `!=` comparisons
# used before, which also discard rows whose id is NA.
e2.p2.nodp <- eric2.psyc2.nodp %>%
  dplyr::filter(!is.na(db.seq), !(db.seq %in% duplicate_ids))
write.xlsx(e2.p2.nodp, "../Data/e2_p2_nodp.xlsx")
library(dplyr)
library(tidyverse)
library(deeplr)
データの読み込み
library(openxlsx)
# Load the de-duplicated records of the second search
e2.p2.nodp <- read.xlsx(xlsxFile = "../Data/e2_p2_nodp.xlsx")
アブストの文字数を数えてみる
必要に応じてDeepL APIの文字数制限を変更する
# Total number of characters over all abstracts
sum(nchar(e2.p2.nodp[["abstract"]]))
## [1] 14150
DeepL APIの情報(非表示)
動作確認
# available_languages(auth_key = api)
言語の指定
# Language codes handed to the translation function
source_lang <- c("EN")  # translate from
target_lang <- c("JA")  # translate into
関数
- https://note.com/text_tier2718/n/n3451567126a7
に載っている関数を使う
# Translate one piece of text with the DeepL API by calling the curl
# command-line tool.
#
# Arguments:
#   Sentence     a single character string to translate
#   source_lang  language code of the input text (default "EN")
#   target_lang  language code to translate into (default "JA")
#   api_key      DeepL authentication key; required. The former default
#                `api_key = api_key` referred to itself and failed with
#                "promise already under evaluation" whenever it was omitted.
#
# Returns the translated text as a character string, or NA_character_ when
# the response holds no "text" field (for example an error reply or an empty
# response).
deepL <- function(Sentence, source_lang = "EN", target_lang = "JA", api_key = NULL) {
  if (is.null(api_key) || length(api_key) != 1L || is.na(api_key) ||
        !nzchar(api_key)) {
    stop("api_key must be a non-empty DeepL authentication key", call. = FALSE)
  }

  # Double quotes are still replaced by single quotes, as before: the response
  # is parsed by plain string splitting, so quotes in the text would come back
  # as escaped sequences inside the result.
  text <- gsub("\"", "'", Sentence, fixed = TRUE)

  # Every value is passed as its own shell-quoted argument, so characters such
  # as $, backticks or backslashes in an abstract are not interpreted by the
  # shell. --data-urlencode makes curl URL-encode the value, so "&", "+" or
  # "%" in the text no longer corrupt the request body.
  curl_args <- c(
    "-s", "https://api.deepl.com/v2/translate",
    "--data-urlencode", shQuote(paste0("auth_key=", api_key)),
    "--data-urlencode", shQuote(paste0("text=", text)),
    "--data-urlencode", shQuote(paste0("source_lang=", source_lang)),
    "--data-urlencode", shQuote(paste0("target_lang=", target_lang))
  )
  response <- paste(system2("curl", args = curl_args, stdout = TRUE), collapse = "")

  # Take what lies between '"text":"' and the closing '"}]}' of the reply
  after_key <- strsplit(response, "\"text\":\"", fixed = TRUE)[[1]][2]
  translated <- strsplit(after_key, "\"}]}", fixed = TRUE)[[1]][1]

  # Pause one second between calls
  Sys.sleep(1)
  translated
}
翻訳
library(openxlsx)

# Translate every abstract, one deepL() call per abstract; the result is a
# list with one element per record
translation <- lapply(
  e2.p2.nodp$abstract,
  deepL,
  source_lang = source_lang,
  target_lang = target_lang,
  api_key = api
)

# Attach the translations as a new column and save
eric2.psyc2.ja <- e2.p2.nodp %>%
  mutate(abstract_ja = translation)
write.xlsx(eric2.psyc2.ja, "../Data/eric2_psyc2_ja.xlsx")
## user system elapsed
## 1.161 0.425 34.124