ここでは,bibファイルを分析しやすいように成形するところまでをやってみる。
ERICやPsycinfoでダウンロードしたbibファイルを,分析しやすいように成形する。
Search queryは以下の通り
# ERIC
全てのフィールド
("data driven") AND ("education" OR "instruction" OR "classroom")
査読付き,学術誌,英語,就学前から12年生まで
# PsycInfo
全てのフィールド
("data driven") AND ("education" OR "instruction" OR "classroom")
査読付き,学術誌,英語,
Preschool age (2-5歳),School age (6-12歳),Adolescence (13-17歳)
ERICは270件
PsycInfoは135件
(検索日: 2023/08/31)
# Read each downloaded bib file into a single string.
e <- readr::read_file(file = "../bib/230831_ERIC.bib")
p <- readr::read_file(file = "../bib/230831_PsycInfo.bib")
# Put one record on each row. The two exports use different separators:
# a blank line (LF) in the ERIC file, two blank lines (CRLF) in the PsycInfo file.
e1 <- data.frame(base::strsplit(e, split = "\n\n")[[1]])
p1 <- data.frame(base::strsplit(p, split = "\r\n\r\n\r\n")[[1]])
# Row indices for each database.
# seq_len() gives an empty vector for zero records, whereas 1:nrow(x) would
# give c(1, 0) and make the later loops run on rows that do not exist.
e.row <- seq_len(nrow(e1))
p.row <- seq_len(nrow(p1))
# Save every record as its own CSV file, one field per row.
# The target paths are built with file.path() relative to the current working
# directory. Calling setwd() inside the loops changed the session's working
# directory for good and only worked because "../ERIC" resolves to the same
# folder again once you are already inside it.
for (i in e.row) {
  # ERIC fields are separated by a comma followed by LF.
  fields <- data.frame(base::strsplit(e1[i, ], split = ",\n"))
  write.csv(fields, file.path("..", "ERIC", paste0("e", i, ".csv")))
}
for (i in p.row) {
  # PsycInfo fields are separated by a comma followed by CRLF.
  fields <- data.frame(base::strsplit(p1[i, ], split = ",\r\n"))
  write.csv(fields, file.path("..", "PsycInfo", paste0("p", i, ".csv")))
}
# Load the per-record CSV files back in. Each one becomes its own data frame
# named e.csv.<i> (ERIC) or p.csv.<i> (PsycInfo).
for (i in e.row) {
  assign(
    paste0("e.csv.", i),
    read.csv(paste0("../ERIC/", "e", i, ".csv"))
  )
}
for (i in p.row) {
  assign(
    paste0("p.csv.", i),
    read.csv(paste0("../PsycInfo/", "p", i, ".csv"))
  )
}
# Number of rows (fields) in every ERIC record, keyed by record number.
# The e.csv.<i> data frames are looked up by name for every index in e.row,
# so the table follows the data instead of a hand-typed list of 270 entries.
eric.tables <- mget(paste0("e.csv.", e.row))
eric.nrow <- data.frame(
  seq = as.numeric(e.row),
  nrow = as.numeric(vapply(eric.tables, nrow, integer(1)))
)
# Number of rows (fields) in every PsycInfo record, keyed by record number.
# The p.csv.<i> data frames are looked up by name for every index in p.row,
# so the table follows the data instead of a hand-typed list of 135 entries.
psyc.tables <- mget(paste0("p.csv.", p.row))
psyc.nrow <- data.frame(
  seq = as.numeric(p.row),
  nrow = as.numeric(vapply(psyc.tables, nrow, integer(1)))
)
# How many ERIC records have each field count.
table(eric.nrow["nrow"])
## nrow
## 11 12 13 14 15
## 2 4 29 100 135
# How many PsycInfo records have each field count.
table(psyc.nrow["nrow"])
## nrow
## 11 12 13
## 12 23 100
# ERIC records with 15 fields; print their record numbers.
eric.nrow.15.seq <- eric.nrow[which(eric.nrow$nrow == 15), , drop = FALSE]
eric.nrow.15.seq["seq"]
## seq
## 1 1
## 3 3
## 4 4
## 5 5
## 6 6
## 21 21
## 22 22
## 23 23
## 25 25
## 26 26
## 33 33
## 35 35
## 36 36
## 37 37
## 39 39
## 40 40
## 42 42
## 43 43
## 44 44
## 45 45
## 46 46
## 47 47
## 62 62
## 63 63
## 66 66
## 67 67
## 68 68
## 69 69
## 70 70
## 73 73
## 84 84
## 86 86
## 87 87
## 88 88
## 89 89
## 108 108
## 109 109
## 110 110
## 111 111
## 113 113
## 115 115
## 125 125
## 126 126
## 127 127
## 128 128
## 129 129
## 130 130
## 132 132
## 134 134
## 135 135
## 138 138
## 151 151
## 152 152
## 153 153
## 154 154
## 155 155
## 156 156
## 157 157
## 158 158
## 159 159
## 160 160
## 161 161
## 162 162
## 163 163
## 168 168
## 174 174
## 175 175
## 176 176
## 177 177
## 178 178
## 179 179
## 180 180
## 181 181
## 182 182
## 183 183
## 184 184
## 185 185
## 186 186
## 192 192
## 193 193
## 196 196
## 197 197
## 198 198
## 201 201
## 202 202
## 203 203
## 204 204
## 205 205
## 206 206
## 207 207
## 208 208
## 211 211
## 212 212
## 213 213
## 214 214
## 215 215
## 216 216
## 217 217
## 218 218
## 219 219
## 221 221
## 222 222
## 223 223
## 229 229
## 230 230
## 231 231
## 232 232
## 233 233
## 234 234
## 235 235
## 236 236
## 237 237
## 240 240
## 241 241
## 242 242
## 243 243
## 244 244
## 245 245
## 247 247
## 248 248
## 249 249
## 250 250
## 251 251
## 252 252
## 254 254
## 255 255
## 256 256
## 258 258
## 262 262
## 263 263
## 264 264
## 265 265
## 268 268
## 269 269
## 270 270
# ERIC records with 14 fields; print their record numbers.
eric.nrow.14.seq <- eric.nrow[which(eric.nrow$nrow == 14), , drop = FALSE]
eric.nrow.14.seq["seq"]
## seq
## 2 2
## 7 7
## 10 10
## 11 11
## 15 15
## 16 16
## 17 17
## 18 18
## 19 19
## 20 20
## 24 24
## 27 27
## 28 28
## 29 29
## 30 30
## 31 31
## 32 32
## 38 38
## 41 41
## 48 48
## 49 49
## 51 51
## 53 53
## 54 54
## 55 55
## 57 57
## 58 58
## 59 59
## 60 60
## 61 61
## 64 64
## 74 74
## 75 75
## 77 77
## 78 78
## 79 79
## 80 80
## 81 81
## 82 82
## 83 83
## 90 90
## 95 95
## 96 96
## 97 97
## 98 98
## 99 99
## 100 100
## 104 104
## 106 106
## 112 112
## 114 114
## 117 117
## 118 118
## 120 120
## 123 123
## 124 124
## 136 136
## 137 137
## 141 141
## 143 143
## 144 144
## 145 145
## 146 146
## 147 147
## 148 148
## 149 149
## 150 150
## 164 164
## 165 165
## 166 166
## 167 167
## 169 169
## 170 170
## 171 171
## 172 172
## 173 173
## 187 187
## 188 188
## 189 189
## 190 190
## 191 191
## 195 195
## 199 199
## 200 200
## 209 209
## 210 210
## 220 220
## 224 224
## 225 225
## 226 226
## 227 227
## 228 228
## 238 238
## 239 239
## 246 246
## 257 257
## 260 260
## 261 261
## 266 266
## 267 267
# ERIC records with 13 fields; print their record numbers.
eric.nrow.13.seq <- eric.nrow[which(eric.nrow$nrow == 13), , drop = FALSE]
eric.nrow.13.seq["seq"]
## seq
## 8 8
## 9 9
## 12 12
## 13 13
## 34 34
## 50 50
## 52 52
## 56 56
## 65 65
## 71 71
## 72 72
## 76 76
## 85 85
## 91 91
## 92 92
## 93 93
## 94 94
## 101 101
## 102 102
## 103 103
## 105 105
## 107 107
## 119 119
## 121 121
## 122 122
## 139 139
## 194 194
## 253 253
## 259 259
# ERIC records with 12 fields; print their record numbers.
eric.nrow.12.seq <- eric.nrow[which(eric.nrow$nrow == 12), , drop = FALSE]
eric.nrow.12.seq["seq"]
## seq
## 14 14
## 116 116
## 131 131
## 133 133
# ERIC records with 11 fields; print their record numbers.
eric.nrow.11.seq <- eric.nrow[which(eric.nrow$nrow == 11), , drop = FALSE]
eric.nrow.11.seq["seq"]
## seq
## 140 140
## 142 142
# Build one matrix per field count: one column per record, one row per field.
# Column 2 of each e.csv.<i> data frame holds the field text (column 1 is the
# row name written by write.csv).
# The record numbers come from the eric.nrow.<k>.seq tables rather than from
# hand-copied index lists, so the two cannot drift apart.
bind.eric.fields <- function(seqs) {
  do.call(cbind, lapply(seqs, function(i) get(paste0("e.csv.", i))[, 2]))
}
eric.nrow.15_ <- bind.eric.fields(eric.nrow.15.seq$seq)
eric.nrow.14_ <- bind.eric.fields(eric.nrow.14.seq$seq)
eric.nrow.13_ <- bind.eric.fields(eric.nrow.13.seq$seq)
eric.nrow.12_ <- bind.eric.fields(eric.nrow.12.seq$seq)
eric.nrow.11_ <- bind.eric.fields(eric.nrow.11.seq$seq)
# Save: transpose each matrix so that one record becomes one row, then write
# one workbook per field count.
library(openxlsx)
eric.nrow.15 <- data.frame(t(eric.nrow.15_))
write.xlsx(x = eric.nrow.15, file = "../Data/eric_nrow_15.xlsx")
eric.nrow.14 <- data.frame(t(eric.nrow.14_))
write.xlsx(x = eric.nrow.14, file = "../Data/eric_nrow_14.xlsx")
eric.nrow.13 <- data.frame(t(eric.nrow.13_))
write.xlsx(x = eric.nrow.13, file = "../Data/eric_nrow_13.xlsx")
eric.nrow.12 <- data.frame(t(eric.nrow.12_))
write.xlsx(x = eric.nrow.12, file = "../Data/eric_nrow_12.xlsx")
eric.nrow.11 <- data.frame(t(eric.nrow.11_))
write.xlsx(x = eric.nrow.11, file = "../Data/eric_nrow_11.xlsx")
# PsycInfo records with 13 fields; print their record numbers.
psyc.nrow.13.seq <- psyc.nrow[which(psyc.nrow$nrow == 13), , drop = FALSE]
psyc.nrow.13.seq["seq"]
## seq
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
## 6 6
## 8 8
## 9 9
## 10 10
## 11 11
## 12 12
## 15 15
## 18 18
## 19 19
## 21 21
## 22 22
## 23 23
## 24 24
## 25 25
## 26 26
## 27 27
## 28 28
## 29 29
## 31 31
## 32 32
## 33 33
## 34 34
## 36 36
## 40 40
## 41 41
## 46 46
## 47 47
## 48 48
## 49 49
## 55 55
## 56 56
## 57 57
## 58 58
## 59 59
## 62 62
## 64 64
## 65 65
## 66 66
## 69 69
## 70 70
## 71 71
## 73 73
## 74 74
## 76 76
## 77 77
## 78 78
## 79 79
## 80 80
## 81 81
## 82 82
## 85 85
## 88 88
## 89 89
## 90 90
## 91 91
## 93 93
## 94 94
## 95 95
## 96 96
## 97 97
## 98 98
## 99 99
## 100 100
## 101 101
## 102 102
## 103 103
## 104 104
## 105 105
## 106 106
## 107 107
## 108 108
## 109 109
## 110 110
## 111 111
## 112 112
## 113 113
## 114 114
## 115 115
## 116 116
## 117 117
## 118 118
## 119 119
## 120 120
## 121 121
## 123 123
## 125 125
## 126 126
## 127 127
## 129 129
## 130 130
## 131 131
## 132 132
## 133 133
## 134 134
## 135 135
# PsycInfo records with 12 fields; print their record numbers.
psyc.nrow.12.seq <- psyc.nrow[which(psyc.nrow$nrow == 12), , drop = FALSE]
psyc.nrow.12.seq["seq"]
## seq
## 7 7
## 14 14
## 16 16
## 17 17
## 20 20
## 30 30
## 37 37
## 38 38
## 42 42
## 43 43
## 50 50
## 51 51
## 52 52
## 60 60
## 63 63
## 67 67
## 68 68
## 83 83
## 86 86
## 87 87
## 92 92
## 124 124
## 128 128
# PsycInfo records with 11 fields; print their record numbers.
psyc.nrow.11.seq <- psyc.nrow[which(psyc.nrow$nrow == 11), , drop = FALSE]
psyc.nrow.11.seq["seq"]
## seq
## 13 13
## 35 35
## 39 39
## 44 44
## 45 45
## 53 53
## 54 54
## 61 61
## 72 72
## 75 75
## 84 84
## 122 122
# Build one matrix per field count: one column per record, one row per field.
# Column 2 of each p.csv.<i> data frame holds the field text (column 1 is the
# row name written by write.csv).
# The record numbers come from the psyc.nrow.<k>.seq tables rather than from
# hand-copied index lists, so the two cannot drift apart.
bind.psyc.fields <- function(seqs) {
  do.call(cbind, lapply(seqs, function(i) get(paste0("p.csv.", i))[, 2]))
}
psyc.nrow.13_ <- bind.psyc.fields(psyc.nrow.13.seq$seq)
psyc.nrow.12_ <- bind.psyc.fields(psyc.nrow.12.seq$seq)
psyc.nrow.11_ <- bind.psyc.fields(psyc.nrow.11.seq$seq)
# Save: transpose each matrix so that one record becomes one row, then write
# one workbook per field count.
library(openxlsx)
psyc.nrow.13 <- data.frame(t(psyc.nrow.13_))
write.xlsx(x = psyc.nrow.13, file = "../Data/psyc_nrow_13.xlsx")
psyc.nrow.12 <- data.frame(t(psyc.nrow.12_))
write.xlsx(x = psyc.nrow.12, file = "../Data/psyc_nrow_12.xlsx")
psyc.nrow.11 <- data.frame(t(psyc.nrow.11_))
write.xlsx(x = psyc.nrow.11, file = "../Data/psyc_nrow_11.xlsx")
行の内容が一致しないケースがある
出力したxlsxファイルを手作業で加工する必要がある
# ERIC
15列
c("author", "year", "month", "title", "journal", "volume", "number", "pages", "note", "abstract", "keywords", "issn", "language", "url", "no.use")
14列,13列
整形して15列にして以下の通りにする
c("author", "year", "month", "title", "journal", "volume", "number", "pages", "note", "abstract", "keywords", "issn", "language", "url", "no.use")
12列,11列
isbnやjournalのタイトルがないので対象外
# PsycInfo
13列
VolumeとURLが入れ替わっているデータがあるので注意
c("no.use.1", "abstract", "author", "issn", "journal", "keywords", "number", "pages", "title", "url", "volume", "year", "no.use2")
12列
整形して13列にして以下の通りにする
c("no.use.1", "abstract", "author", "issn", "journal", "keywords", "number", "pages", "title", "url", "volume", "year", "no.use2")
11列
揃っているがオンラインジャーナルでnumberとpagesがないので,これら2列を加えて,列名を以下の通りにする
c("no.use.1", "abstract", "author", "issn", "journal", "keywords", "number", "pages", "title", "url", "volume", "year", "no.use2")
library(openxlsx)
# Column layouts shared by all ERIC and all PsycInfo workbooks.
eric.col <- c(
  "author", "year", "month", "title", "journal", "volume", "number", "pages",
  "note", "abstract", "keywords", "issn", "language", "url", "no.use"
)
psyc.col <- c(
  "no.use.1", "abstract", "author", "issn", "journal", "keywords", "number",
  "pages", "title", "url", "volume", "year", "no.use2"
)
# Read the "...r.xlsx" workbooks (presumably the hand-edited versions of the
# exported files) and apply the shared column names.
eric.nrow.15r <- read.xlsx("../Data/eric_nrow_15r.xlsx", sheet = 1)
names(eric.nrow.15r) <- eric.col
eric.nrow.14r <- read.xlsx("../Data/eric_nrow_14r.xlsx", sheet = 1)
names(eric.nrow.14r) <- eric.col
eric.nrow.13r <- read.xlsx("../Data/eric_nrow_13r.xlsx", sheet = 1)
names(eric.nrow.13r) <- eric.col
psyc.nrow.13r <- read.xlsx("../Data/psyc_nrow_13r.xlsx", sheet = 1)
names(psyc.nrow.13r) <- psyc.col
psyc.nrow.12r <- read.xlsx("../Data/psyc_nrow_12r.xlsx", sheet = 1)
names(psyc.nrow.12r) <- psyc.col
psyc.nrow.11r <- read.xlsx("../Data/psyc_nrow_11r.xlsx", sheet = 1)
names(psyc.nrow.11r) <- psyc.col
# number and pages come back as numeric in the 11-field workbook; convert them
# to character so they can be row-bound with the other PsycInfo tables.
psyc.nrow.11r[["number"]] <- as.character(psyc.nrow.11r[["number"]])
psyc.nrow.11r[["pages"]] <- as.character(psyc.nrow.11r[["pages"]])
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Stack the per-field-count tables into one raw table per database.
eric.raw <- dplyr::bind_rows(list(eric.nrow.15r, eric.nrow.14r, eric.nrow.13r))
psyc.raw <- dplyr::bind_rows(list(psyc.nrow.13r, psyc.nrow.12r, psyc.nrow.11r))
library(stringr)
# Strip the BibTeX wrapping from the ERIC fields so that only the values remain.
# Carriage returns are removed from author first; presumably this lets the
# entry-header pattern below, which contains a bare newline, match entries
# that were written as "@article{\r\nauthor={".
eric.raw$author <- str_replace_all(
  eric.raw$author,
  pattern = "\\r", replacement = ""
)
# Column name -> leading text to delete from that column.
# Note that the issn column is matched with the "isbn={" prefix.
eric.prefix <- c(
  author = "@article\\{\nauthor=\\{",
  year = "year=\\{",
  month = "month=\\{",
  title = "title=\\{",
  journal = "journal=\\{",
  volume = "volume=\\{",
  number = "number=\\{",
  pages = "pages=\\{",
  abstract = "abstract=\\{",
  keywords = "keywords=\\{",
  issn = "isbn=\\{",
  url = "url=\\{"
)
for (field in names(eric.prefix)) {
  value <- str_replace_all(
    eric.raw[[field]],
    pattern = eric.prefix[[field]], replacement = ""
  )
  # Remove every closing brace left in the value.
  eric.raw[[field]] <- str_replace_all(value, pattern = "\\}", replacement = "")
}
# Titles in sentence case: first letter upper case, the rest lower case.
eric.raw$title <- str_to_sentence(eric.raw$title)
library(stringr)
# Strip the BibTeX wrapping from the PsycInfo fields so that only the values
# remain. Column name -> leading text to delete from that column.
psyc.prefix <- c(
  abstract = "Abstract = \\{",
  author = "Author = \\{",
  issn = "ISSN = \\{",
  journal = "Journal = \\{",
  keywords = "Keywords = \\{",
  number = "Number = \\{",
  pages = "Pages = \\{",
  title = "Title = \\{",
  url = "URL = \\{",
  volume = "Volume = \\{",
  year = "Year = \\{"
)
for (field in names(psyc.prefix)) {
  value <- str_replace_all(
    psyc.raw[[field]],
    pattern = psyc.prefix[[field]], replacement = ""
  )
  # Remove every closing brace left in the value.
  psyc.raw[[field]] <- str_replace_all(value, pattern = "\\}", replacement = "")
}
# Use semicolons as the keyword separator.
psyc.raw$keywords <- str_replace_all(
  psyc.raw$keywords,
  pattern = ",", replacement = ";"
)
# Remove spaces from page ranges.
psyc.raw$pages <- str_replace_all(psyc.raw$pages, pattern = " ", replacement = "")
# Titles in sentence case: first letter upper case, the rest lower case.
psyc.raw$title <- str_to_sentence(psyc.raw$title)
library(dplyr)
# Columns carried into the combined table, in output order.
keep.cols <- c(
  "db", "seq", "db.seq", "author", "year", "title", "journal",
  "volume", "number", "pages", "keywords", "issn", "abstract"
)
# ERIC: add the database tag, a running row number, and the combined id
# "<db>.<seq>", then keep only the shared columns.
eric.raw <- eric.raw %>%
  mutate(db = "eric", seq = row_number(), db.seq = paste0(db, ".", seq))
eric <- eric.raw[keep.cols]
# PsycInfo: same treatment.
psyc.raw <- psyc.raw %>%
  mutate(db = "psyc", seq = row_number(), db.seq = paste0(db, ".", seq))
psyc <- psyc.raw[keep.cols]
# Stack both databases into one table.
eric.psyc <- dplyr::bind_rows(eric, psyc)
library(openxlsx)
library(dplyr)
# Save the combined table at this point.
write.xlsx(eric.psyc, "../Data/ERIC_Psyc.xlsx")
# Check for duplicates by title: keep every row whose title occurs more than
# once in the combined table.
duplicate <- eric.psyc %>%
  group_by(title) %>%
  filter(n() > 1)
# Sort by title, descending, so rows sharing a title sit next to each other.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
duplicate <- duplicate[order(duplicate$title, decreasing = TRUE), ]
# Export only the id and the title for manual inspection.
duplicate.sec.title <- duplicate[c("db.seq", "title")]
write.xlsx(duplicate.sec.title, "../Data/Duplicate.xlsx")
# There are 21 duplicates. The ERIC rows listed here are removed from the
# combined table. The ids are row positions in eric.raw, so they are only valid
# for this exact input.
dup.ids <- c(
  "eric.80", "eric.196", "eric.264", "eric.92", "eric.32", "eric.27",
  "eric.67", "eric.195", "eric.123", "eric.156", "eric.151", "eric.183",
  "eric.118", "eric.50", "eric.227", "eric.198", "eric.124", "eric.21",
  "eric.35", "eric.22", "eric.71"
)
eric.psyc.nodp <- dplyr::filter(eric.psyc, !(db.seq %in% dup.ids))
library(openxlsx)
write.xlsx(eric.psyc.nodp, "../Data/eric_psyc_nodp.xlsx")