# Load the patent training data from disk and list its column names.
TrainData <- read.csv(
  file = "/Users/jusimioni/Desktop/TrainingData.csv",
  header = TRUE
)
names(TrainData)
## [1] "Patent_Number" "Cites_Patent_Count" "Cited_by_Patent_Count"
# Preview the first six rows of the training data.
head(TrainData, n = 6L)
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count
## 1 PL 3341367 T3 0 0
## 2 HR P20210871 T1 0 0
## 3 CR 20210284 A 0 0
## 4 US 2021/0205309 A1 0 0
## 5 JP 2021100972 A 3 0
## 6 AU 2021/203768 A1 0 0
This dataset contains information about some of Pfizer’s patent
applications.
a) Use regression to try to predict the Cited_by_Patent_Count, with
Cites_Patent_Count used as a covariate. Based on your results, write an
equation using the following format:
# Fit a simple linear regression of Cited_by_Patent_Count on
# Cites_Patent_Count, then print the coefficient table and fit statistics.
model <- lm(
  formula = Cited_by_Patent_Count ~ Cites_Patent_Count,
  data = TrainData
)
summary(model)
##
## Call:
## lm(formula = Cited_by_Patent_Count ~ Cites_Patent_Count, data = TrainData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9469 -0.0523 -0.0523 -0.0523 3.7661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0522603 0.0130896 3.993 7.32e-05 ***
## Cites_Patent_Count 0.0026705 0.0005322 5.018 6.81e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3223 on 624 degrees of freedom
## Multiple R-squared: 0.03879, Adjusted R-squared: 0.03725
## F-statistic: 25.18 on 1 and 624 DF, p-value: 6.811e-07
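Cited_by_Patent_Count ≈ B1(Cites_Patent_Count) + B2, where B1 and B2
are real numbers. From the fitted model above, B1 ≈ 0.0026705 and
B2 ≈ 0.0522603, i.e.
Cited_by_Patent_Count ≈ 0.0026705(Cites_Patent_Count) + 0.0522603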
Fill-in the blanks for the following statements:
b) Your regression equation provides an “estimate” of the actual values
for the numbers B1 and B2. There is a 90% probability that the actual
value of B1 lies between 0.001793859 and 0.003547102.
# 90% confidence intervals for the intercept and the slope.
confint(object = model, level = 0.9)
## 5 % 95 %
## (Intercept) 0.030697775 0.073822761
## Cites_Patent_Count 0.001793859 0.003547102
# Cook's distance for every observation used to fit the model.
cooks.distance(model = model)
## 1 2 3 4 5 6
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.810625e-05 2.174933e-05
## 7 8 9 10 11 12
## 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 13 14 15 16 17 18
## 5.071376e-05 2.174933e-05 2.174933e-05 8.574799e-05 2.174933e-05 2.174933e-05
## 19 20 21 22 23 24
## 2.372145e-05 2.174933e-05 2.174933e-05 2.174933e-05 3.021095e-02 2.372145e-05
## 25 26 27 28 29 30
## 2.174933e-05 2.174933e-05 2.583357e-05 2.372145e-05 7.152876e-03 2.174933e-05
## 31 32 33 34 35 36
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 37 38 39 40 41 42
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 43 44 45 46 47 48
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 49 50 51 52 53 54
## 2.174933e-05 2.174933e-05 2.174933e-05 2.583357e-05 2.174933e-05 2.174933e-05
## 55 56 57 58 59 60
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 61 62 63 64 65 66
## 2.174933e-05 2.174933e-05 2.174933e-05 3.612723e-05 2.174933e-05 2.174933e-05
## 67 68 69 70 71 72
## 2.174933e-05 6.914970e-03 2.174933e-05 2.174933e-05 3.612723e-05 2.174933e-05
## 73 74 75 76 77 78
## 6.580877e-05 6.111181e-02 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 79 80 81 82 83 84
## 2.810625e-05 5.071376e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 85 86 87 88 89 90
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 3.322685e-05
## 91 92 93 94 95 96
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 7.152876e-03 6.580877e-05
## 97 98 99 100 101 102
## 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05 4.284078e-04 2.174933e-05
## 103 104 105 106 107 108
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 109 110 111 112 113 114
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 115 116 117 118 119 120
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 121 122 123 124 125 126
## 2.174933e-05 2.174933e-05 2.174933e-05 2.810625e-05 2.174933e-05 4.275628e-05
## 127 128 129 130 131 132
## 9.239691e-02 5.071376e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.372145e-05
## 133 134 135 136 137 138
## 2.174933e-05 1.273912e-02 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 139 140 141 142 143 144
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 145 146 147 148 149 150
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 151 152 153 154 155 156
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 157 158 159 160 161 162
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 163 164 165 166 167 168
## 2.810625e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 169 170 171 172 173 174
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 175 176 177 178 179 180
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 7.381892e-02 2.174933e-05
## 181 182 183 184 185 186
## 2.838903e-02 2.174933e-05 2.174933e-05 2.174933e-05 1.570643e-03 2.174933e-05
## 187 188 189 190 191 192
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 193 194 195 196 197 198
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 199 200 201 202 203 204
## 2.174933e-05 1.335152e-04 7.152876e-03 2.174933e-05 3.612723e-05 2.174933e-05
## 205 206 207 208 209 210
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 211 212 213 214 215 216
## 2.372145e-05 2.174933e-05 2.810625e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 217 218 219 220 221 222
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05
## 223 224 225 226 227 228
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.372145e-05
## 229 230 231 232 233 234
## 2.174933e-05 2.174933e-05 8.849406e-01 2.174933e-05 2.174933e-05 2.174933e-05
## 235 236 237 238 239 240
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 241 242 243 244 245 246
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 247 248 249 250 251 252
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 253 254 255 256 257 258
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 259 260 261 262 263 264
## 3.021095e-02 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 3.612723e-05
## 265 266 267 268 269 270
## 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 271 272 273 274 275 276
## 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 277 278 279 280 281 282
## 1.222411e-04 2.174933e-05 2.174933e-05 2.174933e-05 2.810625e-05 2.174933e-05
## 283 284 285 286 287 288
## 2.174933e-05 2.583357e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 289 290 291 292 293 294
## 2.174933e-05 2.174933e-05 2.174933e-05 3.929307e-05 2.174933e-05 2.174933e-05
## 295 296 297 298 299 300
## 2.174933e-05 9.727993e-02 9.369997e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 301 302 303 304 305 306
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 307 308 309 310 311 312
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 313 314 315 316 317 318
## 2.174933e-05 2.174933e-05 2.174933e-05 3.963990e-04 2.174933e-05 2.174933e-05
## 319 320 321 322 323 324
## 7.152876e-03 2.174933e-05 1.796726e-02 2.174933e-05 2.174933e-05 2.810625e-05
## 325 326 327 328 329 330
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 331 332 333 334 335 336
## 2.174933e-05 2.372145e-05 2.174933e-05 1.913967e-02 2.174933e-05 2.174933e-05
## 337 338 339 340 341 342
## 2.174933e-05 7.185379e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 343 344 345 346 347 348
## 2.174933e-05 2.485777e-03 2.174933e-05 3.322685e-05 2.174933e-05 1.118882e-04
## 349 350 351 352 353 354
## 2.174933e-05 2.174933e-05 7.702738e-04 2.372145e-05 2.174933e-05 2.174933e-05
## 355 356 357 358 359 360
## 6.996241e-03 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05
## 361 362 363 364 365 366
## 2.174933e-05 2.174933e-05 3.322685e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 367 368 369 370 371 372
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 7.152876e-03 6.030046e-05
## 373 374 375 376 377 378
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 379 380 381 382 383 384
## 2.174933e-05 3.056231e-05 7.152876e-03 2.174933e-05 2.174933e-05 2.174933e-05
## 385 386 387 388 389 390
## 2.174933e-05 2.174933e-05 1.335152e-04 2.174933e-05 2.174933e-05 2.174933e-05
## 391 392 393 394 395 396
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05
## 397 398 399 400 401 402
## 2.174933e-05 2.174933e-05 5.528323e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 403 404 405 406 407 408
## 2.174933e-05 2.174933e-05 3.021095e-02 2.174933e-05 2.372145e-05 2.174933e-05
## 409 410 411 412 413 414
## 3.056231e-05 7.152876e-03 2.174933e-05 2.174933e-05 3.929307e-05 2.174933e-05
## 415 416 417 418 419 420
## 2.174933e-05 2.174933e-05 7.111225e-02 2.174933e-05 5.528323e-05 7.152876e-03
## 421 422 423 424 425 426
## 2.174933e-05 4.655102e-05 8.574799e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 427 428 429 430 431 432
## 2.174933e-05 2.174933e-05 2.174933e-05 3.056231e-05 2.174933e-05 2.583357e-05
## 433 434 435 436 437 438
## 2.174933e-05 2.583357e-05 2.174933e-05 2.810625e-05 2.174933e-05 2.174933e-05
## 439 440 441 442 443 444
## 2.174933e-05 2.174933e-05 1.222411e-04 2.174933e-05 5.071376e-05 2.174933e-05
## 445 446 447 448 449 450
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 451 452 453 454 455 456
## 3.056231e-05 2.174933e-05 2.174933e-05 2.174933e-05 6.030046e-05 2.174933e-05
## 457 458 459 460 461 462
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 9.369997e-05
## 463 464 465 466 467 468
## 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 469 470 471 472 473 474
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 475 476 477 478 479 480
## 2.174933e-05 2.810625e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 481 482 483 484 485 486
## 3.322685e-05 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05 2.174933e-05
## 487 488 489 490 491 492
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.372145e-05 2.174933e-05
## 493 494 495 496 497 498
## 2.174933e-05 2.174933e-05 2.174933e-05 6.773844e-03 2.583357e-05 2.583357e-05
## 499 500 501 502 503 504
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 505 506 507 508 509 510
## 6.775879e-01 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 511 512 513 514 515 516
## 2.174933e-05 2.174933e-05 2.583357e-05 2.583357e-05 2.810625e-05 2.372145e-05
## 517 518 519 520 521 522
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 523 524 525 526 527 528
## 2.174933e-05 2.174933e-05 3.612723e-05 2.174933e-05 2.810625e-05 2.583357e-05
## 529 530 531 532 533 534
## 2.174933e-05 2.174933e-05 7.848345e-05 2.174933e-05 1.150560e-03 2.174933e-05
## 535 536 537 538 539 540
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 541 542 543 544 545 546
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 547 548 549 550 551 552
## 2.174933e-05 2.583357e-05 2.174933e-05 2.174933e-05 2.810625e-05 2.174933e-05
## 553 554 555 556 557 558
## 2.174933e-05 7.152876e-03 2.174933e-05 2.174933e-05 3.021095e-02 2.174933e-05
## 559 560 561 562 563 564
## 2.174933e-05 7.185379e-05 2.174933e-05 2.174933e-05 3.612723e-05 2.174933e-05
## 565 566 567 568 569 570
## 2.174933e-05 2.174933e-05 2.635251e+00 1.523294e-02 2.174933e-05 2.174933e-05
## 571 572 573 574 575 576
## 2.174933e-05 8.756098e-03 2.174933e-05 2.174933e-05 6.914970e-03 2.174933e-05
## 577 578 579 580 581 582
## 2.174933e-05 3.021095e-02 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 583 584 585 586 587 588
## 5.282779e-03 2.174933e-05 2.174933e-05 2.174933e-05 3.664304e-04 2.174933e-05
## 589 590 591 592 593 594
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 595 596 597 598 599 600
## 6.914970e-03 2.810625e-05 2.174933e-05 2.174933e-05 2.174933e-05 3.056231e-05
## 601 602 603 604 605 606
## 2.174933e-05 2.463827e+00 3.056231e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 607 608 609 610 611 612
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 613 614 615 616 617 618
## 2.174933e-05 2.174933e-05 4.275628e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 619 620 621 622 623 624
## 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05 2.174933e-05
## 625 626
## 2.174933e-05 2.583357e-05
# Largest Cook's distance across all observations.
max(cooks.distance(model = model))
## [1] 2.635251
Yes, there are observations we should be concerned about: according to Cook's distance, two observations (567 and 602) have values above 1, the largest being 2.635251.
# Default lm diagnostic plots, followed by a normal Q-Q plot of the model
# residuals with its reference line.
plot(model)
qqnorm(y = model$residuals, main = "model")
qqline(y = model$residuals)
The Q-Q plot shows that a large share of the residuals lie along a flat
line near y = 0 and could be considered well distributed, but some values
clearly diverge from the reference line.
# Predict Cited_by_Patent_Count for a patent with Cites_Patent_Count = 340.
New_Data <- data.frame(Cites_Patent_Count = 340)
predict(model, newdata = New_Data, type = "response")
## 1
## 0.9602238
# Predict Cited_by_Patent_Count for a patent with Cites_Patent_Count = 300.
New_Data1 <- data.frame(Cites_Patent_Count = 300)
# Use New_Data1 here; passing New_Data would simply repeat the prediction
# for Cites_Patent_Count = 340 (0.9602238).
predict(model, newdata = New_Data1, type = "response")
## 1
## 0.8534046
Determining the largest leverage data point.
# Attach each observation's leverage (hat value) as an extra column and
# preview the first six rows.
train <- cbind(
  TrainData,
  leverage = hatvalues(model)
)
head(train, n = 6L)
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count leverage
## 1 PL 3341367 T3 0 0 0.00164921
## 2 HR P20210871 T1 0 0 0.00164921
## 3 CR 20210284 A 0 0 0.00164921
## 4 US 2021/0205309 A1 0 0 0.00164921
## 5 JP 2021100972 A 3 0 0.00160247
## 6 AU 2021/203768 A1 0 0 0.00164921
# Largest leverage (hat value) among the training observations.
max(hatvalues(model = model))
## [1] 0.2995991
# Leverage of a new point with Cites_Patent_Count = 340, computed as
# x_new' (X'X)^{-1} x_new, where X is the model's design matrix and the
# leading 1 in x_new is the intercept term.
X <- model.matrix(model)
x_new <- c(x = c(1, 340))
t(x_new) %*% solve(t(X) %*% X) %*% x_new
## [,1]
## [1,] 0.3086801
Since 0.3086801 is greater than the largest leverage in the training data (0.2995991), predicting at Cites_Patent_Count = 340 is considered extrapolation.
# Leverage of a new point with Cites_Patent_Count = 300, using the design
# matrix X computed earlier; the leading 1 is the intercept term.
x_new <- c(x = c(1, 300))
t(x_new) %*% solve(t(X) %*% X) %*% x_new
## [,1]
## [1,] 0.2398486
Since 0.2398486 is lower than the largest leverage in the training data (0.2995991), predicting at Cites_Patent_Count = 300 is not considered extrapolation.
5. Fill in the blanks based on the regression model that you created in #2, and the corresponding prediction that you made in part b of #3:
# 95% confidence interval for the mean response at Cites_Patent_Count = 300.
predict(
  model,
  newdata = New_Data1,
  interval = "confidence",
  level = 0.95,
  type = "response"
)
## fit lwr upr
## 1 0.8534046 0.5434141 1.163395
# 95% confidence interval for the mean response at Cites_Patent_Count = 300.
# NOTE(review): this repeats the preceding confidence-interval call exactly;
# if a prediction interval was intended, use interval = "prediction" - confirm.
predict(
  model,
  newdata = New_Data1,
  interval = "confidence",
  level = 0.95,
  type = "response"
)
## fit lwr upr
## 1 0.8534046 0.5434141 1.163395
The four types of mutating joins learned in class
are:
* Inner Join
* Full join
* Left join
* Right join
# Load the per-patent sequence counts from disk and list the column names.
SCount <- read.csv(
  file = "/Users/jusimioni/Desktop/SequenceCounts.csv",
  header = TRUE
)
names(SCount)
## [1] "Patent_No." "Sequence_Count"
# Preview the first six rows of the sequence-count data.
head(SCount, n = 6L)
## Patent_No. Sequence_Count
## 1 CA 189065 S 0
## 2 NI 202000072 A 0
## 3 KR 20210032013 A 80
## 4 PH 12020550461 A1 0
## 5 TW I722568 B 0
## 6 CN 112533674 A 0
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
# Inner join: keep only patents that appear in both TrainData and SCount,
# matching TrainData$Patent_Number against SCount$Patent_No., then print.
joined_data <- inner_join(
  TrainData,
  SCount,
  by = c("Patent_Number" = "Patent_No.")
)
joined_data
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count Sequence_Count
## 1 CA 189065 S 0 0 0
## 2 NI 202000072 A 0 0 0
## 3 KR 20210032013 A 0 0 80
## 4 PH 12020550461 A1 0 0 0
## 5 TW I722568 B 0 0 0
## 6 CN 112533674 A 0 2 0
## 7 CO 2021002954 A2 0 0 0
## 8 JP 2021040642 A 1 0 0
## 9 AU 2019/341683 A1 0 0 0
## 10 JP 2021042204 A 4 0 0
## 11 US 2021/0077463 A1 0 1 24
## 12 CA 3153676 A1 0 0 0
## 13 CA 200622 S 0 0 0
## 14 WO 2021/048736 A1 7 0 0
## 15 AU 2021/200232 A1 0 0 0
## 16 PE 20210554 A1 0 0 0
## 17 AR 115987 A1 0 0 0
## 18 US 10946086 B2 111 1 0
## 19 TW 202111125 A 0 0 0
## 20 US 10947494 B2 11 0 0
## 21 CN 112513070 A 0 1 0
## 22 TW 202110480 A 0 0 0
## 23 US 10946031 B2 9 0 0
## 24 US 10945994 B2 16 0 2
## 25 BR 112020025701 A2 0 0 0
## 26 TW 202110446 A 0 0 0
## 27 PE 20210488 A1 0 0 0
## 28 KR 20210029234 A 0 0 0
## 29 ES 2811523 T3 0 0 0
## 30 US 2021/0069219 A1 0 0 0
## 31 JP 2021038265 A 4 0 0
## 32 US 2021/0069326 A1 0 0 41
## 33 EA 037318 B1 2 0 0
## 34 AU 2019/329884 A1 0 0 0
## 35 AU 2016/286108 B2 2 0 0
## 36 AU 2019/323790 A1 0 0 0
## 37 JP 2021036873 A 3 0 0
## 38 EP 3334760 B1 0 0 0
## 39 UA 123270 C2 0 0 0
## 40 EP 3091981 B1 0 0 0
## 41 LT C2534153 I2 0 0 0
## 42 US 10941216 B2 20 0 82
## 43 BR 112020024956 A2 0 0 0
## 44 US 10941118 B2 10 0 0
## 45 CN 112469473 A 0 0 0
## 46 PL 3096785 T3 0 0 0
## 47 PL 3310784 T3 0 0 0
## 48 CO 2021002230 A2 0 0 0
## 49 CO 2021001893 A2 0 0 0
## 50 CO 2020015305 A2 0 0 0
## 51 PE 20210468 A1 0 0 0
## 52 EA 037288 B1 4 0 0
## 53 PE 20210412 A1 0 0 0
## 54 US 2021/0061902 A1 0 0 225
## 55 US 2021/0061923 A1 0 0 0
## 56 EP 2968155 B1 12 0 0
## 57 EP 3784664 A1 0 0 0
## 58 MA 52360 A 0 0 0
## 59 BR 112014018192 A8 0 0 0
## 60 BR 112020020941 A2 0 0 0
## 61 ES 2808987 T3 0 0 0
## 62 BR 112020024470 A2 0 0 0
## 63 US 10934279 B2 17 0 0
## 64 TW 202108167 A 0 0 0
## 65 TW I720272 B 1 0 0
## 66 IL 280292 A 0 0 0
## 67 TW I720448 B 0 0 0
## 68 IL 279591 A 0 0 0
## 69 IL 280317 A 0 0 0
## 70 IL 279926 A 0 0 0
## 71 DO P2020000191 A 0 0 0
## 72 IL 248667 B 0 0 0
## 73 IL 254131 B 0 0 0
## 74 IL 244437 B 0 0 0
## 75 IL 255444 B 0 0 0
## 76 PT 3137114 T 0 0 0
## 77 RU 2743793 C1 3 0 0
## 78 CL 2020002507 A1 0 0 0
## 79 UY 38786 A 0 0 0
## 80 SG 11202100888W A 0 0 0
## 81 US 2021/0054087 A1 0 0 502
## 82 AU 2018/208844 B2 5 0 0
## 83 EP 3781132 A1 0 0 0
## 84 EP 3341367 B1 0 0 0
## 85 EP 3148587 B1 1 0 0
## 86 ES 2807591 T3 0 0 0
## 87 US 10927178 B2 0 0 173
## 88 BR 112020022897 A2 0 0 0
## 89 KR 20210020095 A 0 0 0
## 90 KR 20210019529 A 0 0 0
## 91 CN 112384282 A 0 0 0
## 92 AU 2018/349259 B2 1 0 0
## 93 US 2021/0047298 A1 0 0 0
## 94 JP 2021020890 A 0 0 0
## 95 AU 2019/325400 A1 0 0 0
## 96 AU 2019/336197 A1 0 0 0
## 97 WO 2021/029854 A1 4 1 0
## 98 JP 2021019632 A 2 0 0
## 99 US 2021/0047325 A1 2 0 2
## 100 CA 3150424 A1 0 0 0
## 101 BR PI0915166 B1 0 0 0
## 102 KR 20210018529 A 0 0 6
## 103 EP 3774863 A1 0 0 0
## 104 KR 20210018341 A 0 0 0
## 105 BR 112020019368 A2 0 0 0
## 106 US 10918708 B2 139 2 40
## 107 PT 3327027 T 0 0 0
## 108 DK 3242888 T3 0 0 0
## 109 PH 12020550596 A1 0 0 0
## 110 DK 3328849 T5 0 0 0
## 111 CL 2020002748 A1 0 0 0
## 112 US 2021/0040182 A1 0 0 518
## 113 US 2021/0040045 A1 0 0 0
## 114 TW I718758 B 2 0 0
## 115 TW I718644 B 2 0 0
## 116 TW I718427 B 3 0 0
## 117 TW I718144 B 1 0 0
## 118 CN 112336854 A 0 0 0
## 119 BR 112020022595 A2 0 0 0
## 120 KR 20210014763 A 0 0 0
## 121 BR 112020023891 A2 0 0 0
## 122 BR 112020017935 A2 0 0 0
## 123 KR 20210013777 A 0 0 62
## 124 US 2021/0032308 A1 0 0 130
## 125 AU 2019/304198 A1 0 0 0
## 126 AU 2016/280137 B2 6 0 0
## 127 AU 2019/290212 A1 0 0 0
## 128 RU 2742260 C2 3 0 0
## 129 JP 2021011493 A 2 0 0
## 130 KR 20210013167 A 0 0 200
## 131 KR 20210013165 A 0 0 890
## 132 US 10906955 B2 15 0 25
## 133 CN 112313219 A 0 0 0
## 134 US 10906888 B2 49 0 0
## 135 TW 202104224 A 0 0 0
## 136 IL 254073 B 0 0 0
## 137 IL 279396 A 0 0 0
## 138 IL 278924 A 0 0 0
## 139 IL 278926 A 0 0 0
## 140 IL 279224 A 0 0 0
## 141 IL 279300 A 0 0 0
## 142 IL 255224 B 0 0 0
## 143 IL 279270 A 0 0 0
## 144 IL 252158 B 0 0 0
## 145 SI 3500564 T1 0 0 0
## 146 UY 38768 A 0 0 0
## 147 EC SP20078651 A 0 0 0
## 148 UY 38752 A 0 0 0
## 149 KR 20210011076 A 2 0 798
## 150 SG 11202100021T A 0 0 0
## 151 US 2021/0024497 A1 0 0 0
## 152 AU 2019/210579 B2 3 0 0
## 153 US 2021/0023134 A1 0 0 10
## 154 JP 2021008452 A 0 0 0
## 155 US 2021/0024903 A1 0 1 761
## 156 US 2021/0024589 A1 0 0 81
## 157 US 2021/0023200 A1 0 0 326
## 158 US 2021/0023193 A1 0 2 40
## 159 PT 3143019 T 0 0 0
## 160 HU E050811 T2 0 0 0
## 161 WO 2021/014360 A1 14 0 0
## 162 US 2021/0024536 A1 0 0 0
## 163 AR 115498 A1 0 0 0
## 164 EP 3770164 A1 6 0 0
## 165 EP 3768669 A1 0 0 0
## 166 BR 112020021689 A2 0 0 0
## 167 CN 112263677 A 0 0 0
## 168 US 10899802 B2 335 0 22
## 169 US 10899840 B2 36 1 36
## 170 DK 3137114 T3 0 0 0
## 171 PL 3339303 T3 0 0 0
## 172 GE P20217211 B 0 0 0
## 173 WO 2021/009676 A1 20 1 0
## 174 CA 3147266 A1 0 0 0
## 175 US 2021/0017206 A1 0 0 0
## 176 US 2021/0017553 A1 2 1 5
## 177 US 2021/0017205 A1 0 0 0
## 178 US 2021/0017254 A1 0 0 12
## 179 US 2021/0017172 A1 0 2 0
## 180 CR 20200612 A 0 0 0
## 181 EP 3765474 A1 0 0 0
## 182 KR 20210008177 A 0 0 4
## 183 EP 3765027 A1 0 0 0
## 184 EP 3766885 A1 77 0 0
## 185 EP 3765019 A1 0 0 0
## 186 SA 516371030 B1 0 0 0
## 187 MA 52012 A 0 0 0
## 188 US 10894083 B2 33 0 10
## 189 PE 20210127 A1 0 0 0
## 190 BR 112016029612 B1 0 0 0
## 191 DK 3327027 T3 0 0 0
## 192 MY 182282 A 0 0 0
## 193 MY 182168 A 0 0 0
## 194 CN 112236446 A 0 0 0
## 195 US 2021/0009601 A1 0 0 15
## 196 US 2021/0009657 A1 2 1 36
## 197 RU 2740309 C2 3 0 0
## 198 EP 3761954 A1 0 0 0
## 199 PT 3328867 T 0 0 0
## 200 CA 2928908 C 0 0 66
## 201 CN 112218618 A 4 0 0
## 202 MY 181891 A 0 0 0
## 203 US 10888611 B2 331 0 22
## 204 RU 2740135 C1 4 0 0
## 205 NI 202000058 A 0 0 0
## 206 KR 20210002757 A 0 0 46
## 207 CN 112195169 A 0 0 0
## 208 KR 20210002642 A 0 0 2
## 209 US 2021/0002369 A1 0 0 11
## 210 US 2021/0002371 A1 0 0 9
## 211 US 2021/0002251 A1 0 0 0
## 212 JP 2021001170 A 0 0 0
## 213 AU 2020/281105 A1 0 0 0
## 214 US 2021/0000930 A1 0 0 3
## 215 AU 2019/299666 A1 0 0 0
## 216 WO 2021/001741 A1 8 0 11
## 217 EP 3327027 B1 0 0 0
## 218 EP 3759129 A1 0 0 0
## 219 EP 3328849 B9 0 0 0
## 220 EP 3759134 A1 0 0 0
## 221 EP 3758753 A1 0 0 0
## 222 BR 112020015641 A2 0 0 0
# Full join: keep every patent from either table, matching
# TrainData$Patent_Number against SCount$Patent_No.; columns with no match
# are filled with NA. This overwrites the earlier joined_data, then prints.
joined_data <- full_join(
  TrainData,
  SCount,
  by = c("Patent_Number" = "Patent_No.")
)
joined_data
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count Sequence_Count
## 1 PL 3341367 T3 0 0 NA
## 2 HR P20210871 T1 0 0 NA
## 3 CR 20210284 A 0 0 NA
## 4 US 2021/0205309 A1 0 0 NA
## 5 JP 2021100972 A 3 0 NA
## 6 AU 2021/203768 A1 0 0 NA
## 7 ES 2841400 T3 0 0 NA
## 8 US 2021/0206757 A1 1 0 NA
## 9 PE 20211235 A1 0 0 NA
## 10 US 2021/0205463 A1 0 0 NA
## 11 AR 117073 A1 0 0 NA
## 12 EP 3327027 B9 0 0 NA
## 13 EP 3096786 B1 10 0 NA
## 14 EP 3844288 A1 0 0 NA
## 15 EP 3843740 A1 0 0 NA
## 16 EP 3096783 B1 16 0 NA
## 17 BR 112021006223 A2 0 0 NA
## 18 DK 2909307 T3 0 0 NA
## 19 AU 2018/372109 B2 1 0 NA
## 20 TW 202124414 A 0 0 NA
## 21 US 2021/0198365 A1 0 0 NA
## 22 TW 202123937 A 0 0 NA
## 23 US 2021/0196810 A1 0 2 NA
## 24 AU 2018/275359 B2 1 0 NA
## 25 US 2021/0196823 A1 0 0 NA
## 26 AU 2019/404250 A1 0 0 NA
## 27 JP 2021098716 A 2 0 NA
## 28 TW I732431 B 1 0 NA
## 29 US 2021/0198305 A1 0 1 NA
## 30 TW 202123940 A 0 0 NA
## 31 IL 267288 B 0 0 NA
## 32 IL 270760 B 0 0 NA
## 33 EC SP21034668 A 0 0 NA
## 34 IL 282446 A 0 0 NA
## 35 EP 3233054 B1 0 0 NA
## 36 DO P2021000088 A 0 0 NA
## 37 IL 283032 A 0 0 NA
## 38 IL 282771 A 0 0 NA
## 39 EP 3841121 A2 0 0 NA
## 40 KR 20210080468 A 0 0 NA
## 41 PT 3478679 T 0 0 NA
## 42 RS 61896 B1 0 0 NA
## 43 IL 283149 A 0 0 NA
## 44 ZA 201505468 B 0 0 NA
## 45 EP 3840777 A2 0 0 NA
## 46 EP 3571202 B1 0 0 NA
## 47 IL 283058 A 0 0 NA
## 48 RS 61897 B1 0 0 NA
## 49 EC SP21037191 A 0 0 NA
## 50 ES 2837018 T3 0 0 NA
## 51 SG 11202104585Y A 0 0 NA
## 52 RU 2750454 C2 2 0 NA
## 53 PL 3327027 T3 0 0 NA
## 54 PL 3137114 T3 0 0 NA
## 55 PH 12020500671 A1 0 0 NA
## 56 HU E053911 T4 0 0 NA
## 57 HU E053287 T2 0 0 NA
## 58 HR P20210769 T1 0 0 NA
## 59 NZ 748652 A 0 0 NA
## 60 GE P20217265 B 0 0 NA
## 61 CN 113039178 A 0 0 NA
## 62 HR P20210770 T1 0 0 NA
## 63 LT 3318565 T 0 0 NA
## 64 WO 2021/124210 A1 6 0 NA
## 65 CA 3165135 A1 0 0 NA
## 66 US 2021/0186974 A1 0 0 NA
## 67 US 2021/0188991 A1 0 0 NA
## 68 WO 2021/124155 A1 2 1 NA
## 69 CR 20210163 A 0 0 NA
## 70 US 2021/0188829 A1 0 0 NA
## 71 WO 2021/124267 A1 6 0 NA
## 72 CA 3164623 A1 0 0 NA
## 73 WO 2021/124096 A1 13 0 NA
## 74 WO 2021/124073 A1 147 0 NA
## 75 JP 2021095402 A 0 0 NA
## 76 US 2021/0188979 A1 0 0 NA
## 77 SA 516380328 B1 0 0 NA
## 78 CA 3164804 A1 0 0 NA
## 79 WO 2021/124095 A1 3 0 NA
## 80 WO 2021/124152 A1 10 0 NA
## 81 NI 202100012 A 0 0 NA
## 82 BR 112016002008 B1 0 0 NA
## 83 CA 2810359 C 0 0 NA
## 84 ES 2834927 T3 0 0 NA
## 85 DK 3478679 T3 0 0 NA
## 86 KR 20210074335 A 0 0 NA
## 87 CA 3103120 A1 0 0 NA
## 88 JP 2021091683 A 0 0 NA
## 89 US 2021/0177857 A1 0 0 NA
## 90 JP 2021091692 A 5 0 NA
## 91 JP 2021091663 A 0 0 NA
## 92 CA 3163979 A1 0 0 NA
## 93 US 2021/0180008 A1 0 0 NA
## 94 PT 3497103 T 0 0 NA
## 95 US 2021/0179716 A1 0 1 NA
## 96 WO 2021/116874 A1 13 0 NA
## 97 ES 2834060 T3 0 0 NA
## 98 AR 116797 A1 0 0 NA
## 99 EP 3630789 A4 1 0 NA
## 100 CN 112969461 A 0 0 NA
## 101 US 11034678 B2 35 0 NA
## 102 PE 20211093 A1 0 0 NA
## 103 DK 3497103 T3 0 0 NA
## 104 PE 20211095 A1 0 0 NA
## 105 PH 12020552006 A1 0 0 NA
## 106 CN 112955147 A 0 0 NA
## 107 ES 2832893 T3 0 0 NA
## 108 JP 2021087420 A 0 0 NA
## 109 CO 2021006924 A2 0 0 NA
## 110 JP 2021088558 A 0 0 NA
## 111 JP 2021088548 A 0 0 NA
## 112 LT 3397631 T 0 0 NA
## 113 KR 20210068532 A 0 0 NA
## 114 PE 20211070 A1 0 0 NA
## 115 CU 24517 B1 0 0 NA
## 116 CU 24522 B1 0 0 NA
## 117 KR 20210066837 A 0 0 NA
## 118 PH 12020500648 A1 0 0 NA
## 119 MY 185765 A 0 0 NA
## 120 CN 112912395 A 0 0 NA
## 121 US 2021/0163455 A1 0 0 NA
## 122 US 2021/0163588 A1 0 0 NA
## 123 AU 2019/376078 A1 0 0 NA
## 124 RU 2748949 C2 3 0 NA
## 125 EP 3242888 B8 0 0 NA
## 126 EP 3828198 A1 8 0 NA
## 127 US 11020469 B2 142 1 NA
## 128 US 11020376 B2 10 0 NA
## 129 SA 516371506 B1 0 0 NA
## 130 BR 112021004058 A2 0 0 NA
## 131 CN 112888691 A 0 0 NA
## 132 TW I729530 B 1 0 NA
## 133 BR 112021004935 A2 0 0 NA
## 134 US 11021528 B2 98 0 NA
## 135 TW 202120482 A 0 0 NA
## 136 PH 12020552069 A1 0 0 NA
## 137 IL 281634 A 0 0 NA
## 138 IL 264818 B 0 0 NA
## 139 IL 282215 A 0 0 NA
## 140 IL 257731 B 0 0 NA
## 141 IL 281670 A 0 0 NA
## 142 DK 3091981 T3 0 0 NA
## 143 IL 259563 B 0 0 NA
## 144 SI 3341367 T1 0 0 NA
## 145 IL 270090 B 0 0 NA
## 146 IL 282167 A 0 0 NA
## 147 MA 43518 B1 0 0 NA
## 148 IL 282083 A 0 0 NA
## 149 PH 12020551160 A1 0 0 NA
## 150 IL 281968 A 0 0 NA
## 151 RS 61719 B1 0 0 NA
## 152 EC SP21023404 A 0 0 NA
## 153 CO 2021001926 A2 0 0 NA
## 154 IL 267967 B 0 0 NA
## 155 CO 2021006114 A2 0 0 NA
## 156 PT 3397631 T 0 0 NA
## 157 HU E052790 T2 0 0 NA
## 158 SG 11202103667Q A 0 0 NA
## 159 HR P20210579 T1 0 0 NA
## 160 SG 11202104394X A 0 0 NA
## 161 PT 3318565 T 0 0 NA
## 162 HU E052692 T2 0 0 NA
## 163 AU 2017/311645 B2 3 0 NA
## 164 AU 2021/202540 A1 0 0 NA
## 165 AU 2019/387290 A1 0 0 NA
## 166 AU 2021/202522 A1 0 0 NA
## 167 AU 2019/378184 A1 0 0 NA
## 168 ZA 201905543 B 0 0 NA
## 169 EP 3823723 A1 0 0 NA
## 170 KR 20210060549 A 0 0 NA
## 171 EP 2909307 B1 0 0 NA
## 172 ZA 202002207 B 0 0 NA
## 173 AR 116603 A1 0 0 NA
## 174 ZA 201904616 B 0 0 NA
## 175 BR PI0114704 B8 0 0 NA
## 176 BR PI0918841 B8 0 0 NA
## 177 BR PI0811280 B8 0 0 NA
## 178 BR PI0417810 B8 0 0 NA
## 179 US 11014911 B2 13 3 NA
## 180 BR PI0414105 B8 0 0 NA
## 181 US 11014909 B2 121 0 NA
## 182 BR PI0915166 B8 0 0 NA
## 183 BR PI0414130 B8 0 0 NA
## 184 BR PI0208811 B8 0 0 NA
## 185 US 11014908 B2 54 0 NA
## 186 DK 3318565 T3 0 0 NA
## 187 BR PI0620081 B8 0 0 NA
## 188 BR PI0110955 B8 0 0 NA
## 189 BR PI0813412 B8 0 0 NA
## 190 BR PI0608819 B8 0 0 NA
## 191 DK 3397631 T3 0 0 NA
## 192 BR 112021003956 A2 0 0 NA
## 193 BR PI9916853 B8 0 0 NA
## 194 BR PI0807346 B8 0 0 NA
## 195 BR PI0514537 B8 0 0 NA
## 196 BR PI9917007 B8 0 0 NA
## 197 DK 3431475 T3 0 0 NA
## 198 KR 20210057726 A 0 0 NA
## 199 CR 20210146 A 0 0 NA
## 200 WO 2021/094953 A1 21 0 NA
## 201 US 2021/0145957 A1 0 1 NA
## 202 US 2021/0145817 A1 0 0 NA
## 203 WO 2021/094917 A1 6 0 NA
## 204 CA 3160806 A1 0 0 NA
## 205 CA 3161153 A1 0 0 NA
## 206 BR 112021003039 A2 0 0 NA
## 207 CN 112823167 A 0 0 NA
## 208 PH 12020550908 A1 0 0 NA
## 209 DO P2021000055 A 0 0 NA
## 210 HR P20210472 T1 0 0 NA
## 211 AU 2016/222928 B2 1 0 NA
## 212 PT 3431475 T 0 0 NA
## 213 JP 2021073274 A 3 0 NA
## 214 CR 20210110 A 0 0 NA
## 215 AR 116504 A1 0 0 NA
## 216 CU 20200070 A7 0 0 NA
## 217 KR 20210053929 A 0 0 NA
## 218 EP 3817810 A1 0 0 NA
## 219 AR 116464 A1 0 0 NA
## 220 CU 24511 B1 0 0 NA
## 221 TW I726942 B 1 0 NA
## 222 BR 112021002530 A2 0 0 NA
## 223 TW I727380 B 0 0 NA
## 224 BR 112021003173 A2 0 0 NA
## 225 BR PI0811280 B1 0 0 NA
## 226 PH 12020551666 A1 0 0 NA
## 227 KR 20210049840 A 0 0 NA
## 228 US 2021/0128729 A1 1 0 NA
## 229 CA 3159573 A1 0 0 NA
## 230 KR 20210049866 A 0 0 NA
## 231 WO 2021/084429 A1 68 4 NA
## 232 ES 2823279 T3 0 0 NA
## 233 AR 116417 A1 0 0 NA
## 234 ES 2823049 T3 0 0 NA
## 235 EP 3497103 B1 0 0 NA
## 236 TW 202116770 A 0 0 NA
## 237 SI 3328867 T1 0 0 NA
## 238 CL 2020003222 A1 0 0 NA
## 239 UY 38899 A 0 0 NA
## 240 SI 3145934 T1 0 0 NA
## 241 DO P2021000045 A 0 0 NA
## 242 JP 2021066727 A 0 0 NA
## 243 UY 38892 A 0 0 NA
## 244 SG 11202101158T A 0 0 NA
## 245 SG 11202101827R A 0 0 NA
## 246 SG 11202103111T A 0 0 NA
## 247 IL 280963 A 0 0 NA
## 248 EC SP21018584 A 0 0 NA
## 249 SG 11202102674R A 0 0 NA
## 250 SG 11202102047P A 0 0 NA
## 251 IL 281490 A 0 0 NA
## 252 IL 270761 B 0 0 NA
## 253 AU 2019/351960 A1 0 0 NA
## 254 IL 246853 B 0 0 NA
## 255 IL 281093 A 0 0 NA
## 256 IL 281094 A 0 0 NA
## 257 IL 263505 B 0 0 NA
## 258 IL 258311 B 0 0 NA
## 259 US 2021/0121555 A1 0 2 NA
## 260 IL 246985 B 0 0 NA
## 261 EC SP20057847 A 0 0 NA
## 262 IL 270764 B 0 0 NA
## 263 AU 2019/359540 A1 0 0 NA
## 264 AU 2019/204623 B2 6 0 NA
## 265 IL 264687 B 0 0 NA
## 266 EP 3668495 A4 1 0 NA
## 267 EP 3810192 A1 0 0 NA
## 268 ZA 201800197 B 0 0 NA
## 269 EP 3810089 A1 0 0 NA
## 270 EP 3119395 B1 0 0 NA
## 271 HU E052146 T2 0 0 NA
## 272 EP 3810761 A1 0 0 NA
## 273 EP 3630788 A4 1 0 NA
## 274 ZA 201903620 B 0 0 NA
## 275 MA 52987 A 0 0 NA
## 276 KR 20210046717 A 0 0 NA
## 277 US 10988463 B2 20 0 NA
## 278 CR 20200484 A 0 0 NA
## 279 LT 3341367 T 0 0 NA
## 280 PT 3334760 T 0 0 NA
## 281 JP 2021061851 A 3 0 NA
## 282 ES 2820824 T3 0 0 NA
## 283 US 2021/0115159 A1 0 0 NA
## 284 JP 2021061845 A 2 0 NA
## 285 US 2021/0115413 A1 0 0 NA
## 286 PT 2968155 T 0 0 NA
## 287 AR 116295 A1 0 0 NA
## 288 EP 3806855 A1 0 0 NA
## 289 EP 3806955 A1 0 0 NA
## 290 MA 52856 A 0 0 NA
## 291 MA 52882 A 0 0 NA
## 292 EP 3808768 A1 7 0 NA
## 293 EP 3478679 B1 0 0 NA
## 294 ME 03793 B 0 0 NA
## 295 CA 2859755 C 0 0 NA
## 296 US 10982198 B2 151 1 NA
## 297 US 10980815 B2 17 0 NA
## 298 ME 03743 B 0 0 NA
## 299 PT 3341367 T 0 0 NA
## 300 DK 3334760 T3 0 0 NA
## 301 PL 3328867 T3 0 0 NA
## 302 DK 2968155 T3 0 0 NA
## 303 ES 2819676 T3 0 0 NA
## 304 CO 2021003928 A2 0 0 NA
## 305 TW 202115048 A 0 0 NA
## 306 CN 112672791 A 0 0 NA
## 307 TW 202115120 A 0 0 NA
## 308 HR P20210292 T1 0 0 NA
## 309 PE 20210708 A1 0 0 NA
## 310 TW 202115086 A 0 0 NA
## 311 AU 2019/346012 A1 0 0 NA
## 312 US 2021/0107979 A1 0 0 NA
## 313 US 2021/0106658 A1 0 0 NA
## 314 AU 2021/201721 A1 0 0 NA
## 315 DO P2021000036 A 0 0 NA
## 316 EP 3804724 A1 34 0 NA
## 317 EP 3328867 B9 0 0 NA
## 318 ES 2818806 T3 0 0 NA
## 319 EP 3318565 B1 0 1 NA
## 320 EP 3802594 A1 0 0 NA
## 321 US 10975065 B2 41 1 NA
## 322 BR 112016012262 B1 0 0 NA
## 323 DK 3341367 T3 0 0 NA
## 324 KR 20210040182 A 3 0 NA
## 325 GE P20217242 B 0 0 NA
## 326 AU 2021/201608 A1 0 0 NA
## 327 US 2021/0100903 A1 0 0 NA
## 328 US 2021/0101998 A1 0 0 NA
## 329 US 2021/0101970 A1 0 0 NA
## 330 ES 2817751 T3 0 0 NA
## 331 US 2021/0100798 A1 0 0 NA
## 332 AU 2016/379097 C1 1 0 NA
## 333 CO 2021003391 A2 0 0 NA
## 334 WO 2021/064590 A1 43 1 NA
## 335 US 2021/0100796 A1 0 0 NA
## 336 US 2021/0101943 A1 0 0 NA
## 337 AU 2019/344107 A1 0 0 NA
## 338 EP 3431475 B1 14 0 NA
## 339 CU 20200062 A7 0 0 NA
## 340 EP 3397631 B1 0 0 NA
## 341 CU 24507 B1 0 0 NA
## 342 MA 45153 A 0 0 NA
## 343 BR PI0913954 A2 0 0 NA
## 344 US 10966980 B2 62 0 NA
## 345 PL 3386541 T3 0 0 NA
## 346 US 10968242 B2 5 0 NA
## 347 PL 3500564 T3 0 0 NA
## 348 US 10967068 B2 19 0 NA
## 349 CA 3095028 A1 0 0 NA
## 350 MY 184433 A 0 0 NA
## 351 WO 2021/059181 A1 43 0 NA
## 352 AU 2016/322813 B2 1 0 NA
## 353 AU 2019/363840 A1 0 0 NA
## 354 CA 3155669 A1 0 0 NA
## 355 WO 2021/059136 A1 11 1 NA
## 356 TW 202112392 A 0 0 NA
## 357 CA 3155569 A1 0 0 NA
## 358 EA 037476 B1 1 0 NA
## 359 CR 20200615 A 0 0 NA
## 360 SI 3327027 T1 0 0 NA
## 361 MA 48241 A1 0 0 NA
## 362 SI 3143019 T1 0 0 NA
## 363 EP 3798222 A1 5 0 NA
## 364 EP 3797121 A1 0 0 NA
## 365 EP 3796983 A2 0 0 NA
## 366 RS 61516 B1 0 0 NA
## 367 EC SP21012501 A 0 0 NA
## 368 SG 10202100861T A 0 0 NA
## 369 CA 3024216 C 0 0 NA
## 370 BR 112021000109 A2 0 0 NA
## 371 CN 112584861 A 0 1 NA
## 372 US 10961297 B2 12 0 NA
## 373 SG 11202101503R A 0 0 NA
## 374 SG 10202101887W A 0 0 NA
## 375 SG 11202101502Y A 0 0 NA
## 376 HU E051722 T2 0 0 NA
## 377 HU E051731 T2 0 0 NA
## 378 HU E051728 T2 0 0 NA
## 379 HU E051898 T2 0 0 NA
## 380 RU 2745565 C2 4 0 NA
## 381 CN 112566637 A 0 1 NA
## 382 CN 112566937 A 0 0 NA
## 383 KR 20210033504 A 0 0 NA
## 384 CN 112566658 A 0 0 NA
## 385 CA 3155090 A1 0 0 NA
## 386 US 2021/0085773 A1 0 0 NA
## 387 WO 2021/053207 A1 21 0 NA
## 388 GE P20217234 B 0 0 NA
## 389 AR 116046 A1 0 0 NA
## 390 AR 116010 A1 0 0 NA
## 391 US 2021/0087204 A1 0 0 NA
## 392 US 2021/0087180 A1 0 0 NA
## 393 IL 237792 B 0 0 NA
## 394 LT 3137114 T 0 0 NA
## 395 IL 280725 A 1 0 NA
## 396 EP 3793609 A1 0 0 NA
## 397 MA 45656 A 0 0 NA
## 398 NI 202000091 A 0 0 NA
## 399 US 10952972 B2 11 0 NA
## 400 CA 189065 S 0 0 0
## 401 NI 202000072 A 0 0 0
## 402 KR 20210032013 A 0 0 80
## 403 PH 12020550461 A1 0 0 0
## 404 TW I722568 B 0 0 0
## 405 CN 112533674 A 0 2 0
## 406 CO 2021002954 A2 0 0 0
## 407 JP 2021040642 A 1 0 0
## 408 AU 2019/341683 A1 0 0 0
## 409 JP 2021042204 A 4 0 0
## 410 US 2021/0077463 A1 0 1 24
## 411 CA 3153676 A1 0 0 0
## 412 CA 200622 S 0 0 0
## 413 WO 2021/048736 A1 7 0 0
## 414 AU 2021/200232 A1 0 0 0
## 415 PE 20210554 A1 0 0 0
## 416 AR 115987 A1 0 0 0
## 417 US 10946086 B2 111 1 0
## 418 TW 202111125 A 0 0 0
## 419 US 10947494 B2 11 0 0
## 420 CN 112513070 A 0 1 0
## 421 TW 202110480 A 0 0 0
## 422 US 10946031 B2 9 0 0
## 423 US 10945994 B2 16 0 2
## 424 BR 112020025701 A2 0 0 0
## 425 TW 202110446 A 0 0 0
## 426 PE 20210488 A1 0 0 0
## 427 KR 20210029234 A 0 0 0
## 428 ES 2811523 T3 0 0 0
## 429 US 2021/0069219 A1 0 0 0
## 430 JP 2021038265 A 4 0 0
## 431 US 2021/0069326 A1 0 0 41
## 432 EA 037318 B1 2 0 0
## 433 AU 2019/329884 A1 0 0 0
## 434 AU 2016/286108 B2 2 0 0
## 435 AU 2019/323790 A1 0 0 0
## 436 JP 2021036873 A 3 0 0
## 437 EP 3334760 B1 0 0 0
## 438 UA 123270 C2 0 0 0
## 439 EP 3091981 B1 0 0 0
## 440 LT C2534153 I2 0 0 0
## 441 US 10941216 B2 20 0 82
## 442 BR 112020024956 A2 0 0 0
## 443 US 10941118 B2 10 0 0
## 444 CN 112469473 A 0 0 0
## 445 PL 3096785 T3 0 0 0
## 446 PL 3310784 T3 0 0 0
## 447 CO 2021002230 A2 0 0 0
## 448 CO 2021001893 A2 0 0 0
## 449 CO 2020015305 A2 0 0 0
## 450 PE 20210468 A1 0 0 0
## 451 EA 037288 B1 4 0 0
## 452 PE 20210412 A1 0 0 0
## 453 US 2021/0061902 A1 0 0 225
## 454 US 2021/0061923 A1 0 0 0
## 455 EP 2968155 B1 12 0 0
## 456 EP 3784664 A1 0 0 0
## 457 MA 52360 A 0 0 0
## 458 BR 112014018192 A8 0 0 0
## 459 BR 112020020941 A2 0 0 0
## 460 ES 2808987 T3 0 0 0
## 461 BR 112020024470 A2 0 0 0
## 462 US 10934279 B2 17 0 0
## 463 TW 202108167 A 0 0 0
## 464 TW I720272 B 1 0 0
## 465 IL 280292 A 0 0 0
## 466 TW I720448 B 0 0 0
## 467 IL 279591 A 0 0 0
## 468 IL 280317 A 0 0 0
## 469 IL 279926 A 0 0 0
## 470 DO P2020000191 A 0 0 0
## 471 IL 248667 B 0 0 0
## 472 IL 254131 B 0 0 0
## 473 IL 244437 B 0 0 0
## 474 IL 255444 B 0 0 0
## 475 PT 3137114 T 0 0 0
## 476 RU 2743793 C1 3 0 0
## 477 CL 2020002507 A1 0 0 0
## 478 UY 38786 A 0 0 0
## 479 SG 11202100888W A 0 0 0
## 480 US 2021/0054087 A1 0 0 502
## 481 AU 2018/208844 B2 5 0 0
## 482 EP 3781132 A1 0 0 0
## 483 EP 3341367 B1 0 0 0
## 484 EP 3148587 B1 1 0 0
## 485 ES 2807591 T3 0 0 0
## 486 US 10927178 B2 0 0 173
## 487 BR 112020022897 A2 0 0 0
## 488 KR 20210020095 A 0 0 0
## 489 KR 20210019529 A 0 0 0
## 490 CN 112384282 A 0 0 0
## 491 AU 2018/349259 B2 1 0 0
## 492 US 2021/0047298 A1 0 0 0
## 493 JP 2021020890 A 0 0 0
## 494 AU 2019/325400 A1 0 0 0
## 495 AU 2019/336197 A1 0 0 0
## 496 WO 2021/029854 A1 4 1 0
## 497 JP 2021019632 A 2 0 0
## 498 US 2021/0047325 A1 2 0 2
## 499 CA 3150424 A1 0 0 0
## 500 BR PI0915166 B1 0 0 0
## 501 KR 20210018529 A 0 0 6
## 502 EP 3774863 A1 0 0 0
## 503 KR 20210018341 A 0 0 0
## 504 BR 112020019368 A2 0 0 0
## 505 US 10918708 B2 139 2 40
## 506 PT 3327027 T 0 0 0
## 507 DK 3242888 T3 0 0 0
## 508 PH 12020550596 A1 0 0 0
## 509 DK 3328849 T5 0 0 0
## 510 CL 2020002748 A1 0 0 0
## 511 US 2021/0040182 A1 0 0 518
## 512 US 2021/0040045 A1 0 0 0
## 513 TW I718758 B 2 0 0
## 514 TW I718644 B 2 0 0
## 515 TW I718427 B 3 0 0
## 516 TW I718144 B 1 0 0
## 517 CN 112336854 A 0 0 0
## 518 BR 112020022595 A2 0 0 0
## 519 KR 20210014763 A 0 0 0
## 520 BR 112020023891 A2 0 0 0
## 521 BR 112020017935 A2 0 0 0
## 522 KR 20210013777 A 0 0 62
## 523 US 2021/0032308 A1 0 0 130
## 524 AU 2019/304198 A1 0 0 0
## 525 AU 2016/280137 B2 6 0 0
## 526 AU 2019/290212 A1 0 0 0
## 527 RU 2742260 C2 3 0 0
## 528 JP 2021011493 A 2 0 0
## 529 KR 20210013167 A 0 0 200
## 530 KR 20210013165 A 0 0 890
## 531 US 10906955 B2 15 0 25
## 532 CN 112313219 A 0 0 0
## 533 US 10906888 B2 49 0 0
## 534 TW 202104224 A 0 0 0
## 535 IL 254073 B 0 0 0
## 536 IL 279396 A 0 0 0
## 537 IL 278924 A 0 0 0
## 538 IL 278926 A 0 0 0
## 539 IL 279224 A 0 0 0
## 540 IL 279300 A 0 0 0
## 541 IL 255224 B 0 0 0
## 542 IL 279270 A 0 0 0
## 543 IL 252158 B 0 0 0
## 544 SI 3500564 T1 0 0 0
## 545 UY 38768 A 0 0 0
## 546 EC SP20078651 A 0 0 0
## 547 UY 38752 A 0 0 0
## 548 KR 20210011076 A 2 0 798
## 549 SG 11202100021T A 0 0 0
## 550 US 2021/0024497 A1 0 0 0
## 551 AU 2019/210579 B2 3 0 0
## 552 US 2021/0023134 A1 0 0 10
## 553 JP 2021008452 A 0 0 0
## 554 US 2021/0024903 A1 0 1 761
## 555 US 2021/0024589 A1 0 0 81
## 556 US 2021/0023200 A1 0 0 326
## 557 US 2021/0023193 A1 0 2 40
## 558 PT 3143019 T 0 0 0
## 559 HU E050811 T2 0 0 0
## 560 WO 2021/014360 A1 14 0 0
## 561 US 2021/0024536 A1 0 0 0
## 562 AR 115498 A1 0 0 0
## 563 EP 3770164 A1 6 0 0
## 564 EP 3768669 A1 0 0 0
## 565 BR 112020021689 A2 0 0 0
## 566 CN 112263677 A 0 0 0
## 567 US 10899802 B2 335 0 22
## 568 US 10899840 B2 36 1 36
## 569 DK 3137114 T3 0 0 0
## 570 PL 3339303 T3 0 0 0
## 571 GE P20217211 B 0 0 0
## 572 WO 2021/009676 A1 20 1 0
## 573 CA 3147266 A1 0 0 0
## 574 US 2021/0017206 A1 0 0 0
## 575 US 2021/0017553 A1 2 1 5
## 576 US 2021/0017205 A1 0 0 0
## 577 US 2021/0017254 A1 0 0 12
## 578 US 2021/0017172 A1 0 2 0
## 579 CR 20200612 A 0 0 0
## 580 EP 3765474 A1 0 0 0
## 581 KR 20210008177 A 0 0 4
## 582 EP 3765027 A1 0 0 0
## 583 EP 3766885 A1 77 0 0
## 584 EP 3765019 A1 0 0 0
## 585 SA 516371030 B1 0 0 0
## 586 MA 52012 A 0 0 0
## 587 US 10894083 B2 33 0 10
## 588 PE 20210127 A1 0 0 0
## 589 BR 112016029612 B1 0 0 0
## 590 DK 3327027 T3 0 0 0
## 591 MY 182282 A 0 0 0
## 592 MY 182168 A 0 0 0
## 593 CN 112236446 A 0 0 0
## 594 US 2021/0009601 A1 0 0 15
## 595 US 2021/0009657 A1 2 1 36
## 596 RU 2740309 C2 3 0 0
## 597 EP 3761954 A1 0 0 0
## 598 PT 3328867 T 0 0 0
## 599 CA 2928908 C 0 0 66
## 600 CN 112218618 A 4 0 0
## 601 MY 181891 A 0 0 0
## 602 US 10888611 B2 331 0 22
## 603 RU 2740135 C1 4 0 0
## 604 NI 202000058 A 0 0 0
## 605 KR 20210002757 A 0 0 46
## 606 CN 112195169 A 0 0 0
## 607 KR 20210002642 A 0 0 2
## 608 US 2021/0002369 A1 0 0 11
## 609 US 2021/0002371 A1 0 0 9
## 610 US 2021/0002251 A1 0 0 0
## 611 JP 2021001170 A 0 0 0
## 612 AU 2020/281105 A1 0 0 0
## 613 US 2021/0000930 A1 0 0 3
## 614 AU 2019/299666 A1 0 0 0
## 615 WO 2021/001741 A1 8 0 11
## 616 EP 3327027 B1 0 0 0
## 617 EP 3759129 A1 0 0 0
## 618 EP 3328849 B9 0 0 0
## 619 EP 3759134 A1 0 0 0
## 620 EP 3758753 A1 0 0 0
## 621 BR 112020015641 A2 0 0 0
## 622 BR 112020015603 A2 0 0 NA
## 623 CN 112168957 A 0 0 NA
## 624 DK 3143019 T3 0 0 NA
## 625 CN 112156176 A 0 0 NA
## 626 TW I715285 B 2 0 NA
# Attach TrainData's citation columns to the rows of SCount. right_join()
# keeps every row of SCount, matching TrainData$Patent_Number against
# SCount$`Patent_No.`. The assignment target is written first (`<-`) so the
# name being created is visible at the start of the statement.
joined_data <- TrainData %>%
  right_join(SCount, by = c("Patent_Number" = "Patent_No."))
joined_data
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count Sequence_Count
## 1 CA 189065 S 0 0 0
## 2 NI 202000072 A 0 0 0
## 3 KR 20210032013 A 0 0 80
## 4 PH 12020550461 A1 0 0 0
## 5 TW I722568 B 0 0 0
## 6 CN 112533674 A 0 2 0
## 7 CO 2021002954 A2 0 0 0
## 8 JP 2021040642 A 1 0 0
## 9 AU 2019/341683 A1 0 0 0
## 10 JP 2021042204 A 4 0 0
## 11 US 2021/0077463 A1 0 1 24
## 12 CA 3153676 A1 0 0 0
## 13 CA 200622 S 0 0 0
## 14 WO 2021/048736 A1 7 0 0
## 15 AU 2021/200232 A1 0 0 0
## 16 PE 20210554 A1 0 0 0
## 17 AR 115987 A1 0 0 0
## 18 US 10946086 B2 111 1 0
## 19 TW 202111125 A 0 0 0
## 20 US 10947494 B2 11 0 0
## 21 CN 112513070 A 0 1 0
## 22 TW 202110480 A 0 0 0
## 23 US 10946031 B2 9 0 0
## 24 US 10945994 B2 16 0 2
## 25 BR 112020025701 A2 0 0 0
## 26 TW 202110446 A 0 0 0
## 27 PE 20210488 A1 0 0 0
## 28 KR 20210029234 A 0 0 0
## 29 ES 2811523 T3 0 0 0
## 30 US 2021/0069219 A1 0 0 0
## 31 JP 2021038265 A 4 0 0
## 32 US 2021/0069326 A1 0 0 41
## 33 EA 037318 B1 2 0 0
## 34 AU 2019/329884 A1 0 0 0
## 35 AU 2016/286108 B2 2 0 0
## 36 AU 2019/323790 A1 0 0 0
## 37 JP 2021036873 A 3 0 0
## 38 EP 3334760 B1 0 0 0
## 39 UA 123270 C2 0 0 0
## 40 EP 3091981 B1 0 0 0
## 41 LT C2534153 I2 0 0 0
## 42 US 10941216 B2 20 0 82
## 43 BR 112020024956 A2 0 0 0
## 44 US 10941118 B2 10 0 0
## 45 CN 112469473 A 0 0 0
## 46 PL 3096785 T3 0 0 0
## 47 PL 3310784 T3 0 0 0
## 48 CO 2021002230 A2 0 0 0
## 49 CO 2021001893 A2 0 0 0
## 50 CO 2020015305 A2 0 0 0
## 51 PE 20210468 A1 0 0 0
## 52 EA 037288 B1 4 0 0
## 53 PE 20210412 A1 0 0 0
## 54 US 2021/0061902 A1 0 0 225
## 55 US 2021/0061923 A1 0 0 0
## 56 EP 2968155 B1 12 0 0
## 57 EP 3784664 A1 0 0 0
## 58 MA 52360 A 0 0 0
## 59 BR 112014018192 A8 0 0 0
## 60 BR 112020020941 A2 0 0 0
## 61 ES 2808987 T3 0 0 0
## 62 BR 112020024470 A2 0 0 0
## 63 US 10934279 B2 17 0 0
## 64 TW 202108167 A 0 0 0
## 65 TW I720272 B 1 0 0
## 66 IL 280292 A 0 0 0
## 67 TW I720448 B 0 0 0
## 68 IL 279591 A 0 0 0
## 69 IL 280317 A 0 0 0
## 70 IL 279926 A 0 0 0
## 71 DO P2020000191 A 0 0 0
## 72 IL 248667 B 0 0 0
## 73 IL 254131 B 0 0 0
## 74 IL 244437 B 0 0 0
## 75 IL 255444 B 0 0 0
## 76 PT 3137114 T 0 0 0
## 77 RU 2743793 C1 3 0 0
## 78 CL 2020002507 A1 0 0 0
## 79 UY 38786 A 0 0 0
## 80 SG 11202100888W A 0 0 0
## 81 US 2021/0054087 A1 0 0 502
## 82 AU 2018/208844 B2 5 0 0
## 83 EP 3781132 A1 0 0 0
## 84 EP 3341367 B1 0 0 0
## 85 EP 3148587 B1 1 0 0
## 86 ES 2807591 T3 0 0 0
## 87 US 10927178 B2 0 0 173
## 88 BR 112020022897 A2 0 0 0
## 89 KR 20210020095 A 0 0 0
## 90 KR 20210019529 A 0 0 0
## 91 CN 112384282 A 0 0 0
## 92 AU 2018/349259 B2 1 0 0
## 93 US 2021/0047298 A1 0 0 0
## 94 JP 2021020890 A 0 0 0
## 95 AU 2019/325400 A1 0 0 0
## 96 AU 2019/336197 A1 0 0 0
## 97 WO 2021/029854 A1 4 1 0
## 98 JP 2021019632 A 2 0 0
## 99 US 2021/0047325 A1 2 0 2
## 100 CA 3150424 A1 0 0 0
## 101 BR PI0915166 B1 0 0 0
## 102 KR 20210018529 A 0 0 6
## 103 EP 3774863 A1 0 0 0
## 104 KR 20210018341 A 0 0 0
## 105 BR 112020019368 A2 0 0 0
## 106 US 10918708 B2 139 2 40
## 107 PT 3327027 T 0 0 0
## 108 DK 3242888 T3 0 0 0
## 109 PH 12020550596 A1 0 0 0
## 110 DK 3328849 T5 0 0 0
## 111 CL 2020002748 A1 0 0 0
## 112 US 2021/0040182 A1 0 0 518
## 113 US 2021/0040045 A1 0 0 0
## 114 TW I718758 B 2 0 0
## 115 TW I718644 B 2 0 0
## 116 TW I718427 B 3 0 0
## 117 TW I718144 B 1 0 0
## 118 CN 112336854 A 0 0 0
## 119 BR 112020022595 A2 0 0 0
## 120 KR 20210014763 A 0 0 0
## 121 BR 112020023891 A2 0 0 0
## 122 BR 112020017935 A2 0 0 0
## 123 KR 20210013777 A 0 0 62
## 124 US 2021/0032308 A1 0 0 130
## 125 AU 2019/304198 A1 0 0 0
## 126 AU 2016/280137 B2 6 0 0
## 127 AU 2019/290212 A1 0 0 0
## 128 RU 2742260 C2 3 0 0
## 129 JP 2021011493 A 2 0 0
## 130 KR 20210013167 A 0 0 200
## 131 KR 20210013165 A 0 0 890
## 132 US 10906955 B2 15 0 25
## 133 CN 112313219 A 0 0 0
## 134 US 10906888 B2 49 0 0
## 135 TW 202104224 A 0 0 0
## 136 IL 254073 B 0 0 0
## 137 IL 279396 A 0 0 0
## 138 IL 278924 A 0 0 0
## 139 IL 278926 A 0 0 0
## 140 IL 279224 A 0 0 0
## 141 IL 279300 A 0 0 0
## 142 IL 255224 B 0 0 0
## 143 IL 279270 A 0 0 0
## 144 IL 252158 B 0 0 0
## 145 SI 3500564 T1 0 0 0
## 146 UY 38768 A 0 0 0
## 147 EC SP20078651 A 0 0 0
## 148 UY 38752 A 0 0 0
## 149 KR 20210011076 A 2 0 798
## 150 SG 11202100021T A 0 0 0
## 151 US 2021/0024497 A1 0 0 0
## 152 AU 2019/210579 B2 3 0 0
## 153 US 2021/0023134 A1 0 0 10
## 154 JP 2021008452 A 0 0 0
## 155 US 2021/0024903 A1 0 1 761
## 156 US 2021/0024589 A1 0 0 81
## 157 US 2021/0023200 A1 0 0 326
## 158 US 2021/0023193 A1 0 2 40
## 159 PT 3143019 T 0 0 0
## 160 HU E050811 T2 0 0 0
## 161 WO 2021/014360 A1 14 0 0
## 162 US 2021/0024536 A1 0 0 0
## 163 AR 115498 A1 0 0 0
## 164 EP 3770164 A1 6 0 0
## 165 EP 3768669 A1 0 0 0
## 166 BR 112020021689 A2 0 0 0
## 167 CN 112263677 A 0 0 0
## 168 US 10899802 B2 335 0 22
## 169 US 10899840 B2 36 1 36
## 170 DK 3137114 T3 0 0 0
## 171 PL 3339303 T3 0 0 0
## 172 GE P20217211 B 0 0 0
## 173 WO 2021/009676 A1 20 1 0
## 174 CA 3147266 A1 0 0 0
## 175 US 2021/0017206 A1 0 0 0
## 176 US 2021/0017553 A1 2 1 5
## 177 US 2021/0017205 A1 0 0 0
## 178 US 2021/0017254 A1 0 0 12
## 179 US 2021/0017172 A1 0 2 0
## 180 CR 20200612 A 0 0 0
## 181 EP 3765474 A1 0 0 0
## 182 KR 20210008177 A 0 0 4
## 183 EP 3765027 A1 0 0 0
## 184 EP 3766885 A1 77 0 0
## 185 EP 3765019 A1 0 0 0
## 186 SA 516371030 B1 0 0 0
## 187 MA 52012 A 0 0 0
## 188 US 10894083 B2 33 0 10
## 189 PE 20210127 A1 0 0 0
## 190 BR 112016029612 B1 0 0 0
## 191 DK 3327027 T3 0 0 0
## 192 MY 182282 A 0 0 0
## 193 MY 182168 A 0 0 0
## 194 CN 112236446 A 0 0 0
## 195 US 2021/0009601 A1 0 0 15
## 196 US 2021/0009657 A1 2 1 36
## 197 RU 2740309 C2 3 0 0
## 198 EP 3761954 A1 0 0 0
## 199 PT 3328867 T 0 0 0
## 200 CA 2928908 C 0 0 66
## 201 CN 112218618 A 4 0 0
## 202 MY 181891 A 0 0 0
## 203 US 10888611 B2 331 0 22
## 204 RU 2740135 C1 4 0 0
## 205 NI 202000058 A 0 0 0
## 206 KR 20210002757 A 0 0 46
## 207 CN 112195169 A 0 0 0
## 208 KR 20210002642 A 0 0 2
## 209 US 2021/0002369 A1 0 0 11
## 210 US 2021/0002371 A1 0 0 9
## 211 US 2021/0002251 A1 0 0 0
## 212 JP 2021001170 A 0 0 0
## 213 AU 2020/281105 A1 0 0 0
## 214 US 2021/0000930 A1 0 0 3
## 215 AU 2019/299666 A1 0 0 0
## 216 WO 2021/001741 A1 8 0 11
## 217 EP 3327027 B1 0 0 0
## 218 EP 3759129 A1 0 0 0
## 219 EP 3328849 B9 0 0 0
## 220 EP 3759134 A1 0 0 0
## 221 EP 3758753 A1 0 0 0
## 222 BR 112020015641 A2 0 0 0
# Load the combined training-and-testing patent table from disk, then preview
# its first six rows.
# NOTE(review): the absolute, user-specific path only resolves on the
# author's machine.
ttdata <- read.csv(
  file = "/Users/jusimioni/Desktop/TrainingandTestingData.csv"
)
head(ttdata, n = 6L)
## Patent_Number Cites_Patent_Count Cited_by_Patent_Count Partition
## 1 PL 3341367 T3 0 0 0
## 2 HR P20210871 T1 0 0 0
## 3 CR 20210284 A 0 0 0
## 4 US 2021/0205309 A1 0 0 0
## 5 JP 2021100972 A 3 0 0
## 6 AU 2021/203768 A1 0 0 0
# Mean Cites_Patent_Count for every (Partition, Patent_Number) combination;
# missing counts are ignored when averaging.
partition1 <- group_by(ttdata, Partition, Patent_Number)
patent_mean <- summarise(
  partition1,
  Patent_Count_mean = mean(Cites_Patent_Count, na.rm = TRUE),
  # State the output grouping explicitly instead of relying on the implicit
  # default (which also emits an informational message): peel off the last
  # grouping variable, Patent_Number, and stay grouped by Partition.
  .groups = "drop_last"
)
patent_mean
## # A tibble: 725 × 3
## # Groups: Partition [2]
## Partition Patent_Number Patent_Count_mean
## <int> <chr> <dbl>
## 1 0 AR 115498 A1 0
## 2 0 AR 115987 A1 0
## 3 0 AR 116010 A1 0
## 4 0 AR 116046 A1 0
## 5 0 AR 116295 A1 0
## 6 0 AR 116417 A1 0
## 7 0 AR 116464 A1 0
## 8 0 AR 116504 A1 0
## 9 0 AR 116603 A1 0
## 10 0 AR 116797 A1 0
## # … with 715 more rows
# Order the per-patent means from largest to smallest, then display them.
# NOTE(review): the result is stored under the name `desc`, the same name as
# the sorting helper called on this line; renaming it would require updating
# every later use of `desc` as well.
desc <- patent_mean %>%
  arrange(desc(Patent_Count_mean))
desc
## # A tibble: 725 × 3
## # Groups: Partition [2]
## Partition Patent_Number Patent_Count_mean
## <int> <chr> <dbl>
## 1 0 US 10899802 B2 335
## 2 0 US 10888611 B2 331
## 3 1 US 11208633 B2 161
## 4 0 US 10982198 B2 151
## 5 0 WO 2021/124073 A1 147
## 6 0 US 11020469 B2 142
## 7 0 US 10918708 B2 139
## 8 0 US 11014909 B2 121
## 9 0 US 10946086 B2 111
## 10 0 US 11021528 B2 98
## # … with 715 more rows
# Show only the rows whose Patent_Count_mean is strictly greater than zero.
desc %>%
  filter(Patent_Count_mean > 0)
## # A tibble: 137 × 3
## # Groups: Partition [2]
## Partition Patent_Number Patent_Count_mean
## <int> <chr> <dbl>
## 1 0 US 10899802 B2 335
## 2 0 US 10888611 B2 331
## 3 1 US 11208633 B2 161
## 4 0 US 10982198 B2 151
## 5 0 WO 2021/124073 A1 147
## 6 0 US 11020469 B2 142
## 7 0 US 10918708 B2 139
## 8 0 US 11014909 B2 121
## 9 0 US 10946086 B2 111
## 10 0 US 11021528 B2 98
## # … with 127 more rows