library(readr)
# Protein class labels, in the same order as the box-plot labels used for
# class tables 0 through 6 in this script.
# The literals use plain ASCII double quotes; typographic quotes are not
# valid string delimiters in R.
protein_class <- c(
  "homosapiens", "erythrocruorin", "hemerythrin", "hemocyanin",
  "leghemoglobin", "myoglobin", "hemoglobin"
)
# Load the class-0 amino-acid table (plotted under the label "homosapiens"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_0_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_0_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_0_aa)
## Class TotalAA G P
## Min. :0 Min. : 4.0 Min. :0.00000 Min. :0.00000
## 1st Qu.:0 1st Qu.: 272.0 1st Qu.:0.05031 1st Qu.:0.04245
## Median :0 Median : 448.0 Median :0.06435 Median :0.05575
## Mean :0 Mean : 608.1 Mean :0.06731 Mean :0.06200
## 3rd Qu.:0 3rd Qu.: 721.0 3rd Qu.:0.07968 3rd Qu.:0.07477
## Max. :0 Max. :34350.0 Max. :0.46474 Max. :0.39241
## A V L I
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.05464 1st Qu.:0.04830 1st Qu.:0.08023 1st Qu.:0.02981
## Median :0.06887 Median :0.06008 Median :0.09709 Median :0.04280
## Mean :0.07232 Mean :0.06064 Mean :0.09839 Mean :0.04358
## 3rd Qu.:0.08599 3rd Qu.:0.07200 3rd Qu.:0.11528 3rd Qu.:0.05577
## Max. :0.30723 Max. :0.18852 Max. :0.32323 Max. :0.21538
## M C F Y
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.01556 1st Qu.:0.01231 1st Qu.:0.02597 1st Qu.:0.01873
## Median :0.02135 Median :0.01930 Median :0.03556 Median :0.02681
## Mean :0.02270 Mean :0.02362 Mean :0.03699 Mean :0.02792
## 3rd Qu.:0.02794 3rd Qu.:0.02821 3rd Qu.:0.04600 3rd Qu.:0.03571
## Max. :0.13836 Max. :0.36816 Max. :0.17391 Max. :0.24194
## W H K R
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.006515 1st Qu.:0.01750 1st Qu.:0.03893 1st Qu.:0.04398
## Median :0.011364 Median :0.02391 Median :0.05522 Median :0.05556
## Mean :0.012872 Mean :0.02520 Mean :0.05774 Mean :0.05863
## 3rd Qu.:0.017467 3rd Qu.:0.03102 3rd Qu.:0.07248 3rd Qu.:0.06962
## Max. :0.232877 Max. :0.30000 Max. :0.31250 Max. :0.47059
## Q N E D
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.03425 1st Qu.:0.02481 1st Qu.:0.05208 1st Qu.:0.03727
## Median :0.04405 Median :0.03488 Median :0.06693 Median :0.04750
## Mean :0.04616 Mean :0.03552 Mean :0.06942 Mean :0.04766
## 3rd Qu.:0.05491 3rd Qu.:0.04501 3rd Qu.:0.08309 3rd Qu.:0.05714
## Max. :0.98750 Max. :0.13000 Max. :0.38235 Max. :0.20000
## S T
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.06192 1st Qu.:0.04196
## Median :0.07568 Median :0.05117
## Mean :0.07877 Mean :0.05253
## 3rd Qu.:0.09232 3rd Qu.:0.06103
## Max. :0.41660 Max. :0.34949
# Load the class-1 amino-acid table (plotted under the label "erythrocruorin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_1_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_1_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_1_aa)
## Class TotalAA G P
## Min. :1 Min. :136.0 Min. :0.03974 Min. :0.01863
## 1st Qu.:1 1st Qu.:141.2 1st Qu.:0.05617 1st Qu.:0.02778
## Median :1 Median :147.0 Median :0.06784 Median :0.03126
## Mean :1 Mean :148.4 Mean :0.06591 Mean :0.03044
## 3rd Qu.:1 3rd Qu.:151.0 3rd Qu.:0.07496 3rd Qu.:0.03497
## Max. :1 Max. :170.0 Max. :0.08276 Max. :0.03797
## A V L I
## Min. :0.06383 Min. :0.05517 Min. :0.04317 Min. :0.03797
## 1st Qu.:0.07178 1st Qu.:0.06349 1st Qu.:0.08019 1st Qu.:0.04301
## Median :0.10190 Median :0.06733 Median :0.09711 Median :0.05334
## Mean :0.10598 Mean :0.07161 Mean :0.09424 Mean :0.05552
## 3rd Qu.:0.12247 3rd Qu.:0.08019 3rd Qu.:0.10685 3rd Qu.:0.06581
## Max. :0.20497 Max. :0.09317 Max. :0.16471 Max. :0.07914
## M C F Y
## Min. :0.00000 Min. :0.000000 Min. :0.02941 Min. :0.005882
## 1st Qu.:0.00000 1st Qu.:0.008488 1st Qu.:0.04945 1st Qu.:0.008387
## Median :0.00959 Median :0.014337 Median :0.06185 Median :0.013975
## Mean :0.01160 Mean :0.015320 Mean :0.06679 Mean :0.015654
## 3rd Qu.:0.01784 3rd Qu.:0.020654 3rd Qu.:0.08556 3rd Qu.:0.020654
## Max. :0.03311 Max. :0.026846 Max. :0.10596 Max. :0.028777
## W H K R
## Min. :0.006623 Min. :0.01266 Min. :0.04027 Min. :0.01863
## 1st Qu.:0.015203 1st Qu.:0.03174 1st Qu.:0.04977 1st Qu.:0.02718
## Median :0.019427 Median :0.04667 Median :0.05920 Median :0.04818
## Mean :0.018245 Mean :0.04758 Mean :0.06085 Mean :0.04734
## 3rd Qu.:0.021017 3rd Qu.:0.06714 3rd Qu.:0.07153 3rd Qu.:0.06196
## Max. :0.027397 Max. :0.07857 Max. :0.09494 Max. :0.09272
## Q N E D
## Min. :0.01342 Min. :0.006897 Min. :0.01899 Min. :0.04969
## 1st Qu.:0.02722 1st Qu.:0.016597 1st Qu.:0.03403 1st Qu.:0.06676
## Median :0.03454 Median :0.030255 Median :0.05577 Median :0.07671
## Mean :0.03843 Mean :0.030074 Mean :0.05217 Mean :0.07642
## 3rd Qu.:0.04904 3rd Qu.:0.040473 3rd Qu.:0.06662 3rd Qu.:0.08452
## Max. :0.06475 Max. :0.050633 Max. :0.08054 Max. :0.10596
## S T
## Min. :0.03546 Min. :0.01351
## 1st Qu.:0.04145 1st Qu.:0.02691
## Median :0.04914 Median :0.04079
## Mean :0.05493 Mean :0.04090
## 3rd Qu.:0.06453 3rd Qu.:0.05598
## Max. :0.09317 Max. :0.06618
# Load the class-2 amino-acid table (plotted under the label "hemerythrin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_2_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_2_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_2_aa)
## Class TotalAA G P
## Min. :2 Min. :113.0 Min. :0.03676 Min. :0.02206
## 1st Qu.:2 1st Qu.:113.2 1st Qu.:0.05162 1st Qu.:0.02824
## Median :2 Median :117.0 Median :0.05646 Median :0.03419
## Mean :2 Mean :117.7 Mean :0.06162 Mean :0.03414
## 3rd Qu.:2 3rd Qu.:117.0 3rd Qu.:0.06142 3rd Qu.:0.03532
## Max. :2 Max. :136.0 Max. :0.10619 Max. :0.05128
## A V L I
## Min. :0.04386 Min. :0.02655 Min. :0.06667 Min. :0.02941
## 1st Qu.:0.05478 1st Qu.:0.03441 1st Qu.:0.07033 1st Qu.:0.03449
## Median :0.06943 Median :0.04349 Median :0.07386 Median :0.04637
## Mean :0.07529 Mean :0.04502 Mean :0.07702 Mean :0.05066
## 3rd Qu.:0.08494 3rd Qu.:0.05096 3rd Qu.:0.08333 3rd Qu.:0.06592
## Max. :0.14159 Max. :0.08824 Max. :0.09559 Max. :0.07965
## M C F Y
## Min. :0.00885 Min. :0.000000 Min. :0.02941 Min. :0.02564
## 1st Qu.:0.01102 1st Qu.:0.002083 1st Qu.:0.06838 1st Qu.:0.03419
## Median :0.02353 Median :0.008547 Median :0.07697 Median :0.04276
## Mean :0.02450 Mean :0.007845 Mean :0.07379 Mean :0.04145
## 3rd Qu.:0.03419 3rd Qu.:0.008830 3rd Qu.:0.07965 3rd Qu.:0.05088
## Max. :0.04274 Max. :0.017699 Max. :0.10256 Max. :0.05882
## W H K R
## Min. :0.01667 Min. :0.03676 Min. :0.04274 Min. :0.00000
## 1st Qu.:0.02295 1st Qu.:0.04601 1st Qu.:0.07506 1st Qu.:0.02581
## Median :0.02564 Median :0.05646 Median :0.09402 Median :0.03419
## Mean :0.02560 Mean :0.05299 Mean :0.08518 Mean :0.03292
## 3rd Qu.:0.02632 3rd Qu.:0.05983 3rd Qu.:0.09713 3rd Qu.:0.03510
## Max. :0.03540 Max. :0.06195 Max. :0.10833 Max. :0.09559
## Q N E D
## Min. :0.01471 Min. :0.02655 Min. :0.03540 Min. :0.04274
## 1st Qu.:0.03419 1st Qu.:0.04487 1st Qu.:0.05275 1st Qu.:0.05983
## Median :0.03524 Median :0.05286 Median :0.06410 Median :0.08149
## Mean :0.03699 Mean :0.05124 Mean :0.07144 Mean :0.07723
## 3rd Qu.:0.04387 3rd Qu.:0.05702 3rd Qu.:0.09252 3rd Qu.:0.09627
## Max. :0.05310 Max. :0.07692 Max. :0.11966 Max. :0.10619
## S T
## Min. :0.01709 Min. :0.01667
## 1st Qu.:0.02295 1st Qu.:0.03449
## Median :0.03037 Median :0.04031
## Mean :0.02896 Mean :0.04611
## 3rd Qu.:0.03486 3rd Qu.:0.05752
## Max. :0.04167 Max. :0.08547
# Load the class-3 amino-acid table (plotted under the label "hemocyanin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_3_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_3_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_3_aa)
## Class TotalAA G P
## Min. :3 Min. : 7.00 Min. :0.00000 Min. :0.00000
## 1st Qu.:3 1st Qu.: 20.25 1st Qu.:0.03253 1st Qu.:0.03253
## Median :3 Median : 26.50 Median :0.05701 Median :0.04758
## Mean :3 Mean : 337.62 Mean :0.05230 Mean :0.04388
## 3rd Qu.:3 3rd Qu.: 564.50 3rd Qu.:0.06742 3rd Qu.:0.05533
## Max. :3 Max. :3408.00 Max. :0.18182 Max. :0.10000
## A V L I
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.04212 1st Qu.:0.04936 1st Qu.:0.07461 1st Qu.:0.03483
## Median :0.05510 Median :0.06469 Median :0.08951 Median :0.04718
## Mean :0.05893 Mean :0.07263 Mean :0.10708 Mean :0.04610
## 3rd Qu.:0.07143 3rd Qu.:0.08371 3rd Qu.:0.14123 3rd Qu.:0.05735
## Max. :0.25000 Max. :0.33333 Max. :0.25000 Max. :0.21429
## M C F Y
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.000000 Median :0.04799 Median :0.04047
## Mean :0.01061 Mean :0.006165 Mean :0.03852 Mean :0.03516
## 3rd Qu.:0.02170 3rd Qu.:0.013300 3rd Qu.:0.05964 3rd Qu.:0.04747
## Max. :0.09091 Max. :0.040000 Max. :0.13636 Max. :0.10000
## W H K R
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.04750 1st Qu.:0.00000
## Median :0.000000 Median :0.04881 Median :0.06230 Median :0.04374
## Mean :0.005594 Mean :0.04519 Mean :0.07243 Mean :0.03714
## 3rd Qu.:0.013459 3rd Qu.:0.06367 3rd Qu.:0.08665 3rd Qu.:0.05301
## Max. :0.024000 Max. :0.15789 Max. :0.19048 Max. :0.14286
## Q N E D
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.02929 1st Qu.:0.03497 1st Qu.:0.00000 1st Qu.:0.04821
## Median :0.05000 Median :0.04702 Median :0.05772 Median :0.07796
## Mean :0.06447 Mean :0.04728 Mean :0.04752 Mean :0.08086
## 3rd Qu.:0.08333 3rd Qu.:0.05825 3rd Qu.:0.07151 3rd Qu.:0.09821
## Max. :0.16667 Max. :0.14286 Max. :0.14286 Max. :0.27273
## S T
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.04410 1st Qu.:0.02833
## Median :0.05612 Median :0.04867
## Mean :0.06130 Mean :0.04845
## 3rd Qu.:0.07692 3rd Qu.:0.06092
## Max. :0.16667 Max. :0.18182
# Load the class-4 amino-acid table (plotted under the label "leghemoglobin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_4_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_4_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_4_aa)
## Class TotalAA G P
## Min. :4 Min. :144.0 Min. :0.04167 Min. :0.02069
## 1st Qu.:4 1st Qu.:145.0 1st Qu.:0.05337 1st Qu.:0.02759
## Median :4 Median :148.0 Median :0.06757 Median :0.03247
## Mean :4 Mean :215.8 Mean :0.06655 Mean :0.03036
## 3rd Qu.:4 3rd Qu.:154.0 3rd Qu.:0.07188 3rd Qu.:0.03314
## Max. :4 Max. :523.0 Max. :0.10134 Max. :0.03472
## A V L I
## Min. :0.08987 Min. :0.07534 Min. :0.07457 Min. :0.01351
## 1st Qu.:0.12094 1st Qu.:0.09253 1st Qu.:0.08997 1st Qu.:0.02759
## Median :0.13636 Median :0.10811 Median :0.09091 Median :0.04138
## Mean :0.13959 Mean :0.10076 Mean :0.09262 Mean :0.04032
## 3rd Qu.:0.16493 3rd Qu.:0.11039 3rd Qu.:0.09895 3rd Qu.:0.05844
## Max. :0.18621 Max. :0.11090 Max. :0.11644 Max. :0.06119
## M C F Y
## Min. :0.006849 Min. :0.000000 Min. :0.02677 Min. :0.01299
## 1st Qu.:0.006920 1st Qu.:0.000000 1st Qu.:0.04638 1st Qu.:0.02027
## Median :0.012987 Median :0.000000 Median :0.04828 Median :0.02083
## Mean :0.014701 Mean :0.001912 Mean :0.04714 Mean :0.02186
## 3rd Qu.:0.020552 3rd Qu.:0.000000 3rd Qu.:0.05300 3rd Qu.:0.02486
## Max. :0.028681 Max. :0.011472 Max. :0.06207 Max. :0.03448
## W H K R
## Min. :0.005736 Min. :0.01370 Min. :0.08031 Min. :0.006494
## 1st Qu.:0.010318 1st Qu.:0.01384 1st Qu.:0.08220 1st Qu.:0.010205
## Median :0.013514 Median :0.02027 Median :0.09459 Median :0.013699
## Mean :0.012277 Mean :0.02095 Mean :0.09057 Mean :0.014851
## 3rd Qu.:0.013841 3rd Qu.:0.02637 3rd Qu.:0.09689 3rd Qu.:0.013889
## Max. :0.019481 Max. :0.03247 Max. :0.09740 Max. :0.030593
## Q N E D
## Min. :0.02103 Min. :0.02083 Min. :0.05479 Min. :0.03247
## 1st Qu.:0.03003 1st Qu.:0.02486 1st Qu.:0.06100 1st Qu.:0.04564
## Median :0.03448 Median :0.03448 Median :0.06250 Median :0.04795
## Mean :0.03466 Mean :0.03456 Mean :0.06528 Mean :0.04914
## 3rd Qu.:0.04110 3rd Qu.:0.04545 3rd Qu.:0.06890 3rd Qu.:0.05536
## Max. :0.04795 Max. :0.04795 Max. :0.08442 Max. :0.06250
## S T
## Min. :0.04828 Min. :0.02703
## 1st Qu.:0.06287 1st Qu.:0.04152
## Median :0.07457 Median :0.04861
## Mean :0.07265 Mean :0.04926
## 3rd Qu.:0.08211 3rd Qu.:0.05657
## Max. :0.09028 Max. :0.06757
# Load the class-5 amino-acid table (plotted under the label "myoglobin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_5_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_5_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_5_aa)
## Class TotalAA G P
## Min. :5 Min. : 74.0 Min. :0.04027 Min. :0.006803
## 1st Qu.:5 1st Qu.:153.0 1st Qu.:0.07792 1st Qu.:0.025974
## Median :5 Median :154.0 Median :0.09091 Median :0.025974
## Mean :5 Mean :150.8 Mean :0.08522 Mean :0.028144
## 3rd Qu.:5 3rd Qu.:154.0 3rd Qu.:0.09524 3rd Qu.:0.032468
## Max. :5 Max. :378.0 Max. :0.14151 Max. :0.053691
## A V L I
## Min. :0.04698 Min. :0.02649 Min. :0.04828 Min. :0.01961
## 1st Qu.:0.08442 1st Qu.:0.03961 1st Qu.:0.10390 1st Qu.:0.04339
## Median :0.09091 Median :0.04545 Median :0.11565 Median :0.05195
## Mean :0.10066 Mean :0.04878 Mean :0.10985 Mean :0.05130
## 3rd Qu.:0.11039 3rd Qu.:0.05299 3rd Qu.:0.11688 3rd Qu.:0.05844
## Max. :0.19728 Max. :0.11966 Max. :0.15625 Max. :0.08966
## M C F Y
## Min. :0.00000 Min. :0.000000 Min. :0.02381 Min. :0.00000
## 1st Qu.:0.01948 1st Qu.:0.000000 1st Qu.:0.04545 1st Qu.:0.01299
## Median :0.02041 Median :0.000000 Median :0.04545 Median :0.01299
## Mean :0.02443 Mean :0.002208 Mean :0.04812 Mean :0.01395
## 3rd Qu.:0.02597 3rd Qu.:0.006494 3rd Qu.:0.05162 3rd Qu.:0.01299
## Max. :0.05882 Max. :0.032680 Max. :0.10417 Max. :0.03774
## W H K R
## Min. :0.00000 Min. :0.006803 Min. :0.01709 Min. :0.00000
## 1st Qu.:0.01299 1st Qu.:0.050767 1st Qu.:0.11688 1st Qu.:0.01299
## Median :0.01299 Median :0.058442 Median :0.12987 Median :0.01361
## Mean :0.01249 Mean :0.056105 Mean :0.12025 Mean :0.01746
## 3rd Qu.:0.01299 3rd Qu.:0.071429 3rd Qu.:0.12987 3rd Qu.:0.01948
## Max. :0.02759 Max. :0.090909 Max. :0.15625 Max. :0.03896
## Q N E D
## Min. :0.01361 Min. :0.00000 Min. :0.01911 Min. :0.01835
## 1st Qu.:0.02597 1st Qu.:0.01299 1st Qu.:0.07792 1st Qu.:0.04707
## Median :0.03896 Median :0.01948 Median :0.08442 Median :0.05195
## Mean :0.03947 Mean :0.02383 Mean :0.07879 Mean :0.05364
## 3rd Qu.:0.04545 3rd Qu.:0.02597 3rd Qu.:0.09091 3rd Qu.:0.05844
## Max. :0.07143 Max. :0.07438 Max. :0.11688 Max. :0.10828
## S T
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.03896 1st Qu.:0.02597
## Median :0.04545 Median :0.03247
## Mean :0.04584 Mean :0.03838
## 3rd Qu.:0.05195 3rd Qu.:0.04545
## Max. :0.12500 Max. :0.10256
# Load the class-6 amino-acid table (plotted under the label "hemoglobin"
# elsewhere in this script).
# Column types are pinned to the specification readr reported when this file
# was first parsed (Class and TotalAA as integers, every other column as
# double), so the result does not depend on readr's type guessing.
protein_class_6_aa <- read_csv(
  "~/Dropbox/git_projects/random_forest/4_exploratory/mono/protein_class_6_aa.csv",
  col_types = cols(
    .default = col_double(),
    Class = col_integer(),
    TotalAA = col_integer()
  )
)
# Per-column min, quartiles, median, mean and max; recorded output follows.
summary(protein_class_6_aa)
## Class TotalAA G P
## Min. :6 Min. : 15.0 Min. :0.00000 Min. :0.00000
## 1st Qu.:6 1st Qu.: 141.0 1st Qu.:0.06338 1st Qu.:0.02740
## Median :6 Median : 145.0 Median :0.07586 Median :0.04082
## Mean :6 Mean : 143.7 Mean :0.07339 Mean :0.03793
## 3rd Qu.:6 3rd Qu.: 146.0 3rd Qu.:0.08844 3rd Qu.:0.04930
## Max. :6 Max. :1156.0 Max. :0.11159 Max. :0.10000
## A V L I
## Min. :0.02941 Min. :0.03333 Min. :0.06667 Min. :0.000000
## 1st Qu.:0.09589 1st Qu.:0.07857 1st Qu.:0.12057 1st Qu.:0.000000
## Median :0.10959 Median :0.09220 Median :0.12329 Median :0.006803
## Mean :0.11209 Mean :0.09778 Mean :0.12544 Mean :0.008851
## 3rd Qu.:0.12766 3rd Qu.:0.11724 3rd Qu.:0.13014 3rd Qu.:0.014085
## Max. :0.40000 Max. :0.13333 Max. :0.18182 Max. :0.111111
## M C F Y
## Min. :0.000000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.006849 1st Qu.:0.006897 1st Qu.:0.04965 1st Qu.:0.01418
## Median :0.013605 Median :0.007092 Median :0.05479 Median :0.02055
## Mean :0.012146 Mean :0.010325 Mean :0.05435 Mean :0.02022
## 3rd Qu.:0.014184 3rd Qu.:0.013699 3rd Qu.:0.05674 3rd Qu.:0.02128
## Max. :0.066667 Max. :0.064014 Max. :0.07092 Max. :0.05263
## W H K R
## Min. :0.000000 Min. :0.00000 Min. :0.03521 Min. :0.00000
## 1st Qu.:0.007092 1st Qu.:0.05479 1st Qu.:0.07534 1st Qu.:0.02055
## Median :0.013605 Median :0.06164 Median :0.07801 Median :0.02128
## Mean :0.011644 Mean :0.06137 Mean :0.08010 Mean :0.02275
## 3rd Qu.:0.013699 3rd Qu.:0.07092 3rd Qu.:0.08451 3rd Qu.:0.02721
## Max. :0.094118 Max. :0.08571 Max. :0.26667 Max. :0.07692
## Q N E D
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.007092 1st Qu.:0.02128 1st Qu.:0.02817 1st Qu.:0.04795
## Median :0.020548 Median :0.03546 Median :0.04255 Median :0.05674
## Mean :0.020088 Mean :0.03614 Mean :0.04110 Mean :0.05599
## 3rd Qu.:0.027397 3rd Qu.:0.04795 3rd Qu.:0.05479 3rd Qu.:0.06383
## Max. :0.088235 Max. :0.08772 Max. :0.10000 Max. :0.08511
## S T
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.04762 1st Qu.:0.03425
## Median :0.06863 Median :0.04828
## Mean :0.06536 Mean :0.05075
## 3rd Qu.:0.08451 3rd Qu.:0.06383
## Max. :0.11348 Max. :0.10563
# Box plot of TotalAA for each of the seven class tables, drawn on the current
# graphics device and then again into an SVG file in the data directory.
setwd("~/Dropbox/git_projects/random_forest/data")

# One TotalAA vector per class table, in class order 0-6, with the label
# shown under each box.
total_aa_by_class <- list(
  protein_class_0_aa$TotalAA, protein_class_1_aa$TotalAA,
  protein_class_2_aa$TotalAA, protein_class_3_aa$TotalAA,
  protein_class_4_aa$TotalAA, protein_class_5_aa$TotalAA,
  protein_class_6_aa$TotalAA
)
class_labels <- c(
  "homosapiens", "erythrocruorin", "hemerythrin",
  "hemocyanin", "leghemoglobin", "myoglobin", "hemoglobin"
)

# Draw the box plot on whichever graphics device is active.
#   total_aa: list of numeric vectors, one per box.
#   labels:   character vector of box labels, one per element of total_aa.
# Both renderings below go through this single definition, so the on-screen
# and saved versions cannot drift apart. The y axis is logarithmic.
draw_total_aa_boxplot <- function(total_aa, labels) {
  invisible(boxplot(
    total_aa,
    log = "y", horizontal = FALSE,
    names = labels,
    main = "Total AA Versus Protein Category",
    ylab = "Total Amino Acids per Protein"
  ))
}

draw_total_aa_boxplot(total_aa_by_class, class_labels)

# Save copy to file.
# NOTE(review): the same file name is used again for the trimmed-data plot
# later in this script, so this copy gets overwritten - confirm intended.
svg(filename = "boxplot_total_aa_vs_protein_category.svg")
draw_total_aa_boxplot(total_aa_by_class, class_labels)
dev.off()
## png
## 2
# Drop short sequences from three class tables and write each trimmed table
# to a CSV file in the 2_single_aa directory.
setwd("~/Dropbox/git_projects/random_forest/2_single_aa")

# Rows are kept only when TotalAA is strictly greater than this value.
total_aa_cutoff <- 27

# The filters name the TotalAA column (the second column of each table)
# instead of using `[, 2]`: on a tibble `[, 2]` is a one-column tibble, so
# comparing it yields a logical matrix rather than the plain logical vector
# that subset() expects for row selection.
trimmed_homo_sapien_class_0 <- subset(
  protein_class_0_aa,
  protein_class_0_aa$TotalAA > total_aa_cutoff
)
write.csv(
  x = trimmed_homo_sapien_class_0,
  file = "trimmed_homo_sapien_class_0.csv",
  row.names = FALSE
)

# NOTE(review): this table is named "hemocyanin" but is built from class 4,
# which the box plots label "leghemoglobin" (hemocyanin is plotted as class 3).
# The recorded class-4 summary has a minimum TotalAA of 144, so this filter
# removes nothing, while class 3 has a minimum of 7. Confirm whether class 3
# was meant; names are left unchanged because later code refers to them.
trimmed_hemocyanin_class_4 <- subset(
  protein_class_4_aa,
  protein_class_4_aa$TotalAA > total_aa_cutoff
)
write.csv(
  x = trimmed_hemocyanin_class_4,
  file = "trimmed_hemocyanin_class_4.csv",
  row.names = FALSE
)

trimmed_hemoglobin_class_6 <- subset(
  protein_class_6_aa,
  protein_class_6_aa$TotalAA > total_aa_cutoff
)
write.csv(
  x = trimmed_hemoglobin_class_6,
  file = "trimmed_hemoglobin_class_6.csv",
  row.names = FALSE
)
# Box plot of TotalAA per class, using the trimmed tables for classes 0, 4
# and 6 and the untrimmed tables for the rest. Drawn on the current graphics
# device and then again into an SVG file in the data directory.
setwd("~/Dropbox/git_projects/random_forest/data")

# One TotalAA vector per class, in class order 0-6, with the label shown
# under each box.
trimmed_total_aa_by_class <- list(
  trimmed_homo_sapien_class_0$TotalAA, protein_class_1_aa$TotalAA,
  protein_class_2_aa$TotalAA, protein_class_3_aa$TotalAA,
  trimmed_hemocyanin_class_4$TotalAA, protein_class_5_aa$TotalAA,
  trimmed_hemoglobin_class_6$TotalAA
)
class_labels <- c(
  "homosapiens", "erythrocruorin", "hemerythrin",
  "hemocyanin", "leghemoglobin", "myoglobin", "hemoglobin"
)

# Draw the box plot on whichever graphics device is active.
#   total_aa: list of numeric vectors, one per box.
#   labels:   character vector of box labels, one per element of total_aa.
# Both renderings below go through this single definition, so the on-screen
# and saved versions cannot drift apart. The y axis is logarithmic.
draw_total_aa_boxplot <- function(total_aa, labels) {
  invisible(boxplot(
    total_aa,
    log = "y", horizontal = FALSE,
    names = labels,
    main = "Total AA Versus Protein Category",
    ylab = "Total Amino Acids per Protein"
  ))
}

draw_total_aa_boxplot(trimmed_total_aa_by_class, class_labels)

# Save copy to file.
# NOTE(review): this is the same file name the untrimmed plot was saved under
# earlier in this script, so that earlier file is replaced - confirm intended.
svg(filename = "boxplot_total_aa_vs_protein_category.svg")
draw_total_aa_boxplot(trimmed_total_aa_by_class, class_labels)
dev.off()
## png
## 2
# Column-wise summary of the class-0 table after the TotalAA trim; the
# recorded output follows.
summary(object = trimmed_homo_sapien_class_0)
## Class TotalAA G P
## Min. :0 Min. : 31.0 Min. :0.00000 Min. :0.00000
## 1st Qu.:0 1st Qu.: 272.0 1st Qu.:0.05032 1st Qu.:0.04245
## Median :0 Median : 448.0 Median :0.06435 Median :0.05575
## Mean :0 Mean : 608.5 Mean :0.06730 Mean :0.06199
## 3rd Qu.:0 3rd Qu.: 721.0 3rd Qu.:0.07968 3rd Qu.:0.07476
## Max. :0 Max. :34350.0 Max. :0.46474 Max. :0.39241
## A V L I
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.05464 1st Qu.:0.04831 1st Qu.:0.08024 1st Qu.:0.02982
## Median :0.06886 Median :0.06008 Median :0.09712 Median :0.04281
## Mean :0.07231 Mean :0.06066 Mean :0.09841 Mean :0.04359
## 3rd Qu.:0.08597 3rd Qu.:0.07200 3rd Qu.:0.11529 3rd Qu.:0.05578
## Max. :0.30723 Max. :0.18852 Max. :0.32323 Max. :0.21538
## M C F Y
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.01557 1st Qu.:0.01232 1st Qu.:0.02597 1st Qu.:0.01874
## Median :0.02135 Median :0.01931 Median :0.03556 Median :0.02682
## Mean :0.02270 Mean :0.02362 Mean :0.03700 Mean :0.02793
## 3rd Qu.:0.02794 3rd Qu.:0.02820 3rd Qu.:0.04600 3rd Qu.:0.03571
## Max. :0.13836 Max. :0.36816 Max. :0.17391 Max. :0.24194
## W H K R
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.006523 1st Qu.:0.01750 1st Qu.:0.03894 1st Qu.:0.04399
## Median :0.011364 Median :0.02392 Median :0.05521 Median :0.05556
## Mean :0.012874 Mean :0.02518 Mean :0.05772 Mean :0.05860
## 3rd Qu.:0.017467 3rd Qu.:0.03102 3rd Qu.:0.07246 3rd Qu.:0.06960
## Max. :0.232877 Max. :0.13725 Max. :0.31250 Max. :0.47059
## Q N E D
## Min. :0.00000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.03425 1st Qu.:0.02484 1st Qu.:0.05208 1st Qu.:0.03729
## Median :0.04405 Median :0.03488 Median :0.06693 Median :0.04750
## Mean :0.04618 Mean :0.03554 Mean :0.06942 Mean :0.04764
## 3rd Qu.:0.05492 3rd Qu.:0.04502 3rd Qu.:0.08309 3rd Qu.:0.05714
## Max. :0.98750 Max. :0.13000 Max. :0.38235 Max. :0.19908
## S T
## Min. :0.00000 Min. :0.00000
## 1st Qu.:0.06195 1st Qu.:0.04197
## Median :0.07568 Median :0.05117
## Mean :0.07879 Mean :0.05254
## 3rd Qu.:0.09232 3rd Qu.:0.06103
## Max. :0.41660 Max. :0.34949
# Column-wise summary of the class-4 table after the TotalAA trim. The
# recorded output below matches the untrimmed class-4 summary, i.e. the trim
# removed no rows from this table.
summary(object = trimmed_hemocyanin_class_4)
## Class TotalAA G P
## Min. :4 Min. :144.0 Min. :0.04167 Min. :0.02069
## 1st Qu.:4 1st Qu.:145.0 1st Qu.:0.05337 1st Qu.:0.02759
## Median :4 Median :148.0 Median :0.06757 Median :0.03247
## Mean :4 Mean :215.8 Mean :0.06655 Mean :0.03036
## 3rd Qu.:4 3rd Qu.:154.0 3rd Qu.:0.07188 3rd Qu.:0.03314
## Max. :4 Max. :523.0 Max. :0.10134 Max. :0.03472
## A V L I
## Min. :0.08987 Min. :0.07534 Min. :0.07457 Min. :0.01351
## 1st Qu.:0.12094 1st Qu.:0.09253 1st Qu.:0.08997 1st Qu.:0.02759
## Median :0.13636 Median :0.10811 Median :0.09091 Median :0.04138
## Mean :0.13959 Mean :0.10076 Mean :0.09262 Mean :0.04032
## 3rd Qu.:0.16493 3rd Qu.:0.11039 3rd Qu.:0.09895 3rd Qu.:0.05844
## Max. :0.18621 Max. :0.11090 Max. :0.11644 Max. :0.06119
## M C F Y
## Min. :0.006849 Min. :0.000000 Min. :0.02677 Min. :0.01299
## 1st Qu.:0.006920 1st Qu.:0.000000 1st Qu.:0.04638 1st Qu.:0.02027
## Median :0.012987 Median :0.000000 Median :0.04828 Median :0.02083
## Mean :0.014701 Mean :0.001912 Mean :0.04714 Mean :0.02186
## 3rd Qu.:0.020552 3rd Qu.:0.000000 3rd Qu.:0.05300 3rd Qu.:0.02486
## Max. :0.028681 Max. :0.011472 Max. :0.06207 Max. :0.03448
## W H K R
## Min. :0.005736 Min. :0.01370 Min. :0.08031 Min. :0.006494
## 1st Qu.:0.010318 1st Qu.:0.01384 1st Qu.:0.08220 1st Qu.:0.010205
## Median :0.013514 Median :0.02027 Median :0.09459 Median :0.013699
## Mean :0.012277 Mean :0.02095 Mean :0.09057 Mean :0.014851
## 3rd Qu.:0.013841 3rd Qu.:0.02637 3rd Qu.:0.09689 3rd Qu.:0.013889
## Max. :0.019481 Max. :0.03247 Max. :0.09740 Max. :0.030593
## Q N E D
## Min. :0.02103 Min. :0.02083 Min. :0.05479 Min. :0.03247
## 1st Qu.:0.03003 1st Qu.:0.02486 1st Qu.:0.06100 1st Qu.:0.04564
## Median :0.03448 Median :0.03448 Median :0.06250 Median :0.04795
## Mean :0.03466 Mean :0.03456 Mean :0.06528 Mean :0.04914
## 3rd Qu.:0.04110 3rd Qu.:0.04545 3rd Qu.:0.06890 3rd Qu.:0.05536
## Max. :0.04795 Max. :0.04795 Max. :0.08442 Max. :0.06250
## S T
## Min. :0.04828 Min. :0.02703
## 1st Qu.:0.06287 1st Qu.:0.04152
## Median :0.07457 Median :0.04861
## Mean :0.07265 Mean :0.04926
## 3rd Qu.:0.08211 3rd Qu.:0.05657
## Max. :0.09028 Max. :0.06757
# Column-wise summary of the class-6 table after the TotalAA trim; the
# recorded output follows.
summary(object = trimmed_hemoglobin_class_6)
## Class TotalAA G P
## Min. :6 Min. : 30.0 Min. :0.00000 Min. :0.01370
## 1st Qu.:6 1st Qu.: 141.0 1st Qu.:0.06230 1st Qu.:0.02740
## Median :6 Median : 145.0 Median :0.07586 Median :0.04082
## Mean :6 Mean : 144.4 Mean :0.07343 Mean :0.03796
## 3rd Qu.:6 3rd Qu.: 146.0 3rd Qu.:0.08844 3rd Qu.:0.04916
## Max. :6 Max. :1156.0 Max. :0.11159 Max. :0.10000
## A V L I
## Min. :0.02941 Min. :0.03333 Min. :0.07353 Min. :0.000000
## 1st Qu.:0.09589 1st Qu.:0.07829 1st Qu.:0.12057 1st Qu.:0.000000
## Median :0.10959 Median :0.09220 Median :0.12329 Median :0.006803
## Mean :0.11111 Mean :0.09759 Mean :0.12557 Mean :0.008896
## 3rd Qu.:0.12766 3rd Qu.:0.11724 3rd Qu.:0.13014 3rd Qu.:0.014085
## Max. :0.16901 Max. :0.13103 Max. :0.18182 Max. :0.111111
## M C F Y
## Min. :0.000000 Min. :0.000000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.006849 1st Qu.:0.006897 1st Qu.:0.04965 1st Qu.:0.01418
## Median :0.013605 Median :0.007092 Median :0.05479 Median :0.02055
## Mean :0.012036 Mean :0.010378 Mean :0.05463 Mean :0.02033
## 3rd Qu.:0.014184 3rd Qu.:0.013699 3rd Qu.:0.05674 3rd Qu.:0.02128
## Max. :0.066667 Max. :0.064014 Max. :0.07092 Max. :0.05263
## W H K R
## Min. :0.000000 Min. :0.00000 Min. :0.03521 Min. :0.00000
## 1st Qu.:0.007092 1st Qu.:0.05479 1st Qu.:0.07534 1st Qu.:0.02055
## Median :0.013605 Median :0.06164 Median :0.07801 Median :0.02128
## Mean :0.011705 Mean :0.06152 Mean :0.07948 Mean :0.02287
## 3rd Qu.:0.013699 3rd Qu.:0.07092 3rd Qu.:0.08363 3rd Qu.:0.02721
## Max. :0.094118 Max. :0.08571 Max. :0.09929 Max. :0.07692
## Q N E D
## Min. :0.000000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.007092 1st Qu.:0.02128 1st Qu.:0.02817 1st Qu.:0.04795
## Median :0.020548 Median :0.03546 Median :0.04706 Median :0.05674
## Mean :0.020192 Mean :0.03632 Mean :0.04132 Mean :0.05628
## 3rd Qu.:0.027397 3rd Qu.:0.04795 3rd Qu.:0.05479 3rd Qu.:0.06383
## Max. :0.088235 Max. :0.08772 Max. :0.10000 Max. :0.08511
## S T
## Min. :0.02041 Min. :0.006803
## 1st Qu.:0.04762 1st Qu.:0.034247
## Median :0.07042 Median :0.049296
## Mean :0.06553 Mean :0.051013
## 3rd Qu.:0.08451 3rd Qu.:0.063830
## Max. :0.11348 Max. :0.105634