Chetan Kumar
This is an R Markdown document on sourcing data from the MariaDB a mySQL server with R.
# Install RMySQL
library(RMySQL)## Warning: package 'RMySQL' was built under R version 4.0.4
## Loading required package: DBI
## Warning: package 'DBI' was built under R version 4.0.4
# dbConnect is used to connect to various kind of databases like mysql
# A connection is opened and is assigned to ucscDB
ucscDB <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu")
# "show database" is a my SQL command which is run by R through dbGetQuery function
result <- dbGetQuery(ucscDB, "show databases;");
#dbDisconnect is used to disconnect from mysql server
dbDisconnect(ucscDB);## [1] TRUE
result## Database
## 1 acaChl1
## 2 ailMel1
## 3 allMis1
## 4 allSin1
## 5 amaVit1
## 6 anaPla1
## 7 ancCey1
## 8 angJap1
## 9 anoCar1
## 10 anoCar2
## 11 anoGam1
## 12 anoGam3
## 13 apaSpi1
## 14 apaVit1
## 15 apiMel1
## 16 apiMel2
## 17 aplCal1
## 18 aptFor1
## 19 aptMan1
## 20 aquChr2
## 21 araMac1
## 22 ascSuu1
## 23 balAcu1
## 24 balPav1
## 25 bisBis1
## 26 bosTau2
## 27 bosTau3
## 28 bosTau4
## 29 bosTau5
## 30 bosTau6
## 31 bosTau7
## 32 bosTau8
## 33 bosTau9
## 34 bosTauMd3
## 35 braFlo1
## 36 bruMal2
## 37 bucRhi1
## 38 burXyl1
## 39 caeAng2
## 40 caeJap1
## 41 caeJap4
## 42 caePb1
## 43 caePb2
## 44 caePb3
## 45 caeRem2
## 46 caeRem3
## 47 caeRem4
## 48 caeSp111
## 49 caeSp51
## 50 calAnn1
## 51 calJac1
## 52 calJac3
## 53 calJac4
## 54 calMil1
## 55 canFam1
## 56 canFam2
## 57 canFam3
## 58 canFam4
## 59 canFam5
## 60 capCar1
## 61 carCri1
## 62 cavPor3
## 63 cb1
## 64 cb3
## 65 cb4
## 66 ce10
## 67 ce11
## 68 ce2
## 69 ce4
## 70 ce6
## 71 cerSim1
## 72 chaVoc2
## 73 cheMyd1
## 74 chlSab2
## 75 chlUnd1
## 76 choHof1
## 77 chrPic1
## 78 chrPic2
## 79 ci1
## 80 ci2
## 81 ci3
## 82 colLiv1
## 83 colStr1
## 84 corBra1
## 85 corCor1
## 86 cotJap2
## 87 criGri1
## 88 criGriChoV1
## 89 criGriChoV2
## 90 cucCan1
## 91 danRer1
## 92 danRer10
## 93 danRer11
## 94 danRer2
## 95 danRer3
## 96 danRer4
## 97 danRer5
## 98 danRer6
## 99 danRer7
## 100 dasNov3
## 101 dipOrd1
## 102 dirImm1
## 103 dm1
## 104 dm2
## 105 dm3
## 106 dm6
## 107 dp2
## 108 dp3
## 109 droAna1
## 110 droAna2
## 111 droEre1
## 112 droGri1
## 113 droMoj1
## 114 droMoj2
## 115 droPer1
## 116 droSec1
## 117 droSim1
## 118 droSim2
## 119 droVir1
## 120 droVir2
## 121 droYak1
## 122 droYak2
## 123 eboVir3
## 124 echTel1
## 125 echTel2
## 126 egrGar1
## 127 enhLutNer1
## 128 equCab1
## 129 equCab2
## 130 equCab3
## 131 eriEur1
## 132 eriEur2
## 133 eurHel1
## 134 falChe1
## 135 falPer1
## 136 felCat3
## 137 felCat4
## 138 felCat5
## 139 felCat8
## 140 felCat9
## 141 ficAlb2
## 142 fr1
## 143 fr2
## 144 fr3
## 145 fulGla1
## 146 gadMor1
## 147 galGal2
## 148 galGal3
## 149 galGal4
## 150 galGal5
## 151 galGal6
## 152 galVar1
## 153 gasAcu1
## 154 gavSte1
## 155 gbMeta
## 156 geoFor1
## 157 go
## 158 go080130
## 159 go140213
## 160 go150121
## 161 go180426
## 162 gorGor3
## 163 gorGor4
## 164 gorGor5
## 165 gorGor6
## 166 haeCon2
## 167 halAlb1
## 168 halLeu1
## 169 hetBac1
## 170 hetGla1
## 171 hetGla2
## 172 hg16
## 173 hg17
## 174 hg18
## 175 hg19
## 176 hg19Patch10
## 177 hg19Patch13
## 178 hg38
## 179 hg38Patch11
## 180 hgFixed
## 181 hgcentral
## 182 information_schema
## 183 latCha1
## 184 lepDis1
## 185 letCam1
## 186 loaLoa1
## 187 loxAfr3
## 188 macEug1
## 189 macEug2
## 190 macFas5
## 191 manPen1
## 192 melGal1
## 193 melGal5
## 194 melHap1
## 195 melInc2
## 196 melUnd1
## 197 merNub1
## 198 mesUni1
## 199 micMur1
## 200 micMur2
## 201 mm10
## 202 mm10Patch4
## 203 mm39
## 204 mm5
## 205 mm6
## 206 mm7
## 207 mm8
## 208 mm9
## 209 monDom1
## 210 monDom4
## 211 monDom5
## 212 musFur1
## 213 myoLuc2
## 214 nanPar1
## 215 nasLar1
## 216 necAme1
## 217 neoSch1
## 218 nipNip1
## 219 nomLeu1
## 220 nomLeu2
## 221 nomLeu3
## 222 ochPri2
## 223 ochPri3
## 224 oncVol1
## 225 opiHoa1
## 226 oreNil1
## 227 oreNil2
## 228 oreNil3
## 229 ornAna1
## 230 ornAna2
## 231 oryCun2
## 232 oryLat2
## 233 otoGar3
## 234 oviAri1
## 235 oviAri3
## 236 oviAri4
## 237 panPan1
## 238 panPan2
## 239 panPan3
## 240 panRed1
## 241 panTro1
## 242 panTro2
## 243 panTro3
## 244 panTro4
## 245 panTro5
## 246 panTro6
## 247 papAnu2
## 248 papAnu4
## 249 papHam1
## 250 pelCri1
## 251 pelSin1
## 252 performance_schema
## 253 petMar1
## 254 petMar2
## 255 petMar3
## 256 phaCar1
## 257 phaLep1
## 258 phoRub1
## 259 picPub1
## 260 ponAbe2
## 261 ponAbe3
## 262 priExs1
## 263 priPac1
## 264 priPac3
## 265 proCap1
## 266 proteins120806
## 267 proteins121210
## 268 proteins140122
## 269 proteins150225
## 270 proteins160229
## 271 proteins180404
## 272 proteome
## 273 pteGut1
## 274 pteVam1
## 275 pygAde1
## 276 pytBiv1
## 277 rheMac1
## 278 rheMac10
## 279 rheMac2
## 280 rheMac3
## 281 rheMac8
## 282 rhiRox1
## 283 rn3
## 284 rn4
## 285 rn5
## 286 rn6
## 287 sacCer1
## 288 sacCer2
## 289 sacCer3
## 290 saiBol1
## 291 sarHar1
## 292 serCan1
## 293 sorAra1
## 294 sorAra2
## 295 sp120323
## 296 sp121210
## 297 sp140122
## 298 sp150225
## 299 sp160229
## 300 sp180404
## 301 speTri2
## 302 strCam1
## 303 strPur1
## 304 strPur2
## 305 strRat2
## 306 susScr11
## 307 susScr2
## 308 susScr3
## 309 taeGut1
## 310 taeGut2
## 311 tarSyr1
## 312 tarSyr2
## 313 tauEry1
## 314 tetNig1
## 315 tetNig2
## 316 thaSir1
## 317 tinGut2
## 318 triMan1
## 319 triSpi1
## 320 triSui1
## 321 tupBel1
## 322 turTru2
## 323 tytAlb1
## 324 uniProt
## 325 vicPac1
## 326 vicPac2
## 327 visiGene
## 328 wuhCor1
## 329 xenLae2
## 330 xenTro1
## 331 xenTro2
## 332 xenTro3
## 333 xenTro7
## 334 xenTro9
## 335 zonAlb1
# Connect to hg19 table
hg19 <- dbConnect(MySQL(),user = "genome", db = "hg19", host = "genome-mysql.cse.ucsc.edu")
# List all tables
allTables <- dbListTables(hg19)
# Number of tables
length(allTables)## [1] 12518
allTables[1:5]## [1] "HInv" "HInvGeneMrna" "acembly" "acemblyClass" "acemblyPep"
# List all Fields from "affyU133Plus2" table in "hg19" database
dbListFields(hg19, "affyU133Plus2")## [1] "bin" "matches" "misMatches" "repMatches" "nCount"
## [6] "qNumInsert" "qBaseInsert" "tNumInsert" "tBaseInsert" "strand"
## [11] "qName" "qSize" "qStart" "qEnd" "tName"
## [16] "tSize" "tStart" "tEnd" "blockCount" "blockSizes"
## [21] "qStarts" "tStarts"
# To send a query to database use dbGetQuery
dbGetQuery(hg19, "Select count(*) from affyU133Plus2")## count(*)
## 1 58463
# To Read data from the table
affyData <- dbReadTable(hg19, "affyU133Plus2")## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18 imported as
## numeric
head(affyData)## bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert
## 1 585 530 4 0 23 3 41 3
## 2 585 3355 17 0 109 9 67 9
## 3 585 4156 14 0 83 16 18 2
## 4 585 4667 9 0 68 21 42 3
## 5 585 5180 14 0 167 10 38 1
## 6 585 468 5 0 14 0 0 0
## tBaseInsert strand qName qSize qStart qEnd tName tSize tStart
## 1 898 - 225995_x_at 637 5 603 chr1 249250621 14361
## 2 11621 - 225035_x_at 3635 0 3548 chr1 249250621 14381
## 3 93 - 226340_x_at 4318 3 4274 chr1 249250621 14399
## 4 5743 - 1557034_s_at 4834 48 4834 chr1 249250621 14406
## 5 29 - 231811_at 5399 0 5399 chr1 249250621 19688
## 6 0 - 236841_at 487 0 487 chr1 249250621 27542
## tEnd blockCount
## 1 15816 5
## 2 29483 17
## 3 18745 18
## 4 24893 23
## 5 25078 11
## 6 28029 1
## blockSizes
## 1 93,144,229,70,21,
## 2 73,375,71,165,303,360,198,661,201,1,260,250,74,73,98,155,163,
## 3 690,10,32,33,376,4,5,15,5,11,7,41,277,859,141,51,443,1253,
## 4 99,352,286,24,49,14,6,5,8,149,14,44,98,12,10,355,837,59,8,1500,133,624,58,
## 5 131,26,1300,6,4,11,4,7,358,3359,155,
## 6 487,
## qStarts
## 1 34,132,278,541,611,
## 2 87,165,540,647,818,1123,1484,1682,2343,2545,2546,2808,3058,3133,3206,3317,3472,
## 3 44,735,746,779,813,1190,1195,1201,1217,1223,1235,1243,1285,1564,2423,2565,2617,3062,
## 4 0,99,452,739,764,814,829,836,842,851,1001,1016,1061,1160,1173,1184,1540,2381,2441,2450,3951,4103,4728,
## 5 0,132,159,1460,1467,1472,1484,1489,1497,1856,5244,
## 6 0,
## tStarts
## 1 14361,14454,14599,14968,15795,
## 2 14381,14454,14969,15075,15240,15543,15903,16104,16853,17054,17232,17492,17914,17988,18267,24736,29320,
## 3 14399,15089,15099,15131,15164,15540,15544,15549,15564,15569,15580,15587,15628,15906,16857,16998,17049,17492,
## 4 14406,20227,20579,20865,20889,20938,20952,20958,20963,20971,21120,21134,21178,21276,21288,21298,21653,22492,22551,22559,24059,24211,24835,
## 5 19688,19819,19845,21145,21151,21155,21166,21170,21177,21535,24923,
## 6 27542,
# Select a specific subset
query <- dbSendQuery(hg19, "select * from affyU133Plus2 where misMatches between 1 and 3")## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17 imported as
## numeric
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18 imported as
## numeric
affyMis <- fetch(query);
quantile(affyMis$misMatches)## 0% 25% 50% 75% 100%
## 1 1 2 2 3
# To fetch first 10 rows set n = 10
affyMisSmall <- fetch(query, n = 10);
# Use dbClearResult to clear query send to my sql server through dbSendQuery
dbClearResult(query)## [1] TRUE
dim(affyMisSmall)## [1] 10 22
dbDisconnect(hg19)## [1] TRUE