Reading Data Soures from MySQL in R Programming

Chetan Kumar

Connecting to a SQL server with R:

This is an R Markdown document on sourcing data from the MariaDB a mySQL server with R.

# Install RMySQL
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.0.4

## Loading required package: DBI

## Warning: package 'DBI' was built under R version 4.0.4
# dbConnect is used to connect to various kind of databases like mysql
# A connection is opened and is assigned to ucscDB
ucscDB <- dbConnect(MySQL(), user = "genome", host = "genome-mysql.cse.ucsc.edu")
# "show database" is a my SQL command which is run by R through dbGetQuery function
result <- dbGetQuery(ucscDB, "show databases;"); 
#dbDisconnect is used to disconnect from mysql server
dbDisconnect(ucscDB);
## [1] TRUE
result
##               Database
## 1              acaChl1
## 2              ailMel1
## 3              allMis1
## 4              allSin1
## 5              amaVit1
## 6              anaPla1
## 7              ancCey1
## 8              angJap1
## 9              anoCar1
## 10             anoCar2
## 11             anoGam1
## 12             anoGam3
## 13             apaSpi1
## 14             apaVit1
## 15             apiMel1
## 16             apiMel2
## 17             aplCal1
## 18             aptFor1
## 19             aptMan1
## 20             aquChr2
## 21             araMac1
## 22             ascSuu1
## 23             balAcu1
## 24             balPav1
## 25             bisBis1
## 26             bosTau2
## 27             bosTau3
## 28             bosTau4
## 29             bosTau5
## 30             bosTau6
## 31             bosTau7
## 32             bosTau8
## 33             bosTau9
## 34           bosTauMd3
## 35             braFlo1
## 36             bruMal2
## 37             bucRhi1
## 38             burXyl1
## 39             caeAng2
## 40             caeJap1
## 41             caeJap4
## 42              caePb1
## 43              caePb2
## 44              caePb3
## 45             caeRem2
## 46             caeRem3
## 47             caeRem4
## 48            caeSp111
## 49             caeSp51
## 50             calAnn1
## 51             calJac1
## 52             calJac3
## 53             calJac4
## 54             calMil1
## 55             canFam1
## 56             canFam2
## 57             canFam3
## 58             canFam4
## 59             canFam5
## 60             capCar1
## 61             carCri1
## 62             cavPor3
## 63                 cb1
## 64                 cb3
## 65                 cb4
## 66                ce10
## 67                ce11
## 68                 ce2
## 69                 ce4
## 70                 ce6
## 71             cerSim1
## 72             chaVoc2
## 73             cheMyd1
## 74             chlSab2
## 75             chlUnd1
## 76             choHof1
## 77             chrPic1
## 78             chrPic2
## 79                 ci1
## 80                 ci2
## 81                 ci3
## 82             colLiv1
## 83             colStr1
## 84             corBra1
## 85             corCor1
## 86             cotJap2
## 87             criGri1
## 88         criGriChoV1
## 89         criGriChoV2
## 90             cucCan1
## 91             danRer1
## 92            danRer10
## 93            danRer11
## 94             danRer2
## 95             danRer3
## 96             danRer4
## 97             danRer5
## 98             danRer6
## 99             danRer7
## 100            dasNov3
## 101            dipOrd1
## 102            dirImm1
## 103                dm1
## 104                dm2
## 105                dm3
## 106                dm6
## 107                dp2
## 108                dp3
## 109            droAna1
## 110            droAna2
## 111            droEre1
## 112            droGri1
## 113            droMoj1
## 114            droMoj2
## 115            droPer1
## 116            droSec1
## 117            droSim1
## 118            droSim2
## 119            droVir1
## 120            droVir2
## 121            droYak1
## 122            droYak2
## 123            eboVir3
## 124            echTel1
## 125            echTel2
## 126            egrGar1
## 127         enhLutNer1
## 128            equCab1
## 129            equCab2
## 130            equCab3
## 131            eriEur1
## 132            eriEur2
## 133            eurHel1
## 134            falChe1
## 135            falPer1
## 136            felCat3
## 137            felCat4
## 138            felCat5
## 139            felCat8
## 140            felCat9
## 141            ficAlb2
## 142                fr1
## 143                fr2
## 144                fr3
## 145            fulGla1
## 146            gadMor1
## 147            galGal2
## 148            galGal3
## 149            galGal4
## 150            galGal5
## 151            galGal6
## 152            galVar1
## 153            gasAcu1
## 154            gavSte1
## 155             gbMeta
## 156            geoFor1
## 157                 go
## 158           go080130
## 159           go140213
## 160           go150121
## 161           go180426
## 162            gorGor3
## 163            gorGor4
## 164            gorGor5
## 165            gorGor6
## 166            haeCon2
## 167            halAlb1
## 168            halLeu1
## 169            hetBac1
## 170            hetGla1
## 171            hetGla2
## 172               hg16
## 173               hg17
## 174               hg18
## 175               hg19
## 176        hg19Patch10
## 177        hg19Patch13
## 178               hg38
## 179        hg38Patch11
## 180            hgFixed
## 181          hgcentral
## 182 information_schema
## 183            latCha1
## 184            lepDis1
## 185            letCam1
## 186            loaLoa1
## 187            loxAfr3
## 188            macEug1
## 189            macEug2
## 190            macFas5
## 191            manPen1
## 192            melGal1
## 193            melGal5
## 194            melHap1
## 195            melInc2
## 196            melUnd1
## 197            merNub1
## 198            mesUni1
## 199            micMur1
## 200            micMur2
## 201               mm10
## 202         mm10Patch4
## 203               mm39
## 204                mm5
## 205                mm6
## 206                mm7
## 207                mm8
## 208                mm9
## 209            monDom1
## 210            monDom4
## 211            monDom5
## 212            musFur1
## 213            myoLuc2
## 214            nanPar1
## 215            nasLar1
## 216            necAme1
## 217            neoSch1
## 218            nipNip1
## 219            nomLeu1
## 220            nomLeu2
## 221            nomLeu3
## 222            ochPri2
## 223            ochPri3
## 224            oncVol1
## 225            opiHoa1
## 226            oreNil1
## 227            oreNil2
## 228            oreNil3
## 229            ornAna1
## 230            ornAna2
## 231            oryCun2
## 232            oryLat2
## 233            otoGar3
## 234            oviAri1
## 235            oviAri3
## 236            oviAri4
## 237            panPan1
## 238            panPan2
## 239            panPan3
## 240            panRed1
## 241            panTro1
## 242            panTro2
## 243            panTro3
## 244            panTro4
## 245            panTro5
## 246            panTro6
## 247            papAnu2
## 248            papAnu4
## 249            papHam1
## 250            pelCri1
## 251            pelSin1
## 252 performance_schema
## 253            petMar1
## 254            petMar2
## 255            petMar3
## 256            phaCar1
## 257            phaLep1
## 258            phoRub1
## 259            picPub1
## 260            ponAbe2
## 261            ponAbe3
## 262            priExs1
## 263            priPac1
## 264            priPac3
## 265            proCap1
## 266     proteins120806
## 267     proteins121210
## 268     proteins140122
## 269     proteins150225
## 270     proteins160229
## 271     proteins180404
## 272           proteome
## 273            pteGut1
## 274            pteVam1
## 275            pygAde1
## 276            pytBiv1
## 277            rheMac1
## 278           rheMac10
## 279            rheMac2
## 280            rheMac3
## 281            rheMac8
## 282            rhiRox1
## 283                rn3
## 284                rn4
## 285                rn5
## 286                rn6
## 287            sacCer1
## 288            sacCer2
## 289            sacCer3
## 290            saiBol1
## 291            sarHar1
## 292            serCan1
## 293            sorAra1
## 294            sorAra2
## 295           sp120323
## 296           sp121210
## 297           sp140122
## 298           sp150225
## 299           sp160229
## 300           sp180404
## 301            speTri2
## 302            strCam1
## 303            strPur1
## 304            strPur2
## 305            strRat2
## 306           susScr11
## 307            susScr2
## 308            susScr3
## 309            taeGut1
## 310            taeGut2
## 311            tarSyr1
## 312            tarSyr2
## 313            tauEry1
## 314            tetNig1
## 315            tetNig2
## 316            thaSir1
## 317            tinGut2
## 318            triMan1
## 319            triSpi1
## 320            triSui1
## 321            tupBel1
## 322            turTru2
## 323            tytAlb1
## 324            uniProt
## 325            vicPac1
## 326            vicPac2
## 327           visiGene
## 328            wuhCor1
## 329            xenLae2
## 330            xenTro1
## 331            xenTro2
## 332            xenTro3
## 333            xenTro7
## 334            xenTro9
## 335            zonAlb1
# Connect to hg19 table
hg19 <- dbConnect(MySQL(),user = "genome", db = "hg19", host = "genome-mysql.cse.ucsc.edu")
# List all tables
allTables <- dbListTables(hg19)
# Number of tables
length(allTables)
## [1] 12518
allTables[1:5]
## [1] "HInv"         "HInvGeneMrna" "acembly"      "acemblyClass" "acemblyPep"
# List all Fields from "affyU133Plus2" table in "hg19" database
dbListFields(hg19, "affyU133Plus2")
##  [1] "bin"         "matches"     "misMatches"  "repMatches"  "nCount"     
##  [6] "qNumInsert"  "qBaseInsert" "tNumInsert"  "tBaseInsert" "strand"     
## [11] "qName"       "qSize"       "qStart"      "qEnd"        "tName"      
## [16] "tSize"       "tStart"      "tEnd"        "blockCount"  "blockSizes" 
## [21] "qStarts"     "tStarts"
# To send a query to database use dbGetQuery
dbGetQuery(hg19, "Select count(*) from affyU133Plus2")
##   count(*)
## 1    58463
# To Read data from the table
affyData <- dbReadTable(hg19, "affyU133Plus2")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18 imported as
## numeric
head(affyData)
##   bin matches misMatches repMatches nCount qNumInsert qBaseInsert tNumInsert
## 1 585     530          4          0     23          3          41          3
## 2 585    3355         17          0    109          9          67          9
## 3 585    4156         14          0     83         16          18          2
## 4 585    4667          9          0     68         21          42          3
## 5 585    5180         14          0    167         10          38          1
## 6 585     468          5          0     14          0           0          0
##   tBaseInsert strand        qName qSize qStart qEnd tName     tSize tStart
## 1         898      -  225995_x_at   637      5  603  chr1 249250621  14361
## 2       11621      -  225035_x_at  3635      0 3548  chr1 249250621  14381
## 3          93      -  226340_x_at  4318      3 4274  chr1 249250621  14399
## 4        5743      - 1557034_s_at  4834     48 4834  chr1 249250621  14406
## 5          29      -    231811_at  5399      0 5399  chr1 249250621  19688
## 6           0      -    236841_at   487      0  487  chr1 249250621  27542
##    tEnd blockCount
## 1 15816          5
## 2 29483         17
## 3 18745         18
## 4 24893         23
## 5 25078         11
## 6 28029          1
##                                                                   blockSizes
## 1                                                          93,144,229,70,21,
## 2              73,375,71,165,303,360,198,661,201,1,260,250,74,73,98,155,163,
## 3                 690,10,32,33,376,4,5,15,5,11,7,41,277,859,141,51,443,1253,
## 4 99,352,286,24,49,14,6,5,8,149,14,44,98,12,10,355,837,59,8,1500,133,624,58,
## 5                                       131,26,1300,6,4,11,4,7,358,3359,155,
## 6                                                                       487,
##                                                                                                  qStarts
## 1                                                                                    34,132,278,541,611,
## 2                        87,165,540,647,818,1123,1484,1682,2343,2545,2546,2808,3058,3133,3206,3317,3472,
## 3                   44,735,746,779,813,1190,1195,1201,1217,1223,1235,1243,1285,1564,2423,2565,2617,3062,
## 4 0,99,452,739,764,814,829,836,842,851,1001,1016,1061,1160,1173,1184,1540,2381,2441,2450,3951,4103,4728,
## 5                                                     0,132,159,1460,1467,1472,1484,1489,1497,1856,5244,
## 6                                                                                                     0,
##                                                                                                                                      tStarts
## 1                                                                                                             14361,14454,14599,14968,15795,
## 2                                     14381,14454,14969,15075,15240,15543,15903,16104,16853,17054,17232,17492,17914,17988,18267,24736,29320,
## 3                               14399,15089,15099,15131,15164,15540,15544,15549,15564,15569,15580,15587,15628,15906,16857,16998,17049,17492,
## 4 14406,20227,20579,20865,20889,20938,20952,20958,20963,20971,21120,21134,21178,21276,21288,21298,21653,22492,22551,22559,24059,24211,24835,
## 5                                                                         19688,19819,19845,21145,21151,21155,21166,21170,21177,21535,24923,
## 6                                                                                                                                     27542,
# Select a specific subset
query <- dbSendQuery(hg19, "select * from affyU133Plus2 where misMatches between 1 and 3")
## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 0 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 1 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 2 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 3 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 4 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 5 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 6 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 7 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 8 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 11 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 12 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 13 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 15 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 16 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 17 imported as
## numeric

## Warning in .local(conn, statement, ...): Unsigned INTEGER in col 18 imported as
## numeric
affyMis <- fetch(query);
quantile(affyMis$misMatches)
##   0%  25%  50%  75% 100% 
##    1    1    2    2    3
# To fetch first 10 rows set n = 10 
affyMisSmall <- fetch(query, n = 10);

# Use dbClearResult to clear query send to my sql server through dbSendQuery
dbClearResult(query)
## [1] TRUE
dim(affyMisSmall)
## [1] 10 22
dbDisconnect(hg19)
## [1] TRUE