Visualizing Inventory Counts Across DMAs

*Data spans only the month of January

*VDP = Vehicle Description Page, a proxy for supply

dir()
##  [1] "carsEventFactQuery.csv"           "carsRecordFactQuery.csv"         
##  [3] "figure"                           "make_model_count_DMA.html"       
##  [5] "make_model_count_DMA.md"          "make_model_count_DMA.Rmd"        
##  [7] "usersByLocationQuery.csv"         "Vast Data.Rproj"                 
##  [9] "vdpsByLocationQuery.csv"          "vdpsByMakeModelLocationQuery.csv"
vdp_count = read.csv("vdpsByLocationQuery.csv", header=TRUE, sep=",")
dim(vdp_count)
## [1] 211   2
head(vdp_count)
##           dma_name vdpCount
## 1         NEW YORK   472608
## 2      LOS ANGELES   321576
## 3          CHICAGO   232150
## 4 DALLAS-FT. WORTH   203205
## 5     PHILADELPHIA   176544
## 6          ATLANTA   168819
library(sqldf)
city = sqldf("SELECT dma_name, vdpCount FROM vdp_count ORDER BY vdpCount DESC")
city
##                       dma_name vdpCount
## 1                     NEW YORK   472608
## 2                  LOS ANGELES   321576
## 3                      CHICAGO   232150
## 4             DALLAS-FT. WORTH   203205
## 5                 PHILADELPHIA   176544
## 6                      ATLANTA   168819
## 7                   WASHINGTON   161463
## 8                      HOUSTON   150676
## 9         MIAMI-FT. LAUDERDALE   140781
## 10         BOSTON (MANCHESTER)   116901
## 11  SAN FRANCISCO-OAK-SAN JOSE   112659
## 12  ORLANDO-DAYTONA BCH-MELBRN   108976
## 13              SEATTLE-TACOMA    97254
## 14   TAMPA-ST. PETE (SARASOTA)    96528
## 15    CLEVELAND-AKRON (CANTON)    95663
## 16                     DETROIT    90679
## 17                   CHARLOTTE    84136
## 18          PHOENIX (PRESCOTT)    81514
## 19                      DENVER    80972
## 20  RALEIGH-DURHAM (FAYETVLLE)    77705
## 21        HARTFORD & NEW HAVEN    76699
## 22    SACRAMNTO-STKTON-MODESTO    74269
## 23                    PORTLAND    69163
## 24                   NASHVILLE    56099
## 25                    COLUMBUS    54763
## 26  WEST PALM BEACH-FT. PIERCE    52037
## 27        MINNEAPOLIS-ST. PAUL    51552
## 28                  PITTSBURGH    51337
## 29   BIRMINGHAM (ANN AND TUSC)    49101
## 30                   BALTIMORE    48951
## 31                   ST. LOUIS    48841
## 32                 SAN ANTONIO    45972
## 33  GREENSBORO-H.POINT-W.SALEM    45351
## 34  HARRISBURG-LNCSTR-LEB-YORK    45255
## 35                  CINCINNATI    44222
## 36                INDIANAPOLIS    41225
## 37                       OMAHA    40599
## 38                   SAN DIEGO    39978
## 39               OKLAHOMA CITY    39928
## 40                 NEW ORLEANS    39391
## 41                      AUSTIN    37935
## 42                   MILWAUKEE    35780
## 43              SALT LAKE CITY    33448
## 44  GRAND RAPIDS-KALMZOO-B.CRK    31706
## 45  NORFOLK-PORTSMTH-NEWPT NWS    31321
## 46                JACKSONVILLE    31307
## 47                   LAS VEGAS    31026
## 48                 KANSAS CITY    30575
## 49  MOBILE-PENSACOLA (FT WALT)    30524
## 50                     MEMPHIS    30312
## 51         RICHMOND-PETERSBURG    29879
## 52                  LOUISVILLE    29758
## 53      PROVIDENCE-NEW BEDFORD    28271
## 54  GREENVLL-SPART-ASHEVLL-AND    28081
## 55                   LEXINGTON    26913
## 56                       TULSA    26320
## 57                    COLUMBIA    24038
## 58                   KNOXVILLE    23985
## 59            FT. MYERS-NAPLES    23275
## 60                  YOUNGSTOWN    21640
## 61                      DAYTON    21229
## 62                   FT. WAYNE    19997
## 63                 BATON ROUGE    19350
## 64                      TOLEDO    19330
## 65   HUNTSVILLE-DECATUR (FLOR)    18695
## 66      FLINT-SAGINAW-BAY CITY    18346
## 67                  CHARLESTON    18218
## 68     COLORADO SPRINGS-PUEBLO    18152
## 69                    SYRACUSE    16037
## 70       WILKES BARRE-SCRANTON    15609
## 71     LINCOLN & HASTINGS-KRNY    15377
## 72           ROANOKE-LYNCHBURG    15048
## 73                     BUFFALO    14672
## 74        ALBUQUERQUE-SANTA FE    14559
## 75     ALBANY-SCHENECTADY-TROY    14226
## 76                     JACKSON    13945
## 77   DAVENPORT-R.ISLAND-MOLINE    13241
## 78             DES MOINES-AMES    13227
## 79                   ROCHESTER    13177
## 80       SIOUX FALLS(MITCHELL)    13154
## 81  CHAMPAIGN&SPRNGFLD-DECATUR    13077
## 82              FRESNO-VISALIA    12765
## 83  PADUCAH-CAPE GIRARD-HARSBG    12619
## 84      LITTLE ROCK-PINE BLUFF    12532
## 85        EL PASO (LAS CRUCES)    12375
## 86                 CHATTANOOGA    12309
## 87         SPRINGFIELD-HOLYOKE    12150
## 88     WICHITA-HUTCHINSON PLUS    12079
## 89           WACO-TEMPLE-BRYAN    11773
## 90  CEDAR RAPIDS-WTRLO-IWC&DUB    11729
## 91          PEORIA-BLOOMINGTON    11228
## 92       MYRTLE BEACH-FLORENCE    11052
## 93                     LANSING    10929
## 94                  TRI-CITIES    10545
## 95                     MADISON    10417
## 96                   LAFAYETTE     9908
## 97                     SPOKANE     9869
## 98                    SAVANNAH     9701
## 99  GREENVILLE-N.BERN-WASHNGTN     9625
## 100 FT. SMITH-FAY-SPRNGDL-RGRS     9511
## 101         SOUTH BEND-ELKHART     9127
## 102            PORTLAND-AUBURN     9125
## 103         GREEN BAY-APPLETON     8667
## 104                      MACON     8623
## 105                SPRINGFIELD     8597
## 106  TYLER-LONGVIEW(LFKN&NCGD)     8551
## 107 HARLINGEN-WSLCO-BRNSVL-MCA     8490
## 108      TUCSON (SIERRA VISTA)     8413
## 109                       RENO     8308
## 110 JOHNSTOWN-ALTOONA-ST COLGE     7972
## 111                       ERIE     7860
## 112                  LAFAYETTE     7835
## 113                    AUGUSTA     7658
## 114      CHARLESTON-HUNTINGTON     7624
## 115                      BOISE     7607
## 116    TALLAHASSEE-THOMASVILLE     7583
## 117                SHERMAN-ADA     7563
## 118                   ROCKFORD     7503
## 119                 SIOUX CITY     7485
## 120           MONTGOMERY-SELMA     7107
## 121                Unknown DMA     7023
## 122     BURLINGTON-PLATTSBURGH     6748
## 123                 SHREVEPORT     6692
## 124                    LUBBOCK     6335
## 125 SANTABARBRA-SANMAR-SANLUOB     6249
## 126     WICHITA FALLS & LAWTON     6136
## 127                 WILMINGTON     6104
## 128                BAKERSFIELD     6101
## 129                   HONOLULU     6082
## 130         ABILENE-SWEETWATER     5991
## 131       BEAUMONT-PORT ARTHUR     5954
## 132               PALM SPRINGS     5909
## 133                   AMARILLO     5908
## 134         HATTIESBURG-LAUREL     5873
## 135             CORPUS CHRISTI     5789
## 136           MONTEREY-SALINAS     5769
## 137            BILOXI-GULFPORT     5533
## 138                 EVANSVILLE     5441
## 139 YAKIMA-PASCO-RCHLND-KNNWCK     5436
## 140 COLUMBUS-TUPELO-WEST POINT     5412
## 141               HARRISONBURG     5069
## 142                GAINESVILLE     4986
## 143     TRAVERSE CITY-CADILLAC     4979
## 144                   BILLINGS     4780
## 145       LA CROSSE-EAU CLAIRE     4701
## 146         WAUSAU-RHINELANDER     4465
## 147                     EUGENE     4273
## 148     QUINCY-HANNIBAL-KEOKUK     4255
## 149 ROCHESTR-MASON CITY-AUSTIN     4150
## 150            CHARLOTTESVILLE     3869
## 151                 ZANESVILLE     3612
## 152                     ALBANY     3516
## 153          FARGO-VALLEY CITY     3470
## 154                   MISSOULA     3447
## 155                   COLUMBUS     3377
## 156                     TOPEKA     3375
## 157                    JACKSON     3365
## 158                PANAMA CITY     3364
## 159             ODESSA-MIDLAND     3277
## 160                  JONESBORO     3127
## 161                       BEND     3034
## 162                      UTICA     2974
## 163                 RAPID CITY     2963
## 164                  ANCHORAGE     2888
## 165          CLARKSBURG-WESTON     2851
## 166   MINOT-BISMARCK-DICKINSON     2574
## 167    GRAND JUNCTION-MONTROSE     2570
## 168           MONROE-EL DORADO     2385
## 169              BOWLING GREEN     2376
## 170    COLUMBIA-JEFFERSON CITY     2361
## 171                     DOTHAN     2327
## 172      WHEELING-STEUBENVILLE     2269
## 173                 BINGHAMTON     2205
## 174 BLUEFIELD-BECKLEY-OAK HILL     2164
## 175                  SALISBURY     2113
## 176                 ALEXANDRIA     2103
## 177                 TWIN FALLS     2016
## 178           JOPLIN-PITTSBURG     1998
## 179      IDAHO FALLS-POCATELLO     1963
## 180            DULUTH-SUPERIOR     1956
## 181      MEDFORD-KLAMATH FALLS     1756
## 182              CHICO-REDDING     1625
## 183                TERRE HAUTE     1571
## 184              BUTTE-BOZEMAN     1458
## 185                       LIMA     1355
## 186               LAKE CHARLES     1266
## 187                PARKERSBURG     1221
## 188                 ST. JOSEPH     1071
## 189           ELMIRA (CORNING)     1067
## 190                    MANKATO     1057
## 191            CASPER-RIVERTON      976
## 192             YUMA-EL CENTRO      964
## 193                GREAT FALLS      932
## 194                     EUREKA      912
## 195                     BANGOR      786
## 196                     LAREDO      768
## 197                  WATERTOWN      656
## 198         OTTUMWA-KIRKSVILLE      607
## 199                   VICTORIA      579
## 200                 SAN ANGELO      519
## 201        CHEYENNE-SCOTTSBLUF      463
## 202                   MERIDIAN      452
## 203                     HELENA      448
## 204       GREENWOOD-GREENVILLE      396
## 205                  MARQUETTE      328
## 206                  FAIRBANKS      252
## 207               NORTH PLATTE      220
## 208                     ALPENA      143
## 209                     JUNEAU      122
## 210                   GLENDIVE       95
## 211               PRESQUE ISLE       59
#visualize 211 dma vdp counts
library(ggplot2)
hist(city$vdpCount, breaks=211, main="Distribution of VDP Count", xlab="VDP Count", col="green", cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.5)

plot of chunk unnamed-chunk-1

Insights

The vast majority of DMAs carried an inventory of less than 100,000 vehicles in the month of January. If we consider supply information a proxy for market size, we can conclude that DMAs with an inventory in excess of 200,000 vehicles are most likely in or outside of larger cities.

#testing that hypothesis
big_cities = sqldf("SELECT dma_name, vdpCount FROM vdp_count WHERE vdpCount >= 200000 ORDER BY vdpCount DESC")
big_cities
##           dma_name vdpCount
## 1         NEW YORK   472608
## 2      LOS ANGELES   321576
## 3          CHICAGO   232150
## 4 DALLAS-FT. WORTH   203205
#Almost a 270k difference between the 4th largest market and the 1st largest market!
range(big_cities$vdpCount)
## [1] 203205 472608
472608-203205
## [1] 269403

Implications

For the purposes of our analysis, focusing on larger markets is advantageous, as the financial implications of market inefficiencies scale with market size. Identifying and correcting inefficiencies in these markets should be the “top priority” for OEMs.

Quantifying Product Value

Given the implications of these findings, we should attempt to quantify the potential value of this product at various market sizes. After incorporating a dimension of time into the analysis, we can calculate the average inventory value for each DMA. As of right now, we can hypothesize that the average inventory value for a given DMA will fall into one of five buckets:

Grouping and prioritizing the markets accordingly will add more structure to our analysis. A visual representation of market size groupings amongst statistical outliers is provided below.

The number of buckets and the range of values contained in each can be revised once we have a better idea of how inventory levels change over time.

*An interesting follow up question here would be whether or not consumer preferences are more heterogeneous in these larger markets relative to their smaller counterparts. If so, it might be more challenging for OEMs to meet demand.

#boxplot to capture outliers for visualization purposes
outliers = boxplot(city$vdpCount, boxfill = "light gray", outlty = 2, outpch = 21:25, bg = "red", lwd = 2, medcol = "dark blue", medcex = 2, medpch = 20, main = "Outliers Amongst DMAs", range = 1.5, cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.5)

plot of chunk unnamed-chunk-3

#pulling outliers from city dataset
outliers$out
##  [1] 472608 321576 232150 203205 176544 168819 161463 150676 140781 116901
## [11] 112659 108976  97254  96528  95663  90679  84136  81514  80972  77705
## [21]  76699  74269  69163
viz = city[city$vdpCount %in% outliers$out,]
dim(viz)
## [1] 23  2



Note: I'm only using the 23 outliers here because it is difficult to visualize 211 DMAs. However, all of the observations omitted from the visualization would fall into the first bucket corresponding to the smallest market size (inventory <= 100k).

# Dotplot: Grouped Sorted and Colored
# Sort by vdpCount, dma  
#add factor to city data frame assinging 1, 2, 3, 4 based on market size
library(dplyr)
x = viz

#groupings
x$size = NA
x[1,3] = 5
x[2,3] = 4
x[3:4, 3] = 3
x[5:12, 3] = 2
x[13:23, 3] = 1

#color-coded
x$color[x$size == 5] = "red"
x$color[x$size == 4] = "dark green"
x$color[x$size == 3] = "purple"
x$color[x$size == 2] = "dark orange"
x$color[x$size == 1] = "blue"


#visualize
dotchart(x$vdpCount, labels = city$dma_name, main = "Inventory (VDP) Count by DMA", cex.main = 1.5, cex.axis = 1.5, bg=x$color, group = x$size, color = x$color, lwd=10)

plot of chunk unnamed-chunk-4