*Data spans only the month of January
*VDP = Vehicle Description Page, a proxy for supply
dir()
## [1] "carsEventFactQuery.csv" "carsRecordFactQuery.csv"
## [3] "figure" "make_model_count_DMA.html"
## [5] "make_model_count_DMA.md" "make_model_count_DMA.Rmd"
## [7] "usersByLocationQuery.csv" "Vast Data.Rproj"
## [9] "vdpsByLocationQuery.csv" "vdpsByMakeModelLocationQuery.csv"
vdp_count = read.csv("vdpsByLocationQuery.csv", header=TRUE, sep=",")
dim(vdp_count)
## [1] 211 2
head(vdp_count)
## dma_name vdpCount
## 1 NEW YORK 472608
## 2 LOS ANGELES 321576
## 3 CHICAGO 232150
## 4 DALLAS-FT. WORTH 203205
## 5 PHILADELPHIA 176544
## 6 ATLANTA 168819
library(sqldf)
city = sqldf("SELECT dma_name, vdpCount FROM vdp_count ORDER BY vdpCount DESC")
city
## dma_name vdpCount
## 1 NEW YORK 472608
## 2 LOS ANGELES 321576
## 3 CHICAGO 232150
## 4 DALLAS-FT. WORTH 203205
## 5 PHILADELPHIA 176544
## 6 ATLANTA 168819
## 7 WASHINGTON 161463
## 8 HOUSTON 150676
## 9 MIAMI-FT. LAUDERDALE 140781
## 10 BOSTON (MANCHESTER) 116901
## 11 SAN FRANCISCO-OAK-SAN JOSE 112659
## 12 ORLANDO-DAYTONA BCH-MELBRN 108976
## 13 SEATTLE-TACOMA 97254
## 14 TAMPA-ST. PETE (SARASOTA) 96528
## 15 CLEVELAND-AKRON (CANTON) 95663
## 16 DETROIT 90679
## 17 CHARLOTTE 84136
## 18 PHOENIX (PRESCOTT) 81514
## 19 DENVER 80972
## 20 RALEIGH-DURHAM (FAYETVLLE) 77705
## 21 HARTFORD & NEW HAVEN 76699
## 22 SACRAMNTO-STKTON-MODESTO 74269
## 23 PORTLAND 69163
## 24 NASHVILLE 56099
## 25 COLUMBUS 54763
## 26 WEST PALM BEACH-FT. PIERCE 52037
## 27 MINNEAPOLIS-ST. PAUL 51552
## 28 PITTSBURGH 51337
## 29 BIRMINGHAM (ANN AND TUSC) 49101
## 30 BALTIMORE 48951
## 31 ST. LOUIS 48841
## 32 SAN ANTONIO 45972
## 33 GREENSBORO-H.POINT-W.SALEM 45351
## 34 HARRISBURG-LNCSTR-LEB-YORK 45255
## 35 CINCINNATI 44222
## 36 INDIANAPOLIS 41225
## 37 OMAHA 40599
## 38 SAN DIEGO 39978
## 39 OKLAHOMA CITY 39928
## 40 NEW ORLEANS 39391
## 41 AUSTIN 37935
## 42 MILWAUKEE 35780
## 43 SALT LAKE CITY 33448
## 44 GRAND RAPIDS-KALMZOO-B.CRK 31706
## 45 NORFOLK-PORTSMTH-NEWPT NWS 31321
## 46 JACKSONVILLE 31307
## 47 LAS VEGAS 31026
## 48 KANSAS CITY 30575
## 49 MOBILE-PENSACOLA (FT WALT) 30524
## 50 MEMPHIS 30312
## 51 RICHMOND-PETERSBURG 29879
## 52 LOUISVILLE 29758
## 53 PROVIDENCE-NEW BEDFORD 28271
## 54 GREENVLL-SPART-ASHEVLL-AND 28081
## 55 LEXINGTON 26913
## 56 TULSA 26320
## 57 COLUMBIA 24038
## 58 KNOXVILLE 23985
## 59 FT. MYERS-NAPLES 23275
## 60 YOUNGSTOWN 21640
## 61 DAYTON 21229
## 62 FT. WAYNE 19997
## 63 BATON ROUGE 19350
## 64 TOLEDO 19330
## 65 HUNTSVILLE-DECATUR (FLOR) 18695
## 66 FLINT-SAGINAW-BAY CITY 18346
## 67 CHARLESTON 18218
## 68 COLORADO SPRINGS-PUEBLO 18152
## 69 SYRACUSE 16037
## 70 WILKES BARRE-SCRANTON 15609
## 71 LINCOLN & HASTINGS-KRNY 15377
## 72 ROANOKE-LYNCHBURG 15048
## 73 BUFFALO 14672
## 74 ALBUQUERQUE-SANTA FE 14559
## 75 ALBANY-SCHENECTADY-TROY 14226
## 76 JACKSON 13945
## 77 DAVENPORT-R.ISLAND-MOLINE 13241
## 78 DES MOINES-AMES 13227
## 79 ROCHESTER 13177
## 80 SIOUX FALLS(MITCHELL) 13154
## 81 CHAMPAIGN&SPRNGFLD-DECATUR 13077
## 82 FRESNO-VISALIA 12765
## 83 PADUCAH-CAPE GIRARD-HARSBG 12619
## 84 LITTLE ROCK-PINE BLUFF 12532
## 85 EL PASO (LAS CRUCES) 12375
## 86 CHATTANOOGA 12309
## 87 SPRINGFIELD-HOLYOKE 12150
## 88 WICHITA-HUTCHINSON PLUS 12079
## 89 WACO-TEMPLE-BRYAN 11773
## 90 CEDAR RAPIDS-WTRLO-IWC&DUB 11729
## 91 PEORIA-BLOOMINGTON 11228
## 92 MYRTLE BEACH-FLORENCE 11052
## 93 LANSING 10929
## 94 TRI-CITIES 10545
## 95 MADISON 10417
## 96 LAFAYETTE 9908
## 97 SPOKANE 9869
## 98 SAVANNAH 9701
## 99 GREENVILLE-N.BERN-WASHNGTN 9625
## 100 FT. SMITH-FAY-SPRNGDL-RGRS 9511
## 101 SOUTH BEND-ELKHART 9127
## 102 PORTLAND-AUBURN 9125
## 103 GREEN BAY-APPLETON 8667
## 104 MACON 8623
## 105 SPRINGFIELD 8597
## 106 TYLER-LONGVIEW(LFKN&NCGD) 8551
## 107 HARLINGEN-WSLCO-BRNSVL-MCA 8490
## 108 TUCSON (SIERRA VISTA) 8413
## 109 RENO 8308
## 110 JOHNSTOWN-ALTOONA-ST COLGE 7972
## 111 ERIE 7860
## 112 LAFAYETTE 7835
## 113 AUGUSTA 7658
## 114 CHARLESTON-HUNTINGTON 7624
## 115 BOISE 7607
## 116 TALLAHASSEE-THOMASVILLE 7583
## 117 SHERMAN-ADA 7563
## 118 ROCKFORD 7503
## 119 SIOUX CITY 7485
## 120 MONTGOMERY-SELMA 7107
## 121 Unknown DMA 7023
## 122 BURLINGTON-PLATTSBURGH 6748
## 123 SHREVEPORT 6692
## 124 LUBBOCK 6335
## 125 SANTABARBRA-SANMAR-SANLUOB 6249
## 126 WICHITA FALLS & LAWTON 6136
## 127 WILMINGTON 6104
## 128 BAKERSFIELD 6101
## 129 HONOLULU 6082
## 130 ABILENE-SWEETWATER 5991
## 131 BEAUMONT-PORT ARTHUR 5954
## 132 PALM SPRINGS 5909
## 133 AMARILLO 5908
## 134 HATTIESBURG-LAUREL 5873
## 135 CORPUS CHRISTI 5789
## 136 MONTEREY-SALINAS 5769
## 137 BILOXI-GULFPORT 5533
## 138 EVANSVILLE 5441
## 139 YAKIMA-PASCO-RCHLND-KNNWCK 5436
## 140 COLUMBUS-TUPELO-WEST POINT 5412
## 141 HARRISONBURG 5069
## 142 GAINESVILLE 4986
## 143 TRAVERSE CITY-CADILLAC 4979
## 144 BILLINGS 4780
## 145 LA CROSSE-EAU CLAIRE 4701
## 146 WAUSAU-RHINELANDER 4465
## 147 EUGENE 4273
## 148 QUINCY-HANNIBAL-KEOKUK 4255
## 149 ROCHESTR-MASON CITY-AUSTIN 4150
## 150 CHARLOTTESVILLE 3869
## 151 ZANESVILLE 3612
## 152 ALBANY 3516
## 153 FARGO-VALLEY CITY 3470
## 154 MISSOULA 3447
## 155 COLUMBUS 3377
## 156 TOPEKA 3375
## 157 JACKSON 3365
## 158 PANAMA CITY 3364
## 159 ODESSA-MIDLAND 3277
## 160 JONESBORO 3127
## 161 BEND 3034
## 162 UTICA 2974
## 163 RAPID CITY 2963
## 164 ANCHORAGE 2888
## 165 CLARKSBURG-WESTON 2851
## 166 MINOT-BISMARCK-DICKINSON 2574
## 167 GRAND JUNCTION-MONTROSE 2570
## 168 MONROE-EL DORADO 2385
## 169 BOWLING GREEN 2376
## 170 COLUMBIA-JEFFERSON CITY 2361
## 171 DOTHAN 2327
## 172 WHEELING-STEUBENVILLE 2269
## 173 BINGHAMTON 2205
## 174 BLUEFIELD-BECKLEY-OAK HILL 2164
## 175 SALISBURY 2113
## 176 ALEXANDRIA 2103
## 177 TWIN FALLS 2016
## 178 JOPLIN-PITTSBURG 1998
## 179 IDAHO FALLS-POCATELLO 1963
## 180 DULUTH-SUPERIOR 1956
## 181 MEDFORD-KLAMATH FALLS 1756
## 182 CHICO-REDDING 1625
## 183 TERRE HAUTE 1571
## 184 BUTTE-BOZEMAN 1458
## 185 LIMA 1355
## 186 LAKE CHARLES 1266
## 187 PARKERSBURG 1221
## 188 ST. JOSEPH 1071
## 189 ELMIRA (CORNING) 1067
## 190 MANKATO 1057
## 191 CASPER-RIVERTON 976
## 192 YUMA-EL CENTRO 964
## 193 GREAT FALLS 932
## 194 EUREKA 912
## 195 BANGOR 786
## 196 LAREDO 768
## 197 WATERTOWN 656
## 198 OTTUMWA-KIRKSVILLE 607
## 199 VICTORIA 579
## 200 SAN ANGELO 519
## 201 CHEYENNE-SCOTTSBLUF 463
## 202 MERIDIAN 452
## 203 HELENA 448
## 204 GREENWOOD-GREENVILLE 396
## 205 MARQUETTE 328
## 206 FAIRBANKS 252
## 207 NORTH PLATTE 220
## 208 ALPENA 143
## 209 JUNEAU 122
## 210 GLENDIVE 95
## 211 PRESQUE ISLE 59
#visualize 211 dma vdp counts
library(ggplot2)
hist(city$vdpCount, breaks=211, main="Distribution of VDP Count", xlab="VDP Count", col="green", cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.5)
Insights
The vast majority of DMAs carried an inventory of less than 100,000 vehicles in the month of January. If we consider supply information a proxy for market size, we can conclude that DMAs with an inventory in excess of 200,000 vehicles are most likely in or outside of larger cities.
#testing that hypothesis
big_cities = sqldf("SELECT dma_name, vdpCount FROM vdp_count WHERE vdpCount >= 200000 ORDER BY vdpCount DESC")
big_cities
## dma_name vdpCount
## 1 NEW YORK 472608
## 2 LOS ANGELES 321576
## 3 CHICAGO 232150
## 4 DALLAS-FT. WORTH 203205
#Almost a 270k difference between the 4th largest market and the 1st largest market!
range(big_cities$vdpCount)
## [1] 203205 472608
472608-203205
## [1] 269403
Implications
For the purposes of our analysis, focusing on larger markets is advantageous, as the financial implications of market inefficiencies scale with market size. Identifying and correcting inefficiencies in these markets should be the “top priority” for OEMs.
Quantifying Product Value
Given the implications of these findings, we should attempt to quantify the potential value of this product at various market sizes. After incorporating a dimension of time into the analysis, we can calculate the average inventory value for each DMA. As of right now, we can hypothesize that the average inventory value for a given DMA will fall into one of five buckets:
Grouping and prioritizing the markets accordingly will add more structure to our analysis. A visual representation of market size groupings amongst statistical outliers is provided below.
The number of buckets and the range of values contained in each can be revised once we have a better idea of how inventory levels change over time.
*An interesting follow up question here would be whether or not consumer preferences are more heterogeneous in these larger markets relative to their smaller counterparts. If so, it might be more challenging for OEMs to meet demand.
#boxplot to capture outliers for visualization purposes
outliers = boxplot(city$vdpCount, boxfill = "light gray", outlty = 2, outpch = 21:25, bg = "red", lwd = 2, medcol = "dark blue", medcex = 2, medpch = 20, main = "Outliers Amongst DMAs", range = 1.5, cex.lab = 1.5, cex.main = 1.5, cex.axis = 1.5)
#pulling outliers from city dataset
outliers$out
## [1] 472608 321576 232150 203205 176544 168819 161463 150676 140781 116901
## [11] 112659 108976 97254 96528 95663 90679 84136 81514 80972 77705
## [21] 76699 74269 69163
viz = city[city$vdpCount %in% outliers$out,]
dim(viz)
## [1] 23 2
Note: I'm only using the 23 outliers here because it is difficult to visualize 211 DMAs. However, all of the observations omitted from the visualization would fall into the first bucket corresponding to the smallest market size (inventory <= 100k).
# Dotplot: Grouped Sorted and Colored
# Sort by vdpCount, dma
#add factor to city data frame assinging 1, 2, 3, 4 based on market size
library(dplyr)
x = viz
#groupings
x$size = NA
x[1,3] = 5
x[2,3] = 4
x[3:4, 3] = 3
x[5:12, 3] = 2
x[13:23, 3] = 1
#color-coded
x$color[x$size == 5] = "red"
x$color[x$size == 4] = "dark green"
x$color[x$size == 3] = "purple"
x$color[x$size == 2] = "dark orange"
x$color[x$size == 1] = "blue"
#visualize
dotchart(x$vdpCount, labels = city$dma_name, main = "Inventory (VDP) Count by DMA", cex.main = 1.5, cex.axis = 1.5, bg=x$color, group = x$size, color = x$color, lwd=10)