This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
Note: this analysis was performed using the open-source software R and RStudio.
The objective is to gain familiarity with data science and statistical computing by using R to develop an introductory analysis of avocado pricing based on 3 years of existing data. This analysis may help individuals and/or organizations make general assumptions about the avocado market and decide how best to position an organization strategically within the industry.
Additionally, this analysis will be used to complete additional tasks related to this assignment:
Following the completion of this analysis, the auto.arima() function from the forecast package in R will be used to forecast pricing.
The data used in this analysis was distributed as part of a weekly assignment for the MKTG6000 - Managerial Marketing class at California State University of Bakersfield. The data is said to represent weekly retail information on average avocado price and sales volume from December 3, 2017 to November 29, 2020 for 38 cities in the US (the geography counts below show 41 distinct markets). The data is arranged in columns and reflects the date of observation, the average price of a single Hass avocado, the total number of Hass avocados sold, whether the price/amount is for conventional or organic avocados, and the mileage from Bakersfield to each city.
# Load the full 2017-2020 avocado data set and preview its first six rows.
data <- read.csv(file = "avocado_2020.csv")
head(data, n = 6L)
## date average_price total_volume type year geography
## 1 12/3/2017 1.39 139970 conventional 2017 Albany
## 2 12/3/2017 1.44 3577 organic 2017 Albany
## 3 12/3/2017 1.07 504933 conventional 2017 Atlanta
## 4 12/3/2017 1.62 10609 organic 2017 Atlanta
## 5 12/3/2017 1.43 658939 conventional 2017 Baltimore/Washington
## 6 12/3/2017 1.58 38754 organic 2017 Baltimore/Washington
## Mileage
## 1 2832
## 2 2832
## 3 2199
## 4 2199
## 5 2679
## 6 2679
# plyr supplies count(); install it once with install.packages("plyr") if needed.
library(plyr)
# Number of rows recorded for each geography.
count(data, vars = "geography")
## geography freq
## 1 Albany 308
## 2 Atlanta 308
## 3 Baltimore/Washington 308
## 4 Boise 308
## 5 Boston 308
## 6 Buffalo/Rochester 308
## 7 Charlotte 308
## 8 Chicago 308
## 9 Cincinnati/Dayton 308
## 10 Columbus 308
## 11 Dallas/Ft. Worth 308
## 12 Denver 308
## 13 Detroit 308
## 14 Grand Rapids 308
## 15 Harrisburg/Scranton 308
## 16 Hartford/Springfield 308
## 17 Houston 308
## 18 Indianapolis 308
## 19 Jacksonville 308
## 20 Las Vegas 308
## 21 Los Angeles 308
## 22 Louisville 308
## 23 Miami/Ft. Lauderdale 308
## 24 Nashville 308
## 25 New Orleans/Mobile 308
## 26 New York 308
## 27 Orlando 308
## 28 Philadelphia 308
## 29 Phoenix/Tucson 308
## 30 Pittsburgh 308
## 31 Portland 308
## 32 Raleigh/Greensboro 308
## 33 Richmond/Norfolk 308
## 34 Sacramento 308
## 35 San Diego 308
## 36 San Francisco 308
## 37 Seattle 308
## 38 Spokane 308
## 39 St. Louis 308
## 40 Syracuse 308
## 41 Tampa 308
# Frequency of each distinct average_price value in the full data set.
count(data, vars = "average_price")
## average_price freq
## 1 0.50 1
## 2 0.51 1
## 3 0.53 1
## 4 0.54 2
## 5 0.56 4
## 6 0.57 1
## 7 0.58 2
## 8 0.59 4
## 9 0.60 1
## 10 0.61 4
## 11 0.62 7
## 12 0.63 3
## 13 0.64 8
## 14 0.65 8
## 15 0.66 12
## 16 0.67 15
## 17 0.68 15
## 18 0.69 11
## 19 0.70 21
## 20 0.71 21
## 21 0.72 24
## 22 0.73 32
## 23 0.74 34
## 24 0.75 26
## 25 0.76 36
## 26 0.77 32
## 27 0.78 35
## 28 0.79 26
## 29 0.80 40
## 30 0.81 47
## 31 0.82 38
## 32 0.83 40
## 33 0.84 55
## 34 0.85 51
## 35 0.86 48
## 36 0.87 55
## 37 0.88 63
## 38 0.89 78
## 39 0.90 74
## 40 0.91 86
## 41 0.92 85
## 42 0.93 94
## 43 0.94 91
## 44 0.95 109
## 45 0.96 94
## 46 0.97 100
## 47 0.98 112
## 48 0.99 105
## 49 1.00 110
## 50 1.01 126
## 51 1.02 93
## 52 1.03 130
## 53 1.04 132
## 54 1.05 106
## 55 1.06 116
## 56 1.07 106
## 57 1.08 112
## 58 1.09 136
## 59 1.10 141
## 60 1.11 107
## 61 1.12 113
## 62 1.13 137
## 63 1.14 183
## 64 1.15 161
## 65 1.16 159
## 66 1.17 156
## 67 1.18 148
## 68 1.19 164
## 69 1.20 169
## 70 1.21 133
## 71 1.22 141
## 72 1.23 135
## 73 1.24 142
## 74 1.25 148
## 75 1.26 143
## 76 1.27 134
## 77 1.28 142
## 78 1.29 153
## 79 1.30 119
## 80 1.31 138
## 81 1.32 141
## 82 1.33 120
## 83 1.34 137
## 84 1.35 144
## 85 1.36 151
## 86 1.37 149
## 87 1.38 155
## 88 1.39 129
## 89 1.40 131
## 90 1.41 141
## 91 1.42 146
## 92 1.43 139
## 93 1.44 120
## 94 1.45 137
## 95 1.46 140
## 96 1.47 118
## 97 1.48 119
## 98 1.49 105
## 99 1.50 123
## 100 1.51 118
## 101 1.52 108
## 102 1.53 117
## 103 1.54 93
## 104 1.55 119
## 105 1.56 95
## 106 1.57 97
## 107 1.58 89
## 108 1.59 111
## 109 1.60 95
## 110 1.61 95
## 111 1.62 83
## 112 1.63 86
## 113 1.64 87
## 114 1.65 86
## 115 1.66 86
## 116 1.67 69
## 117 1.68 67
## 118 1.69 84
## 119 1.70 65
## 120 1.71 66
## 121 1.72 65
## 122 1.73 70
## 123 1.74 54
## 124 1.75 64
## 125 1.76 58
## 126 1.77 62
## 127 1.78 58
## 128 1.79 56
## 129 1.80 63
## 130 1.81 47
## 131 1.82 53
## 132 1.83 51
## 133 1.84 45
## 134 1.85 48
## 135 1.86 47
## 136 1.87 46
## 137 1.88 44
## 138 1.89 38
## 139 1.90 42
## 140 1.91 49
## 141 1.92 41
## 142 1.93 35
## 143 1.94 35
## 144 1.95 38
## 145 1.96 36
## 146 1.97 34
## 147 1.98 33
## 148 1.99 26
## 149 2.00 23
## 150 2.01 19
## 151 2.02 34
## 152 2.03 30
## 153 2.04 25
## 154 2.05 33
## 155 2.06 23
## 156 2.07 26
## 157 2.08 22
## 158 2.09 16
## 159 2.10 18
## 160 2.11 21
## 161 2.12 16
## 162 2.13 17
## 163 2.14 17
## 164 2.15 21
## 165 2.16 24
## 166 2.17 8
## 167 2.18 16
## 168 2.19 15
## 169 2.20 8
## 170 2.21 18
## 171 2.22 11
## 172 2.23 7
## 173 2.24 9
## 174 2.25 9
## 175 2.26 8
## 176 2.27 12
## 177 2.28 6
## 178 2.29 3
## 179 2.30 4
## 180 2.31 9
## 181 2.32 6
## 182 2.33 7
## 183 2.34 5
## 184 2.35 4
## 185 2.36 6
## 186 2.37 5
## 187 2.38 5
## 188 2.39 9
## 189 2.40 5
## 190 2.41 5
## 191 2.42 2
## 192 2.43 5
## 193 2.44 6
## 194 2.45 4
## 195 2.46 4
## 196 2.48 2
## 197 2.49 3
## 198 2.50 3
## 199 2.51 1
## 200 2.52 2
## 201 2.53 1
## 202 2.54 1
## 203 2.55 2
## 204 2.56 2
## 205 2.57 1
## 206 2.59 1
## 207 2.60 1
## 208 2.62 2
## 209 2.64 2
## 210 2.66 4
## 211 2.67 1
## 212 2.69 2
## 213 2.71 3
## 214 2.72 2
## 215 2.73 1
## 216 2.78 1
# Mean and median price, then the linear correlation of volume with price.
with(data, mean(average_price))
## [1] 1.358841
with(data, median(average_price))
## [1] 1.32
with(data, cor(total_volume, average_price))
## [1] -0.4169306
## [1] -0.4169306
# Load the 2017-2018 subset and preview its first six rows.
data <- read.csv(file = "avocado_2017_18.csv")
head(data, n = 6L)
## date average_price total_volume type year geography
## 1 12/3/2017 1.39 139970 conventional 2017 Albany
## 2 12/3/2017 1.44 3577 organic 2017 Albany
## 3 12/3/2017 1.07 504933 conventional 2017 Atlanta
## 4 12/3/2017 1.62 10609 organic 2017 Atlanta
## 5 12/3/2017 1.43 658939 conventional 2017 Baltimore/Washington
## 6 12/3/2017 1.58 38754 organic 2017 Baltimore/Washington
## Mileage
## 1 2832
## 2 2832
## 3 2199
## 4 2199
## 5 2679
## 6 2679
# plyr supplies count(); install it once with install.packages("plyr") if needed.
library(plyr)
# Number of rows recorded for each geography in the 2017-2018 subset.
count(data, vars = "geography")
## geography freq
## 1 Albany 108
## 2 Atlanta 108
## 3 Baltimore/Washington 108
## 4 Boise 108
## 5 Boston 108
## 6 Buffalo/Rochester 108
## 7 Charlotte 108
## 8 Chicago 108
## 9 Cincinnati/Dayton 108
## 10 Columbus 108
## 11 Dallas/Ft. Worth 108
## 12 Denver 108
## 13 Detroit 108
## 14 Grand Rapids 108
## 15 Harrisburg/Scranton 108
## 16 Hartford/Springfield 108
## 17 Houston 108
## 18 Indianapolis 108
## 19 Jacksonville 108
## 20 Las Vegas 108
## 21 Los Angeles 108
## 22 Louisville 108
## 23 Miami/Ft. Lauderdale 108
## 24 Nashville 108
## 25 New Orleans/Mobile 108
## 26 New York 108
## 27 Orlando 108
## 28 Philadelphia 108
## 29 Phoenix/Tucson 108
## 30 Pittsburgh 108
## 31 Portland 108
## 32 Raleigh/Greensboro 108
## 33 Richmond/Norfolk 108
## 34 Sacramento 108
## 35 San Diego 108
## 36 San Francisco 108
## 37 Seattle 108
## 38 Spokane 108
## 39 St. Louis 108
## 40 Syracuse 108
## 41 Tampa 108
# Frequency of each distinct average_price value in the 2017-2018 subset.
count(data, vars = "average_price")
## average_price freq
## 1 0.50 1
## 2 0.51 1
## 3 0.53 1
## 4 0.54 1
## 5 0.56 3
## 6 0.57 1
## 7 0.58 2
## 8 0.59 3
## 9 0.61 2
## 10 0.62 1
## 11 0.64 4
## 12 0.66 3
## 13 0.67 6
## 14 0.68 4
## 15 0.69 3
## 16 0.70 1
## 17 0.71 7
## 18 0.72 7
## 19 0.73 12
## 20 0.74 10
## 21 0.75 10
## 22 0.76 8
## 23 0.77 11
## 24 0.78 11
## 25 0.79 8
## 26 0.80 8
## 27 0.81 12
## 28 0.82 4
## 29 0.83 10
## 30 0.84 14
## 31 0.85 12
## 32 0.86 7
## 33 0.87 16
## 34 0.88 17
## 35 0.89 16
## 36 0.90 8
## 37 0.91 16
## 38 0.92 24
## 39 0.93 20
## 40 0.94 18
## 41 0.95 26
## 42 0.96 31
## 43 0.97 27
## 44 0.98 31
## 45 0.99 44
## 46 1.00 30
## 47 1.01 46
## 48 1.02 30
## 49 1.03 43
## 50 1.04 43
## 51 1.05 33
## 52 1.06 32
## 53 1.07 37
## 54 1.08 40
## 55 1.09 51
## 56 1.10 45
## 57 1.11 31
## 58 1.12 42
## 59 1.13 54
## 60 1.14 64
## 61 1.15 59
## 62 1.16 78
## 63 1.17 54
## 64 1.18 53
## 65 1.19 54
## 66 1.20 58
## 67 1.21 45
## 68 1.22 45
## 69 1.23 55
## 70 1.24 51
## 71 1.25 63
## 72 1.26 51
## 73 1.27 63
## 74 1.28 60
## 75 1.29 59
## 76 1.30 49
## 77 1.31 53
## 78 1.32 60
## 79 1.33 36
## 80 1.34 53
## 81 1.35 56
## 82 1.36 64
## 83 1.37 56
## 84 1.38 65
## 85 1.39 57
## 86 1.40 55
## 87 1.41 63
## 88 1.42 57
## 89 1.43 61
## 90 1.44 50
## 91 1.45 59
## 92 1.46 51
## 93 1.47 37
## 94 1.48 45
## 95 1.49 32
## 96 1.50 44
## 97 1.51 38
## 98 1.52 40
## 99 1.53 46
## 100 1.54 36
## 101 1.55 45
## 102 1.56 47
## 103 1.57 38
## 104 1.58 29
## 105 1.59 47
## 106 1.60 41
## 107 1.61 30
## 108 1.62 31
## 109 1.63 34
## 110 1.64 41
## 111 1.65 39
## 112 1.66 38
## 113 1.67 32
## 114 1.68 26
## 115 1.69 33
## 116 1.70 25
## 117 1.71 19
## 118 1.72 25
## 119 1.73 22
## 120 1.74 21
## 121 1.75 23
## 122 1.76 22
## 123 1.77 22
## 124 1.78 20
## 125 1.79 25
## 126 1.80 23
## 127 1.81 20
## 128 1.82 20
## 129 1.83 27
## 130 1.84 15
## 131 1.85 18
## 132 1.86 14
## 133 1.87 9
## 134 1.88 14
## 135 1.89 6
## 136 1.90 12
## 137 1.91 12
## 138 1.92 17
## 139 1.93 11
## 140 1.94 12
## 141 1.95 13
## 142 1.96 5
## 143 1.97 5
## 144 1.98 4
## 145 1.99 7
## 146 2.00 6
## 147 2.01 6
## 148 2.02 11
## 149 2.03 7
## 150 2.04 9
## 151 2.05 9
## 152 2.06 8
## 153 2.07 9
## 154 2.08 7
## 155 2.09 7
## 156 2.10 4
## 157 2.11 8
## 158 2.12 3
## 159 2.13 4
## 160 2.14 7
## 161 2.15 5
## 162 2.16 8
## 163 2.17 1
## 164 2.18 4
## 165 2.19 2
## 166 2.20 3
## 167 2.21 4
## 168 2.22 3
## 169 2.23 2
## 170 2.24 4
## 171 2.25 6
## 172 2.26 3
## 173 2.27 4
## 174 2.28 1
## 175 2.30 3
## 176 2.31 2
## 177 2.32 1
## 178 2.33 1
## 179 2.35 1
## 180 2.39 1
## 181 2.40 1
## 182 2.41 1
## 183 2.43 1
## 184 2.48 1
## 185 2.49 1
## 186 2.50 1
## 187 2.52 1
## 188 2.56 1
## 189 2.60 1
## 190 2.66 1
## 191 2.71 1
# Mean and median price, then the linear correlation of volume with price.
with(data, mean(average_price))
## [1] 1.367866
with(data, median(average_price))
## [1] 1.35
with(data, cor(total_volume, average_price))
## [1] -0.4822304
## [1] -0.4822304
# Load the 2019-2020 subset and preview its first six rows.
data <- read.csv(file = "avocado_2019_20.csv")
head(data, n = 6L)
## date average_price total_volume type year geography
## 1 1/7/2019 1.07 129222 conventional 2019 Albany
## 2 1/7/2019 1.41 5006 organic 2019 Albany
## 3 1/7/2019 0.92 828971 conventional 2019 Atlanta
## 4 1/7/2019 1.42 16714 organic 2019 Atlanta
## 5 1/7/2019 1.31 925391 conventional 2019 Baltimore/Washington
## 6 1/7/2019 1.23 58619 organic 2019 Baltimore/Washington
## Mileage
## 1 2832
## 2 2832
## 3 2199
## 4 2199
## 5 2679
## 6 2679
# Number of rows recorded for each geography in the 2019-2020 subset.
count(data, vars = "geography")
## geography freq
## 1 Albany 200
## 2 Atlanta 200
## 3 Baltimore/Washington 200
## 4 Boise 200
## 5 Boston 200
## 6 Buffalo/Rochester 200
## 7 Charlotte 200
## 8 Chicago 200
## 9 Cincinnati/Dayton 200
## 10 Columbus 200
## 11 Dallas/Ft. Worth 200
## 12 Denver 200
## 13 Detroit 200
## 14 Grand Rapids 200
## 15 Harrisburg/Scranton 200
## 16 Hartford/Springfield 200
## 17 Houston 200
## 18 Indianapolis 200
## 19 Jacksonville 200
## 20 Las Vegas 200
## 21 Los Angeles 200
## 22 Louisville 200
## 23 Miami/Ft. Lauderdale 200
## 24 Nashville 200
## 25 New Orleans/Mobile 200
## 26 New York 200
## 27 Orlando 200
## 28 Philadelphia 200
## 29 Phoenix/Tucson 200
## 30 Pittsburgh 200
## 31 Portland 200
## 32 Raleigh/Greensboro 200
## 33 Richmond/Norfolk 200
## 34 Sacramento 200
## 35 San Diego 200
## 36 San Francisco 200
## 37 Seattle 200
## 38 Spokane 200
## 39 St. Louis 200
## 40 Syracuse 200
## 41 Tampa 200
# Frequency of each distinct average_price value in the 2019-2020 subset.
count(data, vars = "average_price")
## average_price freq
## 1 0.54 1
## 2 0.56 1
## 3 0.59 1
## 4 0.60 1
## 5 0.61 2
## 6 0.62 6
## 7 0.63 3
## 8 0.64 4
## 9 0.65 8
## 10 0.66 9
## 11 0.67 9
## 12 0.68 11
## 13 0.69 8
## 14 0.70 20
## 15 0.71 14
## 16 0.72 17
## 17 0.73 20
## 18 0.74 24
## 19 0.75 16
## 20 0.76 28
## 21 0.77 21
## 22 0.78 24
## 23 0.79 18
## 24 0.80 32
## 25 0.81 35
## 26 0.82 34
## 27 0.83 30
## 28 0.84 41
## 29 0.85 39
## 30 0.86 41
## 31 0.87 39
## 32 0.88 46
## 33 0.89 62
## 34 0.90 66
## 35 0.91 70
## 36 0.92 61
## 37 0.93 74
## 38 0.94 73
## 39 0.95 83
## 40 0.96 63
## 41 0.97 73
## 42 0.98 81
## 43 0.99 61
## 44 1.00 80
## 45 1.01 80
## 46 1.02 63
## 47 1.03 87
## 48 1.04 89
## 49 1.05 73
## 50 1.06 84
## 51 1.07 69
## 52 1.08 72
## 53 1.09 85
## 54 1.10 96
## 55 1.11 76
## 56 1.12 71
## 57 1.13 83
## 58 1.14 119
## 59 1.15 102
## 60 1.16 81
## 61 1.17 102
## 62 1.18 95
## 63 1.19 110
## 64 1.20 111
## 65 1.21 88
## 66 1.22 96
## 67 1.23 80
## 68 1.24 91
## 69 1.25 85
## 70 1.26 92
## 71 1.27 71
## 72 1.28 82
## 73 1.29 94
## 74 1.30 70
## 75 1.31 85
## 76 1.32 81
## 77 1.33 84
## 78 1.34 84
## 79 1.35 88
## 80 1.36 87
## 81 1.37 93
## 82 1.38 90
## 83 1.39 72
## 84 1.40 76
## 85 1.41 78
## 86 1.42 89
## 87 1.43 78
## 88 1.44 70
## 89 1.45 78
## 90 1.46 89
## 91 1.47 81
## 92 1.48 74
## 93 1.49 73
## 94 1.50 79
## 95 1.51 80
## 96 1.52 68
## 97 1.53 71
## 98 1.54 57
## 99 1.55 74
## 100 1.56 48
## 101 1.57 59
## 102 1.58 60
## 103 1.59 64
## 104 1.60 54
## 105 1.61 65
## 106 1.62 52
## 107 1.63 52
## 108 1.64 46
## 109 1.65 47
## 110 1.66 48
## 111 1.67 37
## 112 1.68 41
## 113 1.69 51
## 114 1.70 40
## 115 1.71 47
## 116 1.72 40
## 117 1.73 48
## 118 1.74 33
## 119 1.75 41
## 120 1.76 36
## 121 1.77 40
## 122 1.78 38
## 123 1.79 31
## 124 1.80 40
## 125 1.81 27
## 126 1.82 33
## 127 1.83 24
## 128 1.84 30
## 129 1.85 30
## 130 1.86 33
## 131 1.87 37
## 132 1.88 30
## 133 1.89 32
## 134 1.90 30
## 135 1.91 37
## 136 1.92 24
## 137 1.93 24
## 138 1.94 23
## 139 1.95 25
## 140 1.96 31
## 141 1.97 29
## 142 1.98 29
## 143 1.99 19
## 144 2.00 17
## 145 2.01 13
## 146 2.02 23
## 147 2.03 23
## 148 2.04 16
## 149 2.05 24
## 150 2.06 15
## 151 2.07 17
## 152 2.08 15
## 153 2.09 9
## 154 2.10 14
## 155 2.11 13
## 156 2.12 13
## 157 2.13 13
## 158 2.14 10
## 159 2.15 16
## 160 2.16 16
## 161 2.17 7
## 162 2.18 12
## 163 2.19 13
## 164 2.20 5
## 165 2.21 14
## 166 2.22 8
## 167 2.23 5
## 168 2.24 5
## 169 2.25 3
## 170 2.26 5
## 171 2.27 8
## 172 2.28 5
## 173 2.29 3
## 174 2.30 1
## 175 2.31 7
## 176 2.32 5
## 177 2.33 6
## 178 2.34 5
## 179 2.35 3
## 180 2.36 6
## 181 2.37 5
## 182 2.38 5
## 183 2.39 8
## 184 2.40 4
## 185 2.41 4
## 186 2.42 2
## 187 2.43 4
## 188 2.44 6
## 189 2.45 4
## 190 2.46 4
## 191 2.48 1
## 192 2.49 2
## 193 2.50 2
## 194 2.51 1
## 195 2.52 1
## 196 2.53 1
## 197 2.54 1
## 198 2.55 2
## 199 2.56 1
## 200 2.57 1
## 201 2.59 1
## 202 2.62 2
## 203 2.64 2
## 204 2.66 3
## 205 2.67 1
## 206 2.69 2
## 207 2.71 2
## 208 2.72 2
## 209 2.73 1
## 210 2.78 1
# Mean and median price, then the linear correlation of volume with price.
with(data, mean(average_price))
## [1] 1.353968
with(data, median(average_price))
## [1] 1.31
with(data, cor(total_volume, average_price))
## [1] -0.3882737
## [1] -0.3882737
# Load the organic-only subset and preview its first six rows.
data <- read.csv(file = "avocado_organic.csv")
head(data, n = 6L)
## date average_price total_volume type year geography
## 1 12/3/2017 1.44 3577 organic 2017 Albany
## 2 12/3/2017 1.62 10609 organic 2017 Atlanta
## 3 12/3/2017 1.58 38754 organic 2017 Baltimore/Washington
## 4 12/3/2017 1.77 1829 organic 2017 Boise
## 5 12/3/2017 1.88 21338 organic 2017 Boston
## 6 12/3/2017 1.18 7575 organic 2017 Buffalo/Rochester
## Mileage
## 1 2832
## 2 2199
## 3 2679
## 4 827
## 5 2998
## 6 2552
# plyr supplies count(); install it once with install.packages("plyr") if needed.
library(plyr)
# Number of rows recorded for each geography in the organic subset.
count(data, vars = "geography")
## geography freq
## 1 Albany 154
## 2 Atlanta 154
## 3 Baltimore/Washington 154
## 4 Boise 154
## 5 Boston 154
## 6 Buffalo/Rochester 154
## 7 Charlotte 154
## 8 Chicago 154
## 9 Cincinnati/Dayton 154
## 10 Columbus 154
## 11 Dallas/Ft. Worth 154
## 12 Denver 154
## 13 Detroit 154
## 14 Grand Rapids 154
## 15 Harrisburg/Scranton 154
## 16 Hartford/Springfield 154
## 17 Houston 154
## 18 Indianapolis 154
## 19 Jacksonville 154
## 20 Las Vegas 154
## 21 Los Angeles 154
## 22 Louisville 154
## 23 Miami/Ft. Lauderdale 154
## 24 Nashville 154
## 25 New Orleans/Mobile 154
## 26 New York 154
## 27 Orlando 154
## 28 Philadelphia 154
## 29 Phoenix/Tucson 154
## 30 Pittsburgh 154
## 31 Portland 154
## 32 Raleigh/Greensboro 154
## 33 Richmond/Norfolk 154
## 34 Sacramento 154
## 35 San Diego 154
## 36 San Francisco 154
## 37 Seattle 154
## 38 Spokane 154
## 39 St. Louis 154
## 40 Syracuse 154
## 41 Tampa 154
# Frequency of each distinct average_price value in the organic subset.
count(data, vars = "average_price")
## average_price freq
## 1 0.69 1
## 2 0.71 1
## 3 0.72 1
## 4 0.75 1
## 5 0.77 3
## 6 0.79 2
## 7 0.80 1
## 8 0.81 1
## 9 0.82 1
## 10 0.84 3
## 11 0.85 3
## 12 0.88 2
## 13 0.89 1
## 14 0.90 4
## 15 0.91 5
## 16 0.92 4
## 17 0.93 10
## 18 0.94 11
## 19 0.95 12
## 20 0.96 9
## 21 0.97 4
## 22 0.98 11
## 23 0.99 5
## 24 1.00 6
## 25 1.01 7
## 26 1.02 11
## 27 1.03 16
## 28 1.04 16
## 29 1.05 19
## 30 1.06 16
## 31 1.07 15
## 32 1.08 18
## 33 1.09 22
## 34 1.10 27
## 35 1.11 19
## 36 1.12 17
## 37 1.13 30
## 38 1.14 47
## 39 1.15 52
## 40 1.16 40
## 41 1.17 41
## 42 1.18 44
## 43 1.19 62
## 44 1.20 61
## 45 1.21 52
## 46 1.22 54
## 47 1.23 59
## 48 1.24 66
## 49 1.25 66
## 50 1.26 61
## 51 1.27 44
## 52 1.28 56
## 53 1.29 56
## 54 1.30 51
## 55 1.31 58
## 56 1.32 66
## 57 1.33 66
## 58 1.34 75
## 59 1.35 73
## 60 1.36 81
## 61 1.37 88
## 62 1.38 86
## 63 1.39 69
## 64 1.40 77
## 65 1.41 81
## 66 1.42 85
## 67 1.43 95
## 68 1.44 80
## 69 1.45 94
## 70 1.46 104
## 71 1.47 77
## 72 1.48 86
## 73 1.49 72
## 74 1.50 82
## 75 1.51 88
## 76 1.52 73
## 77 1.53 75
## 78 1.54 75
## 79 1.55 96
## 80 1.56 76
## 81 1.57 78
## 82 1.58 73
## 83 1.59 89
## 84 1.60 78
## 85 1.61 80
## 86 1.62 72
## 87 1.63 76
## 88 1.64 74
## 89 1.65 76
## 90 1.66 75
## 91 1.67 58
## 92 1.68 60
## 93 1.69 77
## 94 1.70 58
## 95 1.71 62
## 96 1.72 56
## 97 1.73 67
## 98 1.74 46
## 99 1.75 58
## 100 1.76 54
## 101 1.77 55
## 102 1.78 52
## 103 1.79 52
## 104 1.80 59
## 105 1.81 44
## 106 1.82 49
## 107 1.83 49
## 108 1.84 44
## 109 1.85 48
## 110 1.86 45
## 111 1.87 44
## 112 1.88 43
## 113 1.89 36
## 114 1.90 41
## 115 1.91 47
## 116 1.92 40
## 117 1.93 35
## 118 1.94 35
## 119 1.95 37
## 120 1.96 34
## 121 1.97 34
## 122 1.98 31
## 123 1.99 26
## 124 2.00 23
## 125 2.01 18
## 126 2.02 33
## 127 2.03 30
## 128 2.04 25
## 129 2.05 33
## 130 2.06 23
## 131 2.07 26
## 132 2.08 22
## 133 2.09 16
## 134 2.10 18
## 135 2.11 21
## 136 2.12 16
## 137 2.13 17
## 138 2.14 17
## 139 2.15 21
## 140 2.16 24
## 141 2.17 8
## 142 2.18 16
## 143 2.19 15
## 144 2.20 8
## 145 2.21 18
## 146 2.22 11
## 147 2.23 7
## 148 2.24 9
## 149 2.25 9
## 150 2.26 8
## 151 2.27 12
## 152 2.28 6
## 153 2.29 3
## 154 2.30 4
## 155 2.31 9
## 156 2.32 6
## 157 2.33 7
## 158 2.34 5
## 159 2.35 4
## 160 2.36 6
## 161 2.37 5
## 162 2.38 5
## 163 2.39 9
## 164 2.40 5
## 165 2.41 5
## 166 2.42 2
## 167 2.43 5
## 168 2.44 6
## 169 2.45 4
## 170 2.46 4
## 171 2.48 2
## 172 2.49 3
## 173 2.50 3
## 174 2.51 1
## 175 2.52 2
## 176 2.53 1
## 177 2.54 1
## 178 2.55 2
## 179 2.56 2
## 180 2.57 1
## 181 2.59 1
## 182 2.60 1
## 183 2.62 2
## 184 2.64 2
## 185 2.66 4
## 186 2.67 1
## 187 2.69 2
## 188 2.71 3
## 189 2.72 2
## 190 2.73 1
## 191 2.78 1
# Mean and median price, then the linear correlation of volume with price.
with(data, mean(average_price))
## [1] 1.575117
with(data, median(average_price))
## [1] 1.55
with(data, cor(total_volume, average_price))
## [1] 0.08112979
## [1] 0.08112979
# Load the conventional-only subset and preview its first six rows.
data <- read.csv(file = "avocado_conventional.csv")
head(data, n = 6L)
## date average_price total_volume type year geography
## 1 12/3/2017 1.39 139970 conventional 2017 Albany
## 2 12/3/2017 1.07 504933 conventional 2017 Atlanta
## 3 12/3/2017 1.43 658939 conventional 2017 Baltimore/Washington
## 4 12/3/2017 1.14 86646 conventional 2017 Boise
## 5 12/3/2017 1.40 488588 conventional 2017 Boston
## 6 12/3/2017 1.13 153282 conventional 2017 Buffalo/Rochester
## Mileage
## 1 2832
## 2 2199
## 3 2679
## 4 827
## 5 2998
## 6 2552
# plyr supplies count(); install it once with install.packages("plyr") if needed.
library(plyr)
# Number of rows recorded for each geography in the conventional subset.
count(data, vars = "geography")
## geography freq
## 1 Albany 154
## 2 Atlanta 154
## 3 Baltimore/Washington 154
## 4 Boise 154
## 5 Boston 154
## 6 Buffalo/Rochester 154
## 7 Charlotte 154
## 8 Chicago 154
## 9 Cincinnati/Dayton 154
## 10 Columbus 154
## 11 Dallas/Ft. Worth 154
## 12 Denver 154
## 13 Detroit 154
## 14 Grand Rapids 154
## 15 Harrisburg/Scranton 154
## 16 Hartford/Springfield 154
## 17 Houston 154
## 18 Indianapolis 154
## 19 Jacksonville 154
## 20 Las Vegas 154
## 21 Los Angeles 154
## 22 Louisville 154
## 23 Miami/Ft. Lauderdale 154
## 24 Nashville 154
## 25 New Orleans/Mobile 154
## 26 New York 154
## 27 Orlando 154
## 28 Philadelphia 154
## 29 Phoenix/Tucson 154
## 30 Pittsburgh 154
## 31 Portland 154
## 32 Raleigh/Greensboro 154
## 33 Richmond/Norfolk 154
## 34 Sacramento 154
## 35 San Diego 154
## 36 San Francisco 154
## 37 Seattle 154
## 38 Spokane 154
## 39 St. Louis 154
## 40 Syracuse 154
## 41 Tampa 154
# Frequency of each distinct average_price value in the conventional subset.
count(data, vars = "average_price")
## average_price freq
## 1 0.50 1
## 2 0.51 1
## 3 0.53 1
## 4 0.54 2
## 5 0.56 4
## 6 0.57 1
## 7 0.58 2
## 8 0.59 4
## 9 0.60 1
## 10 0.61 4
## 11 0.62 7
## 12 0.63 3
## 13 0.64 8
## 14 0.65 8
## 15 0.66 12
## 16 0.67 15
## 17 0.68 15
## 18 0.69 10
## 19 0.70 21
## 20 0.71 20
## 21 0.72 23
## 22 0.73 32
## 23 0.74 34
## 24 0.75 25
## 25 0.76 36
## 26 0.77 29
## 27 0.78 35
## 28 0.79 24
## 29 0.80 39
## 30 0.81 46
## 31 0.82 37
## 32 0.83 40
## 33 0.84 52
## 34 0.85 48
## 35 0.86 48
## 36 0.87 55
## 37 0.88 61
## 38 0.89 77
## 39 0.90 70
## 40 0.91 81
## 41 0.92 81
## 42 0.93 84
## 43 0.94 80
## 44 0.95 97
## 45 0.96 85
## 46 0.97 96
## 47 0.98 101
## 48 0.99 100
## 49 1.00 104
## 50 1.01 119
## 51 1.02 82
## 52 1.03 114
## 53 1.04 116
## 54 1.05 87
## 55 1.06 100
## 56 1.07 91
## 57 1.08 94
## 58 1.09 114
## 59 1.10 114
## 60 1.11 88
## 61 1.12 96
## 62 1.13 107
## 63 1.14 136
## 64 1.15 109
## 65 1.16 119
## 66 1.17 115
## 67 1.18 104
## 68 1.19 102
## 69 1.20 108
## 70 1.21 81
## 71 1.22 87
## 72 1.23 76
## 73 1.24 76
## 74 1.25 82
## 75 1.26 82
## 76 1.27 90
## 77 1.28 86
## 78 1.29 97
## 79 1.30 68
## 80 1.31 80
## 81 1.32 75
## 82 1.33 54
## 83 1.34 62
## 84 1.35 71
## 85 1.36 70
## 86 1.37 61
## 87 1.38 69
## 88 1.39 60
## 89 1.40 54
## 90 1.41 60
## 91 1.42 61
## 92 1.43 44
## 93 1.44 40
## 94 1.45 43
## 95 1.46 36
## 96 1.47 41
## 97 1.48 33
## 98 1.49 33
## 99 1.50 41
## 100 1.51 30
## 101 1.52 35
## 102 1.53 42
## 103 1.54 18
## 104 1.55 23
## 105 1.56 19
## 106 1.57 19
## 107 1.58 16
## 108 1.59 22
## 109 1.60 17
## 110 1.61 15
## 111 1.62 11
## 112 1.63 10
## 113 1.64 13
## 114 1.65 10
## 115 1.66 11
## 116 1.67 11
## 117 1.68 7
## 118 1.69 7
## 119 1.70 7
## 120 1.71 4
## 121 1.72 9
## 122 1.73 3
## 123 1.74 8
## 124 1.75 6
## 125 1.76 4
## 126 1.77 7
## 127 1.78 6
## 128 1.79 4
## 129 1.80 4
## 130 1.81 3
## 131 1.82 4
## 132 1.83 2
## 133 1.84 1
## 134 1.86 2
## 135 1.87 2
## 136 1.88 1
## 137 1.89 2
## 138 1.90 1
## 139 1.91 2
## 140 1.92 1
## 141 1.95 1
## 142 1.96 2
## 143 1.98 2
## 144 2.01 1
## 145 2.02 1
# Mean and median price, then the linear correlation of volume with price.
with(data, mean(average_price))
## [1] 1.142566
with(data, median(average_price))
## [1] 1.13
with(data, cor(total_volume, average_price))
## [1] -0.177141
## [1] -0.177141
To calculate the price elasticity of demand (PE) we use the formula: PE = (ΔQ/ΔP) * (P/Q) (Iacobacci, 2015, p.134-135).
(ΔQ/ΔP) is determined by the coefficient in our regression analysis below. Here Beta represents the change in the dependent variable y with respect to x (i.e. Δy/Δx = (ΔQ/ΔP)). To determine (P/Q) we will use the average price and average sales volume (Salem, 2014).
# Price elasticity of demand at the sample means, full data set.
data <- read.csv(file = "avocado_2020.csv")

# Scatter plot of volume against price with the fitted regression line in red.
plot(total_volume ~ average_price, data = data)
regr <- lm(total_volume ~ average_price, data = data)
abline(regr, col = "red")
coef(regr)
## (Intercept) average_price
## 1179544.8 -628686.6

# Elasticity = regression slope (dQ/dP) scaled by mean price over mean volume.
Beta <- coef(regr)[["average_price"]]
P <- mean(data[["average_price"]])
Q <- mean(data[["total_volume"]])
elasticity <- Beta * P / Q
elasticity
## [1] -2.626474
# Price elasticity of demand at the sample means, 2017-2018 subset.
data <- read.csv(file = "avocado_2017_18.csv")

# Scatter plot of volume against price with the fitted regression line in red.
plot(total_volume ~ average_price, data = data)
regr <- lm(total_volume ~ average_price, data = data)
abline(regr, col = "red")
coef(regr)
## (Intercept) average_price
## 1330796.1 -752077.7

# Elasticity = regression slope (dQ/dP) scaled by mean price over mean volume.
Beta <- coef(regr)[["average_price"]]
P <- mean(data[["average_price"]])
Q <- mean(data[["total_volume"]])
elasticity <- Beta * P / Q
elasticity
## [1] -3.405812
# Price elasticity of demand at the sample means, 2019-2020 subset.
data <- read.csv(file = "avocado_2019_20.csv")

# Scatter plot of volume against price with the fitted regression line in red.
plot(total_volume ~ average_price, data = data)
regr <- lm(total_volume ~ average_price, data = data)
abline(regr, col = "red")
coef(regr)
## (Intercept) average_price
## 1118604.9 -576686.2

# Elasticity = regression slope (dQ/dP) scaled by mean price over mean volume.
Beta <- coef(regr)[["average_price"]]
P <- mean(data[["average_price"]])
Q <- mean(data[["total_volume"]])
elasticity <- Beta * P / Q
elasticity
## [1] -2.31154
# Price elasticity of demand at the sample means, organic subset.
data <- read.csv(file = "avocado_organic.csv")

# Scatter plot of volume against price with the fitted regression line in red.
plot(total_volume ~ average_price, data = data)
regr <- lm(total_volume ~ average_price, data = data)
abline(regr, col = "red")
coef(regr)
## (Intercept) average_price
## 13574.048 7394.563

# Elasticity = regression slope (dQ/dP) scaled by mean price over mean volume.
Beta <- coef(regr)[["average_price"]]
P <- mean(data[["average_price"]])
Q <- mean(data[["total_volume"]])
elasticity <- Beta * P / Q
elasticity
## [1] 0.4618033
# Price elasticity of demand at the sample means, conventional subset.
data <- read.csv(file = "avocado_conventional.csv")

# Scatter plot of volume against price with the fitted regression line in red.
plot(total_volume ~ average_price, data = data)
regr <- lm(total_volume ~ average_price, data = data)
abline(regr, col = "red")
coef(regr)
## (Intercept) average_price
## 1159708.1 -467728.6

# Elasticity = regression slope (dQ/dP) scaled by mean price over mean volume.
Beta <- coef(regr)[["average_price"]]
P <- mean(data[["average_price"]])
Q <- mean(data[["total_volume"]])
elasticity <- Beta * P / Q
elasticity
## [1] -0.8546503
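Price elasticity of demand measures how strongly the quantity demanded responds to a change in price; demand normally moves inversely to price - as price goes up, demand goes down, and when price goes down, demand goes up. Avocados have elastic demand overall, as depicted in this analysis: the elasticity for the full data set is -2.626474, so a 1% increase in avocado price is associated with a decrease in sales volume of roughly 2.63% (in absolute terms, the regression slope implies about 628,687 fewer units sold for every $1 increase in price). Note that the estimates differ when each type is analyzed separately (-0.85 for conventional and 0.46 for organic). The demand for avocados is much higher below the $1 price and demand begins to decrease above this mark and continues to fall with the most significant decline in demand at $1.75. It is difficult to identify which consumers are purchasing at this price point, but it could be restaurants.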
Salem, 2014. Price Elasticity with R. http://www.salemmarafi.com/code/price-elasticity-with-r/
Xu, 2021. Avocado Pricing at Retail. https://rpubs.com/utjimmyx/avocado_pricing
RStudio. (n.d.). R Markdown: The Definitive Guide. Retrieved October 9, 2021, from https://rmarkdown.rstudio.com/index.html
# Install forecast only when it is missing, so that re-running or knitting the
# document does not download and reinstall the package every time.
if (!requireNamespace("forecast", quietly = TRUE)) {
  install.packages("forecast")
}
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.1'
## (as 'lib' is unspecified)
library(readxl)
# forecast provides auto.arima() and forecast(), used in the chunks that follow.
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
# Build one weekly price series and forecast it 17 periods ahead.
#
# The CSV holds one row per date, geography and type, so passing
# data$average_price straight to ts() treats every row as its own week (the
# time index then runs centuries past 2020). Prices are therefore averaged per
# date first, as an unweighted mean across all geographies and types.
# NOTE(review): the forecast table printed after this chunk was produced from
# the un-aggregated series and must be regenerated by re-knitting.
data <- read.csv("avocado_2020.csv")
weekly <- aggregate(average_price ~ date, data = data, FUN = mean)
weekly$date <- as.Date(as.character(weekly$date), format = "%m/%d/%Y")
stopifnot(!anyNA(weekly$date))
weekly <- weekly[order(weekly$date), ]

# Start the series at the week of the first observation rather than a
# hard-coded week 12; the week number is capped at 52 to match the frequency.
first_date <- weekly$date[1]
start_year <- as.integer(format(first_date, "%Y"))
start_week <- min((as.integer(format(first_date, "%j")) - 1L) %/% 7L + 1L, 52L)

# ts() assumes evenly spaced observations - presumably one date per week;
# verify against the data.
tsdata <- ts(
  weekly$average_price,
  frequency = 52,
  start = c(start_year, start_week)
)
plot(tsdata)
autoarima1 <- auto.arima(tsdata)
forecast1 <- forecast(autoarima1, h = 17)
forecast1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2260.058 0.8417413 0.5050377 1.178445 0.3267976 1.356685
## 2260.077 1.2754582 0.9387530 1.612163 0.7605121 1.790404
## 2260.096 0.9339693 0.5831672 1.284771 0.3974638 1.470475
## 2260.115 1.2752001 0.9232066 1.627194 0.7368725 1.813528
## 2260.135 1.0572080 0.6846290 1.429787 0.4873976 1.627018
## 2260.154 1.3194826 0.9405657 1.698400 0.7399792 1.898986
## 2260.173 1.0612866 0.6762926 1.446281 0.4724891 1.650084
## 2260.192 1.3098669 0.9214823 1.698252 0.7158840 1.903850
## 2260.212 1.0914930 0.6972070 1.485779 0.4884846 1.694501
## 2260.231 1.2898736 0.8916897 1.688058 0.6809038 1.898843
## 2260.250 1.0901358 0.6877048 1.492567 0.4746707 1.705601
## 2260.269 1.2715660 0.8668298 1.676302 0.6525755 1.890556
## 2260.288 1.1081544 0.6999667 1.516342 0.4838852 1.732424
## 2260.308 1.2687335 0.8586090 1.678858 0.6415022 1.895965
## 2260.327 1.1195775 0.7066106 1.532544 0.4879991 1.751156
## 2260.346 1.2661660 0.8518491 1.680483 0.6325230 1.899809
## 2260.365 1.1270718 0.7105043 1.543639 0.4899867 1.764157
# Plot the forecast object created above.
plot(forecast1)
# Residual diagnostics: residuals over time, normal Q-Q plot, and the
# autocorrelation / partial autocorrelation functions. The argument expression
# is kept literal because the plot labels are derived from it.
plot(forecast1$residuals)
qqnorm(forecast1$residuals)
acf(forecast1$residuals)
pacf(forecast1$residuals)
# Print the fitted model: coefficients, information criteria and error measures.
summary(autoarima1)
## Series: tsdata
## ARIMA(5,1,1)(1,0,0)[52]
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 sar1
## -0.0098 0.2795 -0.0968 0.2675 -0.1628 -0.9871 -0.0200
## s.e. 0.0092 0.0086 0.0088 0.0086 0.0089 0.0014 0.0096
##
## sigma^2 estimated as 0.06903: log likelihood=-1037.98
## AIC=2091.96 AICc=2091.97 BIC=2151.51
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0009515606 0.262648 0.2047202 -3.862788 15.8914 0.7099497
## ACF1
## Training set 0.0252883
# Redraw the forecast plot after the model summary.
plot(forecast1)