GlobalScholars_PovertyProject

Importing the Dataset

# Clear every object from the current workspace before the analysis starts.
# NOTE(review): this also discards anything already loaded in the session;
# confirm that is intended.
rm(list = ls())

# Read the vegetable price CSV from the Downloads folder into a data frame.
Vegetable.Prices.2022 <- read.csv(
  file = "~/Downloads/Vegetable-Prices-2022.csv"
)

Loading Packages

library(visdat)
library(stargazer)

Please cite as: 
 Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
 R package version 5.2.3. https://CRAN.R-project.org/package=stargazer 
library(stargazer)
library(psych)
# install.packages("naniar")
library(naniar)
library(ggplot2)

Attaching package: 'ggplot2'
The following objects are masked from 'package:psych':

    %+%, alpha
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# install.packages("Hmisc")
library("Hmisc")

Attaching package: 'Hmisc'
The following objects are masked from 'package:dplyr':

    src, summarize
The following object is masked from 'package:psych':

    describe
The following objects are masked from 'package:base':

    format.pval, units
library(MASS)

Attaching package: 'MASS'
The following object is masked from 'package:dplyr':

    select

Cleaning the data / Checking for missing data

# Plot an overview of the data frame, then a plot of its missing values.
vis_dat(Vegetable.Prices.2022)

vis_miss(Vegetable.Prices.2022)

# Count the NA entries in each column. vapply() pins the result to exactly
# one integer per column, whereas sapply() silently changes its return type
# when the input is empty or irregular.
missing_values_count <- vapply(
  Vegetable.Prices.2022,
  function(column) sum(is.na(column)),
  integer(1)
)
print(missing_values_count)
         Vegetable               Form        RetailPrice    RetailPriceUnit 
                 0                  0                  0                  0 
             Yield  CupEquivalentSize  CupEquivalentUnit CupEquivalentPrice 
                 0                  0                  0                  0 
# Vegetable.Prices.2022$TIDAL.Popularity <- NULL

# Preview the first ten rows of the dataset (no variables are renamed here).

head(x = Vegetable.Prices.2022, n = 10)
      Vegetable   Form RetailPrice RetailPriceUnit  Yield CupEquivalentSize
1  Acorn squash  Fresh      1.2136       per pound 0.4586            0.4519
2     Artichoke  Fresh      2.4703       per pound 0.3750            0.3858
3     Artichoke Canned      3.4498       per pound 0.6500            0.3858
4     Asparagus  Fresh      2.9531       per pound 0.4938            0.3968
5     Asparagus Canned      3.4328       per pound 0.6500            0.3968
6     Asparagus Frozen      6.8212       per pound 1.0335            0.3968
7      Avocados  Fresh      2.6737       per pound 0.7408            0.3197
8         Beets Canned      1.1431       per pound 0.6500            0.3748
9   Black beans Canned      1.2387       per pound 0.6500            0.3858
10  Black beans  Dried      1.5250       per pound 2.4692            0.3858
   CupEquivalentUnit CupEquivalentPrice
1             pounds             1.1961
2             pounds             2.5415
3             pounds             2.0476
4             pounds             2.3731
5             pounds             2.0958
6             pounds             2.6191
7             pounds             1.1538
8             pounds             0.6591
9             pounds             0.7352
10            pounds             0.2383
# Show every row whose Vegetable value is "Asparagus". The Vegetable column
# is an unnamed character vector, so indexing it with ['Asparagus'] looks up
# an element *name* that does not exist and prints NA; filter on the values
# instead.
print(subset(Vegetable.Prices.2022, Vegetable == "Asparagus"))

Summary Statistics

# Print a plain-text table of summary statistics for the data frame,
# rounded to two decimal places.
stargazer(
  Vegetable.Prices.2022,
  digits = 2,
  title = "Summary statistics",
  type = "text"
)

Summary statistics
=============================================
Statistic          N  Mean St. Dev. Min  Max 
---------------------------------------------
RetailPrice        93 2.11   1.08   0.80 6.82
Yield              93 0.96   0.49   0.38 2.54
CupEquivalentSize  93 0.34   0.07   0.15 0.54
CupEquivalentPrice 93 0.83   0.51   0.22 2.62
---------------------------------------------

ggplots

# Scatter plot of yield (y) against retail price (x).
ggplot(
  data = Vegetable.Prices.2022,
  mapping = aes(x = RetailPrice, y = Yield)
) +
  geom_point(colour = "lightblue") +
  ggtitle("2022 Vegetable Prices; Retail Price by Yield")

# Retail price for each vegetable. The title now names what is plotted
# (it previously repeated the yield plot's title), and the x-axis labels are
# rotated so the many vegetable names do not overlap.
ggplot(
  data = Vegetable.Prices.2022,
  mapping = aes(x = Vegetable, y = RetailPrice)
) +
  geom_point(colour = "lightblue") +
  ggtitle("2022 Vegetable Prices; Retail Price by Vegetable") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Mass Food Access Dataset

# Load the Food Access Research Atlas. The first row of the CSV holds the
# column names (CensusTract, State, County, ...), so it is read as the
# header; with header = FALSE that row became data and every column was
# parsed as character. Missing values appear in the file as the literal
# string "NULL", so it is mapped to NA to let numeric columns stay numeric.
# NOTE: the str() listing printed after this chunk was captured before this
# fix (columns V1..V147, all chr) and needs to be regenerated.
Food.Access.Research.Atlas <- read.csv(
  "~/Downloads/2019 Food Access Research Atlas Data/Food Access Research Atlas.csv",
  header = TRUE,
  na.strings = c("NA", "NULL")
)

# sub_data <- Food.Access.Research.Atlas[
#   Food.Access.Research.Atlas$State == "Massachusetts",
# ]
#vis_dat(Food.Access.Research.Atlas)
#vis_miss(Food.Access.Research.Atlas)
#missing_values_count <- sapply(Food.Access.Research.Atlas, function(x) sum(is.na(x)))
#print(missing_values_count)

# Inspect the column names and types of the loaded table.
str(Food.Access.Research.Atlas)
'data.frame':   72532 obs. of  147 variables:
 $ V1  : chr  "CensusTract" "1001020100" "1001020200" "1001020300" ...
 $ V2  : chr  "State" "Alabama" "Alabama" "Alabama" ...
 $ V3  : chr  "County" "Autauga County" "Autauga County" "Autauga County" ...
 $ V4  : chr  "Urban" "1" "1" "1" ...
 $ V5  : chr  "Pop2010" "1912" "2170" "3373" ...
 $ V6  : chr  "OHU2010" "693" "743" "1256" ...
 $ V7  : chr  "GroupQuartersFlag" "0" "0" "0" ...
 $ V8  : chr  "NUMGQTRS" "0" "181" "0" ...
 $ V9  : chr  "PCTGQTRS" "0" "8.34" "0" ...
 $ V10 : chr  "LILATracts_1And10" "0" "1" "0" ...
 $ V11 : chr  "LILATracts_halfAnd10" "0" "1" "0" ...
 $ V12 : chr  "LILATracts_1And20" "0" "1" "0" ...
 $ V13 : chr  "LILATracts_Vehicle" "0" "0" "0" ...
 $ V14 : chr  "HUNVFlag" "0" "0" "0" ...
 $ V15 : chr  "LowIncomeTracts" "0" "1" "0" ...
 $ V16 : chr  "PovertyRate" "11.3" "17.9" "15" ...
 $ V17 : chr  "MedianFamilyIncome" "81250" "49000" "62609" ...
 $ V18 : chr  "LA1and10" "1" "1" "1" ...
 $ V19 : chr  "LAhalfand10" "1" "1" "1" ...
 $ V20 : chr  "LA1and20" "1" "1" "1" ...
 $ V21 : chr  "LATracts_half" "1" "1" "1" ...
 $ V22 : chr  "LATracts1" "1" "1" "1" ...
 $ V23 : chr  "LATracts10" "0" "0" "0" ...
 $ V24 : chr  "LATracts20" "0" "0" "0" ...
 $ V25 : chr  "LATractsVehicle_20" "0" "0" "0" ...
 $ V26 : chr  "LAPOP1_10" "1896" "1261" "1552" ...
 $ V27 : chr  "LAPOP05_10" "1912" "2170" "2857" ...
 $ V28 : chr  "LAPOP1_20" "1896" "1261" "1552" ...
 $ V29 : chr  "LALOWI1_10" "461" "604" "478" ...
 $ V30 : chr  "LALOWI05_10" "467" "962" "971" ...
 $ V31 : chr  "LALOWI1_20" "461" "604" "478" ...
 $ V32 : chr  "lapophalf" "1912" "2170" "2857" ...
 $ V33 : chr  "lapophalfshare" "100" "100" "84.7" ...
 $ V34 : chr  "lalowihalf" "467" "962" "971" ...
 $ V35 : chr  "lalowihalfshare" "24.42" "44.34" "28.79" ...
 $ V36 : chr  "lakidshalf" "507" "606" "771" ...
 $ V37 : chr  "lakidshalfshare" "26.52" "27.93" "22.86" ...
 $ V38 : chr  "laseniorshalf" "221" "214" "358" ...
 $ V39 : chr  "laseniorshalfshare" "11.56" "9.86" "10.6" ...
 $ V40 : chr  "lawhitehalf" "1622" "888" "2177" ...
 $ V41 : chr  "lawhitehalfshare" "84.83" "40.92" "64.53" ...
 $ V42 : chr  "lablackhalf" "217" "1217" "554" ...
 $ V43 : chr  "lablackhalfshare" "11.35" "56.08" "16.43" ...
 $ V44 : chr  "laasianhalf" "14" "5" "10" ...
 $ V45 : chr  "laasianhalfshare" "0.73" "0.23" "0.3" ...
 $ V46 : chr  "lanhopihalf" "0" "0" "1" ...
 $ V47 : chr  "lanhopihalfshare" "0" "0" "0.03" ...
 $ V48 : chr  "laaianhalf" "14" "5" "10" ...
 $ V49 : chr  "laaianhalfshare" "0.73" "0.23" "0.3" ...
 $ V50 : chr  "laomultirhalf" "45" "55" "105" ...
 $ V51 : chr  "laomultirhalfshare" "2.35" "2.53" "3.1" ...
 $ V52 : chr  "lahisphalf" "44" "75" "78" ...
 $ V53 : chr  "lahisphalfshare" "2.3" "3.46" "2.3" ...
 $ V54 : chr  "lahunvhalf" "5" "93" "39" ...
 $ V55 : chr  "lahunvhalfshare" "0.79" "12.47" "3.09" ...
 $ V56 : chr  "lasnaphalf" "92" "161" "139" ...
 $ V57 : chr  "lasnaphalfshare" "13.33" "21.7" "11.05" ...
 $ V58 : chr  "lapop1" "1896" "1261" "1552" ...
 $ V59 : chr  "lapop1share" "99.19" "58.11" "46" ...
 $ V60 : chr  "lalowi1" "461" "604" "478" ...
 $ V61 : chr  "lalowi1share" "24.11" "27.83" "14.18" ...
 $ V62 : chr  "lakids1" "504" "406" "416" ...
 $ V63 : chr  "lakids1share" "26.33" "18.69" "12.34" ...
 $ V64 : chr  "laseniors1" "219" "127" "201" ...
 $ V65 : chr  "laseniors1share" "11.44" "5.83" "5.96" ...
 $ V66 : chr  "lawhite1" "1611" "357" "1242" ...
 $ V67 : chr  "lawhite1share" "84.26" "16.43" "36.81" ...
 $ V68 : chr  "lablack1" "214" "854" "255" ...
 $ V69 : chr  "lablack1share" "11.17" "39.36" "7.56" ...
 $ V70 : chr  "laasian1" "14" "4" "8" ...
 $ V71 : chr  "laasian1share" "0.72" "0.18" "0.24" ...
 $ V72 : chr  "lanhopi1" "0" "0" "0" ...
 $ V73 : chr  "lanhopi1share" "0" "0" "0" ...
 $ V74 : chr  "laaian1" "14" "4" "2" ...
 $ V75 : chr  "laaian1share" "0.73" "0.2" "0.06" ...
 $ V76 : chr  "laomultir1" "44" "42" "45" ...
 $ V77 : chr  "laomultir1share" "2.31" "1.93" "1.33" ...
 $ V78 : chr  "lahisp1" "43" "33" "36" ...
 $ V79 : chr  "lahisp1share" "2.27" "1.52" "1.08" ...
 $ V80 : chr  "lahunv1" "5" "67" "0" ...
 $ V81 : chr  "lahunv1share" "0.79" "9" "0" ...
 $ V82 : chr  "lasnap1" "92" "96" "74" ...
 $ V83 : chr  "lasnap1share" "13.22" "12.95" "5.87" ...
 $ V84 : chr  "lapop10" "NULL" "NULL" "NULL" ...
 $ V85 : chr  "lapop10share" "NULL" "NULL" "NULL" ...
 $ V86 : chr  "lalowi10" "NULL" "NULL" "NULL" ...
 $ V87 : chr  "lalowi10share" "NULL" "NULL" "NULL" ...
 $ V88 : chr  "lakids10" "NULL" "NULL" "NULL" ...
 $ V89 : chr  "lakids10share" "NULL" "NULL" "NULL" ...
 $ V90 : chr  "laseniors10" "NULL" "NULL" "NULL" ...
 $ V91 : chr  "laseniors10share" "NULL" "NULL" "NULL" ...
 $ V92 : chr  "lawhite10" "NULL" "NULL" "NULL" ...
 $ V93 : chr  "lawhite10share" "NULL" "NULL" "NULL" ...
 $ V94 : chr  "lablack10" "NULL" "NULL" "NULL" ...
 $ V95 : chr  "lablack10share" "NULL" "NULL" "NULL" ...
 $ V96 : chr  "laasian10" "NULL" "NULL" "NULL" ...
 $ V97 : chr  "laasian10share" "NULL" "NULL" "NULL" ...
 $ V98 : chr  "lanhopi10" "NULL" "NULL" "NULL" ...
 $ V99 : chr  "lanhopi10share" "NULL" "NULL" "NULL" ...
  [list output truncated]
# Keep only the rows that contain no NA in any column.
# NOTE(review): the str() output above shows missing values stored as the
# text "NULL"; whether those count as NA here depends on how the CSV was
# read, so compare nrow(df_clean) with nrow(Food.Access.Research.Atlas)
# before relying on df_clean.
df_clean <- na.omit(Food.Access.Research.Atlas)

#Food.Access.Research.Atlas$TIDAL.Popularity <- NULL