- Expected Kwh Annual Production/Output

Download the data from the data.ny.gov website

# Address of the CSV export of the dataset (hosted on data.ny.gov)
URL <- "https://data.ny.gov/api/views/3pzs-2zsk/rows.csv?accessType=DOWNLOAD"
# Fetch the file from that address and parse it into a data frame;
# the first row of the file supplies the column names
solar_PV <- utils::read.csv(file = URL, header = TRUE)

Get a basic overview of the data, such as its structure and summary statistics

# Print a compact overview of the dataset: its dimensions, then each
# column's name, type and first few values
utils::str(object = solar_PV)
## 'data.frame':    1529 obs. of  11 variables:
##  $ Project.Install.Year          : int  2010 2010 2010 2010 2010 2010 2010 2010 2010 2010 ...
##  $ Contractor                    : Factor w/ 92 levels "1st Light Energy Inc.",..: 58 58 58 58 58 58 58 58 58 58 ...
##  $ County                        : Factor w/ 62 levels "","Albany","Allegany",..: 3 7 12 15 16 17 18 20 23 29 ...
##  $ City                          : Factor w/ 364 levels "Airmont","Akron",..: 236 236 236 236 236 236 236 236 236 236 ...
##  $ Project.Count.by.City         : int  1 1 1 4 2 2 1 1 1 2 ...
##  $ Project.Cost                  : num  34440 39000 20198 163678 70272 ...
##  $ Incentive..Dollars            : num  8050 8820 4428 45080 21070 ...
##  $ Total.Nameplate.KW            : num  4.6 5.04 2.53 25.76 12.24 ...
##  $ Expected.KWh.Annual.Production: num  5400 5916 2970 30238 14368 ...
##  $ Solicitation                  : Factor w/ 1 level "PON 2112": 1 1 1 1 1 1 1 1 1 1 ...