Salaries vary by location: The average salary for tech jobs in different geographical locations (e.g. San Francisco vs. Bentonville) may differ significantly.
Company size is related to employee compensation: Larger companies may offer higher salaries and bonuses than smaller companies.
Job titles and experience level influence salary: Different job titles (e.g. systems engineer vs. senior developer) or experience levels (e.g. total experience vs. employer experience) may be associated with different levels of compensation.
Signing and annual bonuses are offered by certain employer or location: Employers or locations may be more likely to offer signing or annual bonuses to their employees.
Stock value bonus is related to company size or location: Companies in certain locations or of certain sizes may be more likely to offer stock value bonus to their employees.
Employer experience is related to employee compensation: Employees with more experience with their current employer may be compensated more than those with less experience.
#Load Data
df=read.csv("salaries_clean.csv")
df
df
str(df)
## 'data.frame': 1655 obs. of 19 variables:
## $ index : int 0 1 2 3 4 5 6 7 8 9 ...
## $ salary_id : int 1 3 4 6 12 14 16 17 21 23 ...
## $ employer_name : chr "opower" "walmart" "vertical knowledge" "netapp" ...
## $ location_name : chr "san francisco, ca" "bentonville, ar" "cleveland, oh" "waltham" ...
## $ location_state : chr "CA" "AR" "OH" "" ...
## $ location_country : chr "US" "US" "US" "" ...
## $ location_latitude : num 37.8 36.4 41.5 NA NA ...
## $ location_longitude : num -122.4 -94.2 -81.7 NA NA ...
## $ job_title : chr "systems engineer" "senior developer" "software engineer" "mts" ...
## $ job_title_category : chr "Engineering" "Software" "Software" "Other" ...
## $ job_title_rank : chr "" "Senior" "" "" ...
## $ total_experience_years : num 13 15 4 4 4 5 4 8 2 1 ...
## $ employer_experience_years: num 2 8 1 0 3 1.5 2.5 2 1 1 ...
## $ annual_base_pay : num 125000 65000 86000 105000 110000 40000 45000 135000 105000 80000 ...
## $ signing_bonus : num 5000 NA 5000 5000 5000 0 0 0 0 0 ...
## $ annual_bonus : num 0 5000 6000 8500 7000 500 1500 0 47000 0 ...
## $ stock_value_bonus : chr "5000 shares" "3,000" "0" "0" ...
## $ comments : chr "Don't work here." "" "" "" ...
## $ submitted_at : chr "3/21/16 12:58" "3/21/16 12:58" "3/21/16 12:59" "3/21/16 13:00" ...
colnames(df)
## [1] "index" "salary_id"
## [3] "employer_name" "location_name"
## [5] "location_state" "location_country"
## [7] "location_latitude" "location_longitude"
## [9] "job_title" "job_title_category"
## [11] "job_title_rank" "total_experience_years"
## [13] "employer_experience_years" "annual_base_pay"
## [15] "signing_bonus" "annual_bonus"
## [17] "stock_value_bonus" "comments"
## [19] "submitted_at"
summary(df)
## index salary_id employer_name location_name
## Min. : 0.0 Min. : 1 Length:1655 Length:1655
## 1st Qu.: 413.5 1st Qu.: 897 Class :character Class :character
## Median : 827.0 Median :1711 Mode :character Mode :character
## Mean : 827.0 Mean :1684
## 3rd Qu.:1240.5 3rd Qu.:2477
## Max. :1654.0 Max. :3298
##
## location_state location_country location_latitude location_longitude
## Length:1655 Length:1655 Min. :-41.00 Min. :-123.27
## Class :character Class :character 1st Qu.: 37.41 1st Qu.:-102.70
## Mode :character Mode :character Median : 38.58 Median : -95.00
## Mean : 37.75 Mean : -64.71
## 3rd Qu.: 45.44 3rd Qu.: -64.56
## Max. : 65.00 Max. : 174.00
## NA's :863 NA's :863
## job_title job_title_category job_title_rank
## Length:1655 Length:1655 Length:1655
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## total_experience_years employer_experience_years annual_base_pay
## Min. : 0.000 Min. : 0.000 Min. :0.000e+00
## 1st Qu.: 3.000 1st Qu.: 1.000 1st Qu.:6.100e+04
## Median : 5.000 Median : 2.000 Median :9.948e+04
## Mean : 6.756 Mean : 2.656 Mean :6.325e+06
## 3rd Qu.:10.000 3rd Qu.: 3.000 3rd Qu.:1.300e+05
## Max. :56.000 Max. :58.000 Max. :1.000e+10
## NA's :47 NA's :47 NA's :4
## signing_bonus annual_bonus stock_value_bonus comments
## Min. : 0 Min. : 0 Length:1655 Length:1655
## 1st Qu.: 0 1st Qu.: 0 Class :character Class :character
## Median : 0 Median : 500 Mode :character Mode :character
## Mean : 22406 Mean : 11776
## 3rd Qu.: 5000 3rd Qu.: 10000
## Max. :8999999 Max. :2000000
## NA's :323 NA's :319
## submitted_at
## Length:1655
## Class :character
## Mode :character
##
##
##
##
#Plotting Location names
ggplot(df,aes(x=location_name,size=)) + geom_bar()
library(dplyr)
count(df,location_name)
length(df$location_name)
## [1] 1655
Remove all the rows where location name is a number
library(dplyr)
x=filter(df, !is.numeric(location_name))
count(x,location_name)
library(stringr)
select(df,location_name) %>%
filter(!is.na(df$location_name))
na_counts <- colSums(is.na(df))
na_counts
## index salary_id employer_name
## 0 0 0
## location_name location_state location_country
## 0 0 0
## location_latitude location_longitude job_title
## 863 863 0
## job_title_category job_title_rank total_experience_years
## 0 0 47
## employer_experience_years annual_base_pay signing_bonus
## 47 4 323
## annual_bonus stock_value_bonus comments
## 319 0 0
## submitted_at
## 0
empty_string_counts <- lapply(df, function(x) sum(nchar(x) == 0))
empty_string_counts
## $index
## [1] 0
##
## $salary_id
## [1] 0
##
## $employer_name
## [1] 4
##
## $location_name
## [1] 0
##
## $location_state
## [1] 1097
##
## $location_country
## [1] 863
##
## $location_latitude
## [1] NA
##
## $location_longitude
## [1] NA
##
## $job_title
## [1] 0
##
## $job_title_category
## [1] 0
##
## $job_title_rank
## [1] 1230
##
## $total_experience_years
## [1] NA
##
## $employer_experience_years
## [1] NA
##
## $annual_base_pay
## [1] NA
##
## $signing_bonus
## [1] NA
##
## $annual_bonus
## [1] NA
##
## $stock_value_bonus
## [1] 402
##
## $comments
## [1] 1363
##
## $submitted_at
## [1] 0
total_missing_counts <- lapply(df, function(x) sum(is.na(x) | nchar(x) == 0))
# convert the total_missing_counts vector to a dataframe
missing_counts_df <- data.frame(feature = names(total_missing_counts),
missing_count = as.numeric(total_missing_counts))
# calculate the percentage of missing values for each feature
missing_counts_df <- missing_counts_df %>%
mutate(missing_percent = (missing_count / nrow(df)) * 100)
# create the bar plot using the missing_percent column
ggplot(missing_counts_df, aes(y = feature, x = missing_percent)) +
geom_bar(stat = "identity") +
ylab("Feature") +
xlab("Missing Value Percent") +
xlim(c(0,100))+
ggtitle("Missing Value Percent by Feature")
# Using features PAY : annual_base_pay, # EXPERIENCE :
total_experience_years, employer_experience_years, # LOCATION : #
EMPLOYER : employer_name # JOB INFO : job_title_rank , job_title
`
library(stringdist)
location_name_cleaned <- as.character(df$location_name)
location_name_cleaned <- tolower(location_name_cleaned)
location_name_cleaned<-trimws(location_name_cleaned)
# remove numbers and special characters
location_name_cleaned<- gsub("[0-9!@#$%^&*()_+=-]", "", location_name_cleaned)
location_name_cleaned
## [1] "san francisco, ca"
## [2] "bentonville, ar"
## [3] "cleveland, oh"
## [4] "waltham"
## [5] "cupertino"
## [6] "eastern oregon"
## [7] "madison, wi"
## [8] "new york city"
## [9] "nyc"
## [10] "san francisco"
## [11] "denver, co"
## [12] "chicago"
## [13] "west lafayette, in"
## [14] "minneapolis"
## [15] "minneapolis"
## [16] "bordeaux"
## [17] "seattle"
## [18] "charleston, sc"
## [19] "bala cynwyd, pa"
## [20] "toronto, canada"
## [21] "durham, nc"
## [22] "seattle"
## [23] "new york"
## [24] "san francisco"
## [25] "santa clara"
## [26] "chicago"
## [27] "boulder co"
## [28] "bucharest, romania"
## [29] "stockholm, sweden"
## [30] "sterling, va"
## [31] "orlando, florida"
## [32] "detroit, mi"
## [33] "san francisco, ca"
## [34] "hillsboro, oregon"
## [35] "austin, tx"
## [36] "denver, co"
## [37] "washington dc"
## [38] "wellington, new zealand"
## [39] "denver, co"
## [40] "albany, ny"
## [41] "ct"
## [42] "san francisco"
## [43] "joplin mo"
## [44] "jersey city, nj"
## [45] "phils"
## [46] "bristol, uk"
## [47] "philadelphia, pa"
## [48] "san jose, ca"
## [49] "cambridge, ma"
## [50] "san francisco"
## [51] "san francisco"
## [52] "barcelona, spain"
## [53] "redmond, wa"
## [54] "new york city"
## [55] "san francisco, ca"
## [56] "ithaca, ny"
## [57] "redmond, wa"
## [58] "amsterdam"
## [59] "seattle"
## [60] "oklahoma city, ok"
## [61] "chicago"
## [62] "san francisco"
## [63] "raleigh, nc"
## [64] "verona, wisconsin"
## [65] "karlstad, sweden"
## [66] "nj"
## [67] "lubbock, tx"
## [68] "raleigh, nc"
## [69] "new york, ny"
## [70] "london, uk"
## [71] "sunnyvale"
## [72] "poland"
## [73] "new york city, ny"
## [74] "dallas"
## [75] "san francisco, ca"
## [76] "seattel, wa"
## [77] ""
## [78] ""
## [79] ""
## [80] ""
## [81] ""
## [82] ""
## [83] ""
## [84] ""
## [85] ""
## [86] ""
## [87] ""
## [88] ""
## [89] "london"
## [90] "hillsboro, oregon"
## [91] "mountain view"
## [92] "bothell"
## [93] ""
## [94] ""
## [95] "israel"
## [96] "bremerhaven, germany"
## [97] ""
## [98] "berlin"
## [99] "palo alto"
## [100] "dallas tx"
## [101] "austin"
## [102] "seattle"
## [103] "san francisco"
## [104] "berlin"
## [105] "san francisco"
## [106] "seattle"
## [107] "johnston, iowa"
## [108] "california"
## [109] "omaha, ne"
## [110] "plymouth, mn"
## [111] "boston metro"
## [112] "phoenix, arizona"
## [113] "sunnyvale"
## [114] "seattle, wa"
## [115] "san francisco"
## [116] "culver city, ca"
## [117] "menlo park"
## [118] "seattle"
## [119] "richmond, ca"
## [120] "solon, oh"
## [121] "seattle"
## [122] "ìngelholm, sweden"
## [123] "remote us"
## [124] "redmond"
## [125] "stuttgart"
## [126] "san francisco"
## [127] "helsinki"
## [128] "albuquerque, nm"
## [129] "mountain view"
## [130] "cupertino"
## [131] "dallas"
## [132] "washington d.c."
## [133] "palo alto"
## [134] "rennes, france"
## [135] "regina, sk canada"
## [136] "buffalo, ny"
## [137] "atlanta"
## [138] "sf"
## [139] "san francisco, ca"
## [140] "olathe, ks"
## [141] "norman, ok"
## [142] "wilton, ct usa"
## [143] "philadelphia"
## [144] "united states"
## [145] "seattle, wa"
## [146] "columbus, oh"
## [147] "palo alto"
## [148] "denver, co"
## [149] "sf"
## [150] "austin, tx"
## [151] "oakland, ca"
## [152] "cincinatti"
## [153] "san diego, ca"
## [154] "detroit, mi"
## [155] "stockholm, sweden"
## [156] "southampton, uk"
## [157] "london"
## [158] "seattle, wa"
## [159] "tampa"
## [160] "oregon"
## [161] "puerto rico"
## [162] "gaithersburg, md"
## [163] "seattle"
## [164] "warsaw, poland"
## [165] "nyc"
## [166] "philadelphia"
## [167] "redmond"
## [168] "asunciìn, paraguay"
## [169] "dublin, ireland"
## [170] "nyc"
## [171] "new york, new york"
## [172] "northern california"
## [173] "toronto, canada"
## [174] "seattle"
## [175] "vancouver, canada"
## [176] "canada"
## [177] "denver, co"
## [178] "kingston, canada"
## [179] "boston"
## [180] "rolla, mo, usa"
## [181] "boston"
## [182] "remote"
## [183] "boulder, co"
## [184] "san francisco"
## [185] "san francisco"
## [186] "redmond"
## [187] "mountain view"
## [188] "seattle"
## [189] "menlo park"
## [190] "chicago"
## [191] "seattle, wa"
## [192] "san francisco"
## [193] "seattle, wa"
## [194] "bangalore, india"
## [195] "salt lake city"
## [196] "san francisco, ca"
## [197] "menlo park"
## [198] "cape town"
## [199] "bristol, uk"
## [200] "london"
## [201] "wellington"
## [202] "mountain view"
## [203] "houston"
## [204] "mountain view"
## [205] "boston"
## [206] "stuart, fl"
## [207] "boston"
## [208] "menlo park"
## [209] "nc"
## [210] "north carolina"
## [211] "los angeles"
## [212] "bengaluru"
## [213] "atlanta, georgia"
## [214] "seattle, wa usa"
## [215] "chicago"
## [216] "redmon"
## [217] "oslo, norway"
## [218] "los gatos"
## [219] "new york city"
## [220] "san francisco"
## [221] "owego, ny"
## [222] "anaheim, ca"
## [223] "boston, ma"
## [224] "vancouver"
## [225] "santa clara, ca"
## [226] "san francisco"
## [227] "new york city"
## [228] "florida"
## [229] "los gatos"
## [230] "london"
## [231] "san francisco"
## [232] "frankfurt"
## [233] "rochester, ny"
## [234] "san francisco, ca"
## [235] "redmond, wa"
## [236] "canada"
## [237] "san francisco"
## [238] "college station, tx"
## [239] "san francisco"
## [240] "chinz"
## [241] "sf"
## [242] "cambridge, ma"
## [243] "washington dc"
## [244] "mountain view"
## [245] "new york"
## [246] "new york city"
## [247] "san francisco"
## [248] "chico ca"
## [249] "ca"
## [250] "oklahoma city, ok"
## [251] "san francisco"
## [252] "stockholm, sweden"
## [253] "nordrheinwestfalen"
## [254] "paris"
## [255] "san francisco"
## [256] "austin"
## [257] "radnor, pa"
## [258] "san francisco"
## [259] "nyc"
## [260] "kansas city, mo"
## [261] "redmond, wa"
## [262] "monaco"
## [263] "calgary, canada"
## [264] "yorktown,ny"
## [265] "seattle, wa"
## [266] "rochester ny"
## [267] "dortmund, germany"
## [268] "w"
## [269] "austin"
## [270] "sf"
## [271] "gainesville, fl"
## [272] "vancouver"
## [273] "san francisco"
## [274] "hillsboro"
## [275] "vancouver bc"
## [276] "sweden"
## [277] "lexington park, md"
## [278] "washington, dc"
## [279] "austin, tx"
## [280] "halifax, ns, canada"
## [281] "schenectady, ny"
## [282] "yeranus"
## [283] "washington, dc"
## [284] "london"
## [285] "arizona"
## [286] "woodland hills"
## [287] "pasadena"
## [288] "nyc"
## [289] "berlin"
## [290] "san francisco"
## [291] "richmond, ky"
## [292] "new york, ny"
## [293] "seattle"
## [294] "compton ca"
## [295] "palo alto"
## [296] "washington, dc"
## [297] "canada"
## [298] "chicago, il"
## [299] "your momma's buttcrack"
## [300] "seattle"
## [301] "amsterdam"
## [302] "menlo park"
## [303] "new york"
## [304] "new york city"
## [305] "houston, tx"
## [306] "schenectady, ny"
## [307] "warsaw, poland"
## [308] "redmond"
## [309] "toronto"
## [310] "portland, or"
## [311] "ann arbor, mi"
## [312] "nyc"
## [313] "clemson, sc"
## [314] "uk"
## [315] "cambridge, ma"
## [316] "south san francisco"
## [317] "san francisco palo alto, ca"
## [318] "new york"
## [319] "san francisco"
## [320] "amsterdam"
## [321] "san jose"
## [322] "south africa"
## [323] "fayetteville, ar"
## [324] "seattle"
## [325] "london"
## [326] "germany"
## [327] "pleasanton, ca"
## [328] "san jose, ca"
## [329] "lincoln, ne"
## [330] "sterling, va"
## [331] "san francisco"
## [332] "san francisco"
## [333] "p"
## [334] "chicago"
## [335] "madison, wi"
## [336] "dsvao"
## [337] "bellevue, wa"
## [338] "minneapolis, mn"
## [339] "toronto"
## [340] "san francisco"
## [341] "seattle"
## [342] "yonkers, ny"
## [343] "austin"
## [344] "redmond, wa"
## [345] "new york city"
## [346] "san francisco"
## [347] "munich"
## [348] "seattle"
## [349] "san diego"
## [350] "natick"
## [351] "berlin"
## [352] "sydney, australia"
## [353] "san francisco"
## [354] "boise, id remote, job is based in nyc"
## [355] "mountain view"
## [356] "sao paulo"
## [357] "bay area"
## [358] "sf"
## [359] "toronto"
## [360] "austin, tx"
## [361] "brazil"
## [362] "new york"
## [363] "san francisco, ca"
## [364] "london"
## [365] "london"
## [366] "san francisco"
## [367] "dublin ireland"
## [368] "pune"
## [369] "berlin"
## [370] "cambridge, ma"
## [371] "seattle"
## [372] "london"
## [373] "san francisco"
## [374] "ireland"
## [375] "menlo park"
## [376] "test"
## [377] "chicago"
## [378] "seattle"
## [379] "montreal, quebec, canada"
## [380] "seattle"
## [381] "mississippi"
## [382] "foobar"
## [383] "san francisco, ca"
## [384] "mountain view"
## [385] "stanford, ca"
## [386] "albuquerque, new mexico"
## [387] "bosnia"
## [388] "vancouver"
## [389] "mountain west"
## [390] "columbus, oh"
## [391] "sacramento, ca"
## [392] "cologne, germany"
## [393] "san francisco"
## [394] "durham, nc"
## [395] "austin, tx"
## [396] "bavaria"
## [397] "seattle"
## [398] "los angeles"
## [399] "new york"
## [400] "sunnyvale"
## [401] "london"
## [402] "manchester, nh"
## [403] "nyc"
## [404] "tacoma"
## [405] "waltham, ma"
## [406] "salt lake city"
## [407] "american fork, ut"
## [408] "bulgaria"
## [409] "radnor, pa"
## [410] "chicago"
## [411] "san francisco"
## [412] "berlin"
## [413] "menlo park"
## [414] "los angeles"
## [415] "chicago"
## [416] "san francisco"
## [417] "redmond, wa"
## [418] "new york, ny"
## [419] "berlin"
## [420] "san francisco"
## [421] "sunnyvale"
## [422] "new york city"
## [423] "seattle"
## [424] "midwest"
## [425] "issaquah"
## [426] "southern california"
## [427] "lahore, pakistan"
## [428] "beaverton, or"
## [429] "salt lake city, utah"
## [430] "ireland"
## [431] "sydney"
## [432] "new york"
## [433] "dcvamd area"
## [434] "peru"
## [435] "san francisco"
## [436] "burlington, vt"
## [437] "san francisco"
## [438] "toronto"
## [439] "seattle"
## [440] "chicago"
## [441] "phoenix, az"
## [442] "iceland"
## [443] "los angeles"
## [444] "new york"
## [445] "remote us"
## [446] "london, uk working remote"
## [447] "milwaukee"
## [448] "baltimore"
## [449] "san francisco"
## [450] "new york city"
## [451] "nyc"
## [452] "san francisco, ca"
## [453] "usa"
## [454] "newark"
## [455] "paris"
## [456] "north carolina"
## [457] "san francisco"
## [458] "brisbane, australia"
## [459] "pleasanton"
## [460] "memphis"
## [461] "sulzbach an der murr, germany"
## [462] "nijmegen"
## [463] "folsom, ca"
## [464] "boulder, co"
## [465] "interior bc, canada"
## [466] "midwest"
## [467] "london"
## [468] "englewood, co"
## [469] "midwest"
## [470] "santa monica, ca"
## [471] "virginia"
## [472] "halifax"
## [473] "santa clara"
## [474] "berlin"
## [475] "mountain view, ca"
## [476] "san mateo, ca"
## [477] "bedford ma"
## [478] "south carolina"
## [479] "mtv"
## [480] "san francisco, ca"
## [481] "san juan, pr"
## [482] "seattle"
## [483] "minneapolis, mn"
## [484] "seattle"
## [485] "uk"
## [486] "boston, ma"
## [487] "canada"
## [488] "salt lake city, ut"
## [489] "sacramento, ca"
## [490] "adelaide, australia"
## [491] "denver, co"
## [492] "sweden"
## [493] "atlanta"
## [494] "mountain view"
## [495] "alexandria, va"
## [496] "plano tx"
## [497] "san diego"
## [498] "tampa, fl"
## [499] "san francisco"
## [500] "san francisco"
## [501] "san francisco"
## [502] "redmond, wa"
## [503] "austin, tx"
## [504] "washington, dc"
## [505] "south of france"
## [506] "springsfield, co"
## [507] "south west uk"
## [508] "palo alto"
## [509] "vancouver"
## [510] "salt lake city, ut"
## [511] "arlington, va"
## [512] "washington dc"
## [513] "france lyon"
## [514] "new york, ny"
## [515] "toulouse, france"
## [516] "seattle, wa"
## [517] "mountain view, ca"
## [518] "redmond, wa"
## [519] "boston, ma"
## [520] "san francisco"
## [521] "australia"
## [522] "sunnyvale"
## [523] "did"
## [524] "austin"
## [525] "seattle"
## [526] "belgium"
## [527] "sf bay area"
## [528] "san francisco, ca"
## [529] "verona, wisconsin"
## [530] "hursley, uk"
## [531] "menlo park"
## [532] "toulouse fr"
## [533] "maryland"
## [534] "portland, or"
## [535] "redmond, wa"
## [536] "redmond, wa"
## [537] "omaha, ne"
## [538] "ms"
## [539] "louisville, co"
## [540] "brisbane, australia"
## [541] "ottawa, canada"
## [542] ""
## [543] "germany"
## [544] "clujnapoca, romania"
## [545] "san francisco"
## [546] "los angeles"
## [547] "toronto, on"
## [548] "bay area"
## [549] "istanbul"
## [550] "kingsport, tn"
## [551] "nyc"
## [552] "denver, co"
## [553] "redmond, wa"
## [554] "austin"
## [555] "zurich, switzerland"
## [556] "san ramon"
## [557] "austin, tx"
## [558] "london"
## [559] "lowell, ma"
## [560] "minnesota"
## [561] "champaign, il"
## [562] "mountain view, ca"
## [563] "chicago"
## [564] "maryland"
## [565] "salt lake city"
## [566] "portland, or"
## [567] "remote midwest"
## [568] "indianapolis, in"
## [569] "clujnapoca"
## [570] "london, uk"
## [571] "san francisco"
## [572] "irvine, ca"
## [573] "san francisco"
## [574] "new york city"
## [575] "eindhoven, netherlands"
## [576] "san diego"
## [577] "clujnapoca"
## [578] "hungary"
## [579] "cupertino, ca"
## [580] "remote"
## [581] "new york"
## [582] "houston"
## [583] "san diego"
## [584] "oslo"
## [585] "seattle"
## [586] "costa mesa, california"
## [587] "aaaa"
## [588] "fairfax, va"
## [589] "frankfurt, germany"
## [590] "seattle"
## [591] "salamanca, spain"
## [592] "colorado"
## [593] "honolulu"
## [594] "sunnyvale"
## [595] "philadelphia, pa"
## [596] "clujnapoca"
## [597] "los angles"
## [598] "cambridge"
## [599] "new york"
## [600] "helsinki, finland"
## [601] "melbourne, australia"
## [602] "brooklyn"
## [603] "los angeles"
## [604] "west yorkshire, uk"
## [605] "india"
## [606] "las juntas"
## [607] "provo"
## [608] "clujnapoca"
## [609] "berkeley"
## [610] "san francisco"
## [611] "dayton, oh"
## [612] "san jose"
## [613] "germany"
## [614] "menlo park"
## [615] "bucharest"
## [616] "silicon valley"
## [617] "california"
## [618] "new york city"
## [619] "nsw"
## [620] "raleigh"
## [621] "atlanta"
## [622] "san francisco"
## [623] "brazil"
## [624] "tel aviv, israel"
## [625] "seattle"
## [626] "mumbai"
## [627] "warsaw, poland"
## [628] "san diego"
## [629] "seattle, wa"
## [630] "san francisco"
## [631] "zìrich"
## [632] "reading, uk"
## [633] "melbourne, australia"
## [634] "denver, co"
## [635] "san francisco"
## [636] "new york, ny"
## [637] "uk"
## [638] "venezuela"
## [639] "dfw"
## [640] "buch"
## [641] "st. george, ut"
## [642] "new york, ny"
## [643] "baltimore, md"
## [644] "lisbon, portugal"
## [645] "san francisco"
## [646] "new york"
## [647] "a"
## [648] "bay area"
## [649] "new york"
## [650] "new york city"
## [651] "new york city"
## [652] "new york, ny"
## [653] "san francisco, ca"
## [654] "los angeles"
## [655] "palo alto, ca"
## [656] "new zealand"
## [657] "centennial, co"
## [658] "new york city"
## [659] "santa clara"
## [660] "san fransico"
## [661] "poland"
## [662] "seattle"
## [663] "nyc"
## [664] "amsterdam"
## [665] "greater los angeles area"
## [666] "san francisco"
## [667] "portland"
## [668] "austin"
## [669] "st. paul"
## [670] "berlin, germany"
## [671] "los angeles"
## [672] "austin tx"
## [673] "sao paulo"
## [674] "usa"
## [675] "buenos aires"
## [676] "redwood city"
## [677] "redmond"
## [678] "edegem"
## [679] "new york, ny"
## [680] "boston"
## [681] "india"
## [682] "remote"
## [683] "london"
## [684] "new york"
## [685] "cambridge, ma"
## [686] "cambridge"
## [687] "seattle, wa"
## [688] "huntsville, al"
## [689] "carson city, nv"
## [690] "uk"
## [691] "seattle"
## [692] "folsom, ca"
## [693] "stockholm"
## [694] "cupertino"
## [695] "new york"
## [696] "roseville, ca"
## [697] "oakland"
## [698] "belgium"
## [699] "folsom, ca"
## [700] "livermore, ca"
## [701] "sf"
## [702] "mtv"
## [703] "palo alto"
## [704] "austin, tx"
## [705] "chicago il"
## [706] "england manchester"
## [707] "copenhagen, denmark"
## [708] "kirkland wa usa"
## [709] "los angeles"
## [710] "austin"
## [711] "kitchener, canada"
## [712] "pittsburgh, pa"
## [713] "sydney, australia"
## [714] "kirkland, wa"
## [715] "ireland"
## [716] "olathe, ks"
## [717] "remote"
## [718] "luxembourg, luxembourg"
## [719] "washington dc"
## [720] "sweden"
## [721] "san francisco"
## [722] "los angeles"
## [723] "greenville, sc"
## [724] "seattle, wa"
## [725] "san franciaco"
## [726] "brighton, uk"
## [727] "berlin"
## [728] "chicago, il"
## [729] "redwood city, ca"
## [730] "sweden"
## [731] "palo alto"
## [732] "san francisco"
## [733] "remote"
## [734] "austin"
## [735] "palo alto"
## [736] "amsterdam"
## [737] "easton, pa"
## [738] "frankfurt"
## [739] "mountain view, ca"
## [740] "chicago"
## [741] "london"
## [742] "san francisco"
## [743] "menlo park"
## [744] "san francisco"
## [745] "maryland"
## [746] "orange county, ca"
## [747] "seattle"
## [748] "new york city"
## [749] "london"
## [750] "boulder"
## [751] "nyc"
## [752] "bc, canada"
## [753] "dallas"
## [754] "menlo park"
## [755] "seattle, wa"
## [756] "mill valley, ca"
## [757] "san francisco, ca"
## [758] "austin, tx"
## [759] "baltimore, md"
## [760] "redmond"
## [761] "san jose"
## [762] "portland, or"
## [763] "mìnchen"
## [764] "zurich"
## [765] "san francisco"
## [766] "toronto, on"
## [767] "toronto, on"
## [768] "madrid"
## [769] "portland"
## [770] "seattle, wa"
## [771] "bay area"
## [772] "san jose, ca"
## [773] "pleasanton"
## [774] "san francisco"
## [775] "budapest, hungary"
## [776] "new york city"
## [777] "vancouver"
## [778] "san francisco"
## [779] "chicago"
## [780] "san francisco"
## [781] "scotland"
## [782] "mountain view"
## [783] "sfo"
## [784] "pasadena, ca"
## [785] "san francisco"
## [786] "mexico"
## [787] "redwood city"
## [788] "austin, tx"
## [789] "seattle"
## [790] "ur moms house"
## [791] "dc"
## [792] "san francisco"
## [793] "location"
## [794] "san francisco"
## [795] "berkeley"
## [796] "nyc"
## [797] "london"
## [798] "columbus, oh"
## [799] "austin tx"
## [800] "edmonton, ab"
## [801] "denver"
## [802] "reno nevafa"
## [803] "bay area"
## [804] "cupertino"
## [805] "austin"
## [806] "washington, dc"
## [807] "ann arbor, mi"
## [808] "rennes, fr"
## [809] "washington d.c."
## [810] "warren, mi"
## [811] "seattle, wa"
## [812] "chicago"
## [813] "germany"
## [814] "zurich"
## [815] "boston"
## [816] "san francisco"
## [817] "tampa, fl"
## [818] "redmond"
## [819] "london"
## [820] "berlin, germany"
## [821] "baltimore, md"
## [822] "london, uk"
## [823] "seattle"
## [824] "boston, ma"
## [825] "san jose, ca"
## [826] "nj"
## [827] "san francisco"
## [828] "boulder, co"
## [829] "madison, wi"
## [830] "san francisco"
## [831] "new york"
## [832] "sf"
## [833] "virginiawashington area"
## [834] "seattle"
## [835] "ireland"
## [836] "seattle"
## [837] "san francisco"
## [838] "palo alto"
## [839] "redwood city, ca"
## [840] "chandler, az"
## [841] "moscow"
## [842] "seattle"
## [843] "seattle"
## [844] "tarzana, ca"
## [845] "irvine, ca"
## [846] "santa barbara"
## [847] "spain"
## [848] "san francisco"
## [849] "california"
## [850] "san francisco"
## [851] "toronto, canada"
## [852] "redwood city"
## [853] "chicago"
## [854] "san jose"
## [855] "san francisco"
## [856] "oxford, uk"
## [857] "london"
## [858] "sf"
## [859] "san francisco"
## [860] "norway"
## [861] "warwickshire"
## [862] "san francisco"
## [863] "uk"
## [864] "san francisco"
## [865] "redmond, washington"
## [866] "redwood city"
## [867] "princeton, nj"
## [868] "seattle"
## [869] "copenhagen, denmark"
## [870] "columbia mo"
## [871] "belgrade"
## [872] "seattle, wa"
## [873] "london"
## [874] "boston"
## [875] "nyc"
## [876] "san francisco"
## [877] "palo alto"
## [878] "redmond"
## [879] "spain"
## [880] "austin"
## [881] "san francisco"
## [882] "redwood city, ca"
## [883] "uk, outside london"
## [884] "vancouver"
## [885] "melbourne, australia"
## [886] "uk"
## [887] "san francisco"
## [888] "not sf"
## [889] "seattle"
## [890] "lyon, france"
## [891] "vancouver, bc"
## [892] "new hampshire"
## [893] "ottawa, canada"
## [894] "mountain view, ca"
## [895] "santa clara"
## [896] "mountain view, california"
## [897] "mountain view"
## [898] "canada"
## [899] "austin, tx"
## [900] "san francisco"
## [901] "san diego"
## [902] "remote"
## [903] "bournemouth, uk"
## [904] "glendale, ca"
## [905] "san diego"
## [906] "michigan"
## [907] "washington, dc"
## [908] "redmond, wa"
## [909] "atlanta, ga"
## [910] "chicago"
## [911] "new york city"
## [912] "san francisco"
## [913] "houston, tx"
## [914] "austin, tx"
## [915] "san jose, ca"
## [916] "germany"
## [917] "san francisco"
## [918] "seattle"
## [919] "toronto, on"
## [920] "toronto"
## [921] "new york"
## [922] "pittsburgh, pa"
## [923] "seattle, washington"
## [924] "london, uk"
## [925] "sunnyvale, ca"
## [926] "boston, ma"
## [927] "cambridge ma"
## [928] "san jose"
## [929] "atlanta, ga"
## [930] "menlo park"
## [931] "poland"
## [932] "davis"
## [933] "frankfurt am main, germany"
## [934] "new york, ny"
## [935] "san francisco"
## [936] "oakville, on., canada"
## [937] "limerick, ireland"
## [938] "toronto"
## [939] "chicago"
## [940] "toronto"
## [941] "salt lake city, ut"
## [942] "los angeles, ca"
## [943] "redmond, wa"
## [944] "new york"
## [945] "verona, wi"
## [946] "san francisco"
## [947] "nyc"
## [948] "pittsburgh"
## [949] "new york"
## [950] "new york"
## [951] "san francisco"
## [952] "los angeles, ca"
## [953] "seattle"
## [954] "cambridge, ma"
## [955] "sam diego"
## [956] "irvine, ca"
## [957] "new york"
## [958] "cupertino ca"
## [959] "needham, massachusetts"
## [960] "seattle"
## [961] "seattle"
## [962] "san francisco"
## [963] "boston, ma"
## [964] "redmond"
## [965] "burnaby bc"
## [966] "mountain view"
## [967] "san francisco"
## [968] "londnon"
## [969] "cupertino"
## [970] "san fransisco"
## [971] "columbus, oh"
## [972] "puerto rico"
## [973] "sunnyvale, california"
## [974] "remote"
## [975] "adelaide, au"
## [976] "dallas, tx"
## [977] "seattle"
## [978] "bay area"
## [979] "san francisco, ca"
## [980] "nyc"
## [981] "nebraska"
## [982] "austin, texas"
## [983] "cambridge, ma"
## [984] "kuala lumpur"
## [985] "atlanta"
## [986] "san francisco"
## [987] "zìrich"
## [988] "iowa"
## [989] "mountain view, ca"
## [990] "seattle"
## [991] "bay area"
## [992] "portland, or"
## [993] "san francisco"
## [994] "milwaukee"
## [995] "redmond"
## [996] "il"
## [997] "dc"
## [998] "orange county"
## [999] "nyc"
## [1000] "guadalajara"
## [1001] "new york, ny"
## [1002] "nashville"
## [1003] "redmond"
## [1004] "australia"
## [1005] "vancouver"
## [1006] "new york city"
## [1007] "baltimore"
## [1008] "dc area"
## [1009] "philippines"
## [1010] "minneapolis"
## [1011] "boston"
## [1012] "phoenix az"
## [1013] "argentina"
## [1014] "vancouver bc"
## [1015] "brooklyn"
## [1016] "vi\u0087èàt nam"
## [1017] "mountain view"
## [1018] "remote"
## [1019] "kansas city"
## [1020] "madrid, spain"
## [1021] "bangalore"
## [1022] "new york city"
## [1023] "redmond, wa"
## [1024] "los angeles"
## [1025] "scottsdale, az"
## [1026] "new zealand"
## [1027] "st petersburg, fl"
## [1028] "seattle"
## [1029] "boston"
## [1030] "mountain view, ca"
## [1031] "sf"
## [1032] "redmond"
## [1033] "houston, tx"
## [1034] "colorado"
## [1035] "cupertino"
## [1036] "bangalore"
## [1037] "atlanta, ga"
## [1038] "washington, dc"
## [1039] "campo grande, mato grosso do sul, brazil"
## [1040] "orlando"
## [1041] "cambridge ma"
## [1042] "mountain view, ca"
## [1043] "cleveland, oh"
## [1044] "singapore"
## [1045] "nyc"
## [1046] "columbia, md"
## [1047] "san jose"
## [1048] "bangalore"
## [1049] "porto, portugal"
## [1050] "seattle"
## [1051] "des moines, ia"
## [1052] "austin, tx"
## [1053] "brisbane, australia"
## [1054] "san francisco, ca"
## [1055] "remote usa"
## [1056] "lenexa, ks"
## [1057] "philadelphia"
## [1058] "mountain view"
## [1059] "toronto, canada"
## [1060] "beijing"
## [1061] "nyc"
## [1062] "new york"
## [1063] "los angeles"
## [1064] "san francisco, ca"
## [1065] "pittsburgh, pa"
## [1066] "los angeles, ca"
## [1067] "montreal"
## [1068] "new york city"
## [1069] "san jose"
## [1070] "australia"
## [1071] "sydney, au"
## [1072] "menlo park, ca"
## [1073] "sydney"
## [1074] "atlanta, ga"
## [1075] "new york, new york"
## [1076] "salt lake city, ut"
## [1077] "seattle"
## [1078] "seattle"
## [1079] "ann arbor, mi"
## [1080] "remote"
## [1081] "seattle"
## [1082] "new york"
## [1083] "mountain view"
## [1084] "san jose, ca"
## [1085] "new york"
## [1086] "san francisco, ca"
## [1087] "singapore"
## [1088] "shanghai"
## [1089] "palo alto california"
## [1090] "boston ma"
## [1091] "kansas"
## [1092] "redmond"
## [1093] "mclean, va"
## [1094] "denver, co"
## [1095] "argentina"
## [1096] "vancouver, bc"
## [1097] "los angeles"
## [1098] "shanghai, china"
## [1099] "madison, wi"
## [1100] "redmond, washington"
## [1101] "san francisco"
## [1102] "melbourne"
## [1103] "seattle, wa"
## [1104] "washington dc"
## [1105] "vancouver"
## [1106] "san francisco, ca"
## [1107] "rochester"
## [1108] "san francisco"
## [1109] "montreal, qc, canada"
## [1110] "washington dc"
## [1111] "seattle"
## [1112] "bangalore"
## [1113] "los angeles"
## [1114] "madison, wi"
## [1115] "new york, ny"
## [1116] "new zealand"
## [1117] "nyc"
## [1118] "grand prairie, tx"
## [1119] "norwalk, ct"
## [1120] "vancouver"
## [1121] "nyc"
## [1122] "edmonton, alberta, canada"
## [1123] "singapore"
## [1124] "austin"
## [1125] "canada"
## [1126] "new york"
## [1127] "chicago, il"
## [1128] "san jose"
## [1129] "ohio"
## [1130] "rio de janeiro"
## [1131] "lancaster, pa"
## [1132] "seattle, wa"
## [1133] "toronto"
## [1134] "washington dc"
## [1135] "mountain view"
## [1136] "jakarta"
## [1137] "foster city, ca"
## [1138] "knoxviile, tn"
## [1139] "oregon"
## [1140] "new york, ny"
## [1141] "anacortes, wa"
## [1142] "laurel, md, usa"
## [1143] "chicago"
## [1144] "williamsburg"
## [1145] "australia"
## [1146] "riverside"
## [1147] "minneapolis"
## [1148] "oakland, ca"
## [1149] "san francisco"
## [1150] "san francisco"
## [1151] "minneapolis, minnesota"
## [1152] "los angeles"
## [1153] "nyc"
## [1154] "pune"
## [1155] "portland, or"
## [1156] "sydney, australia"
## [1157] "falls church, va, usa"
## [1158] "falls church, va, usa"
## [1159] "mountain view"
## [1160] "columbus, ohio"
## [1161] "omaha"
## [1162] "sf"
## [1163] "san francisco"
## [1164] "nyc"
## [1165] "redmond"
## [1166] "norfolk"
## [1167] "bellingham, wa"
## [1168] "san francisco"
## [1169] "los angeles, ca"
## [1170] "san francisco"
## [1171] "san francisco"
## [1172] "denver"
## [1173] "corvallis, or"
## [1174] "seattle"
## [1175] "redwood shores"
## [1176] "boulder, colorado"
## [1177] "india"
## [1178] "kansas city, mo"
## [1179] "cupertino, ca"
## [1180] "hyderabad, india"
## [1181] "sf"
## [1182] "redmond, wa"
## [1183] "foster city"
## [1184] "washington dc"
## [1185] "auckland"
## [1186] "southern california"
## [1187] "hanoi, vietnam"
## [1188] "new york"
## [1189] "bloomington, il"
## [1190] "new york"
## [1191] "nyc"
## [1192] "jakarta"
## [1193] "detroit, mi"
## [1194] "san francisco"
## [1195] "alameda"
## [1196] "san francisco"
## [1197] "san francisco"
## [1198] "hillsboro, oregon"
## [1199] "houston"
## [1200] "seattle"
## [1201] "minneapolis mn"
## [1202] "chicago, il"
## [1203] "chicago, il"
## [1204] "greater boston area"
## [1205] "silicon valley, ca"
## [1206] "boston, massachusetts"
## [1207] "bangalore"
## [1208] "irvine"
## [1209] "mountain view"
## [1210] "las vegas"
## [1211] "redmond, wa"
## [1212] "new york"
## [1213] "nyc"
## [1214] "mountain view"
## [1215] "redmond, wa, usa"
## [1216] "redmond, wa"
## [1217] "san bruno"
## [1218] "sydney"
## [1219] "mtv"
## [1220] "new york"
## [1221] "seattle"
## [1222] "bay area"
## [1223] "san francisco"
## [1224] "pune, india"
## [1225] "korea"
## [1226] "palo alto"
## [1227] "san francisco"
## [1228] "nyc"
## [1229] "san diego"
## [1230] "san francisco"
## [1231] "seattle"
## [1232] "new york"
## [1233] "austin, tx"
## [1234] "san francisco"
## [1235] "london"
## [1236] "cyprus"
## [1237] "remote/everywhere/usa"
## [1238] ""
## [1239] "san francisco, ca"
## [1240] "cyprus"
## [1241] "boston"
## [1242] "bellevue wa"
## [1243] "nyc"
## [1244] "san francisco"
## [1245] "mountain view"
## [1246] "redwood"
## [1247] "pittsburgh, pa"
## [1248] "seattle"
## [1249] "mumbai"
## [1250] "silicon valley"
## [1251] "mumbai, india"
## [1252] "mountain view"
## [1253] "seattle"
## [1254] "los gatos, ca"
## [1255] "mountain view, ca"
## [1256] "washington d.c."
## [1257] "new york city"
## [1258] "shanghai"
## [1259] "san bruno"
## [1260] "auckland, new zealand"
## [1261] "san francisco, ca"
## [1262] "everett"
## [1263] "los gatos"
## [1264] "ridgecrest"
## [1265] "chennai"
## [1266] "bangalore, india"
## [1267] "dallas texas usa"
## [1268] "sf"
## [1269] "san francisco"
## [1270] "sunnyvale"
## [1271] "russia, spb"
## [1272] "bangalore, india"
## [1273] "singapore"
## [1274] "bangalore india"
## [1275] "bangalore, india"
## [1276] "san francisco"
## [1277] "mountain view"
## [1278] "seattle"
## [1279] "seattle"
## [1280] "san francisco"
## [1281] "san jose"
## [1282] "san francisco"
## [1283] "singapore"
## [1284] "sunnyvale"
## [1285] "redwood city, ca"
## [1286] "san francisco"
## [1287] "pune, india"
## [1288] "california"
## [1289] "boulder"
## [1290] "san francisco"
## [1291] "asia"
## [1292] "san francisco"
## [1293] "livermore"
## [1294] "not a coast"
## [1295] "mountain view"
## [1296] "sunnyvale"
## [1297] "france"
## [1298] "denver"
## [1299] "sacramento, ca"
## [1300] "germany"
## [1301] "san francisco"
## [1302] "sunnyvale"
## [1303] "mountain view"
## [1304] "\u008aü¾µá"
## [1305] "sf"
## [1306] "cupertino"
## [1307] "livermore, california"
## [1308] "san francisco"
## [1309] "mountain view"
## [1310] "seattle, wa"
## [1311] "boston"
## [1312] "san francisco, ca"
## [1313] "manila, philippines"
## [1314] "san ramon, ca"
## [1315] "seattle, wa"
## [1316] "athens, greece"
## [1317] "san jose"
## [1318] "karachi, pakistan"
## [1319] "menlo park"
## [1320] "secret"
## [1321] "san jose"
## [1322] "eindhoven, the netherlands"
## [1323] "seattle"
## [1324] "here"
## [1325] "chennai"
## [1326] "abu dhabi"
## [1327] "sunnyvale, ca"
## [1328] "kenya"
## [1329] "chennai"
## [1330] "bern"
## [1331] "hanoi, vietnam"
## [1332] "mountain view"
## [1333] "singapore"
## [1334] "dubin"
## [1335] "san francisco"
## [1336] "cupertino"
## [1337] "san francisco"
## [1338] "richmond, surrey"
## [1339] "palo alto"
## [1340] "location"
## [1341] "seattle, wa"
## [1342] "nairobi"
## [1343] "chennai"
## [1344] "netherlands"
## [1345] "sf bay area"
## [1346] "pleasanton, ca"
## [1347] "geneva, switzerland"
## [1348] "berlin, germany"
## [1349] "san francisco"
## [1350] "san francisco, ca"
## [1351] "glasgow"
## [1352] "dhaka, bangladesh"
## [1353] "spain"
## [1354] "berlin"
## [1355] "london"
## [1356] "london"
## [1357] "melbourne, australia"
## [1358] "san francisco"
## [1359] "singapore"
## [1360] "chisinau, moldova"
## [1361] "bangalore"
## [1362] "hong kong"
## [1363] "bonn"
## [1364] "remote"
## [1365] "stockholm, sweden"
## [1366] "vietnam"
## [1367] "mountain view, ca"
## [1368] "berlin"
## [1369] "michigan"
## [1370] "ssf"
## [1371] "germany, brunswick"
## [1372] "portland, or"
## [1373] "chicago, il"
## [1374] "sydney"
## [1375] "morocco"
## [1376] "constanta, romania"
## [1377] "the netherlands"
## [1378] "amsterdam"
## [1379] "sydney"
## [1380] "italy"
## [1381] "turin, italy"
## [1382] "menlo park, ca"
## [1383] "london"
## [1384] "manchester, uk"
## [1385] "israel"
## [1386] "casablanca, morocco"
## [1387] "london, uk"
## [1388] "barcelona, spain"
## [1389] "berlin"
## [1390] "copenhagen, denmark"
## [1391] "remote"
## [1392] "moscow, russia"
## [1393] "pa"
## [1394] "singapore"
## [1395] "kiev"
## [1396] "germany"
## [1397] "clujnapoca, romania"
## [1398] "munich, germany"
## [1399] "dalas"
## [1400] "athens, greece remote"
## [1401] "greenville, sc"
## [1402] "london"
## [1403] "ireland"
## [1404] "south africa"
## [1405] "switzerland"
## [1406] "london"
## [1407] "seattle, wa"
## [1408] "istanbul"
## [1409] "dc"
## [1410] "melbourne"
## [1411] "arbedo"
## [1412] "brasil"
## [1413] "milan, italy"
## [1414] "cramlington, uk"
## [1415] "denmark"
## [1416] "wroclaw"
## [1417] "uk"
## [1418] "amsterdam"
## [1419] "paris"
## [1420] "edinburgh"
## [1421] "strasbourg"
## [1422] "clujnapoca, romania"
## [1423] "ghgh"
## [1424] "asdf"
## [1425] "bali"
## [1426] "prague"
## [1427] "didu"
## [1428] "stockholm, sweden"
## [1429] "brussels"
## [1430] "london"
## [1431] "reading, uk"
## [1432] "lund, sweden"
## [1433] "cape town, south africa"
## [1434] "west yorkshire, uk"
## [1435] "paris"
## [1436] "new haven, ct, usa"
## [1437] "san francisco"
## [1438] "dublin"
## [1439] "frankfurt"
## [1440] "toronto"
## [1441] "noida"
## [1442] "milwaukee, wi"
## [1443] "london"
## [1444] "atlanta"
## [1445] "france"
## [1446] "manchester, uk"
## [1447] "bermuda"
## [1448] "chattanooga, tn"
## [1449] "bonn"
## [1450] "nw"
## [1451] "italy"
## [1452] "switzerland zurich"
## [1453] "france"
## [1454] "boca raton, fl"
## [1455] "hatfield, pa"
## [1456] "richfield mn"
## [1457] "india"
## [1458] "mumbai"
## [1459] "san francisco"
## [1460] "boston"
## [1461] "paris, france"
## [1462] "toronto"
## [1463] "montvale nj"
## [1464] "indianapolis in"
## [1465] "chicago"
## [1466] "madrid, spain"
## [1467] "germany"
## [1468] "milan"
## [1469] "munich, germany"
## [1470] "moscow, russia"
## [1471] "grand rapids, mi"
## [1472] "krakow"
## [1473] "hamburg"
## [1474] "toronto"
## [1475] "new york"
## [1476] "kentucky"
## [1477] "grenoble, france"
## [1478] "slovenia"
## [1479] "london, uk"
## [1480] "montreal"
## [1481] "poughkeepsie"
## [1482] "pittsburgh, pa"
## [1483] "jblm, wa"
## [1484] "pune"
## [1485] "singapore"
## [1486] "milan"
## [1487] "mountain view"
## [1488] "boston, ma"
## [1489] "london"
## [1490] "austin, tx"
## [1491] "cambridge, ma"
## [1492] "bristol, uk"
## [1493] "sweden"
## [1494] "argentina"
## [1495] "nyc"
## [1496] "boston"
## [1497] "atlanta"
## [1498] "berlin"
## [1499] "clujnapoca, romania"
## [1500] "sao paulo"
## [1501] "austin, tx"
## [1502] "winnipeg, mb canada"
## [1503] "nyc"
## [1504] "durham, nc"
## [1505] "strasbourg"
## [1506] "amsterdam"
## [1507] "new york city"
## [1508] "paris"
## [1509] "dc metro area"
## [1510] "dallas, tx"
## [1511] "remote"
## [1512] "melbourne"
## [1513] "phoenix, az"
## [1514] "new york city"
## [1515] "jì¦nkì¦ping, sweden"
## [1516] "munich"
## [1517] "durham, nc"
## [1518] "leicester"
## [1519] "vermont"
## [1520] "exeter, uk"
## [1521] "columbus, oh"
## [1522] "oklahoma city"
## [1523] "menlo park"
## [1524] "uk"
## [1525] "kansas city, mo"
## [1526] "remote usa"
## [1527] "orlando"
## [1528] "irving, tx"
## [1529] "minneapolis"
## [1530] "london"
## [1531] "st. louis"
## [1532] "moscow"
## [1533] "st petersburg, fl"
## [1534] "minneapolis"
## [1535] "nj, usa"
## [1536] "minneapolis, mn"
## [1537] "wallops island, virginia"
## [1538] "atlanta"
## [1539] "minneapolis, mn"
## [1540] "buenos aires"
## [1541] "london"
## [1542] "geneva"
## [1543] "seattle, wa"
## [1544] "montreal, quebec, canada"
## [1545] "sunnyvale"
## [1546] "santa clara, cuba"
## [1547] "philadelphia, pa"
## [1548] "ann arbor, michigan"
## [1549] "aveiro, portugal"
## [1550] "london"
## [1551] "sunnyvale"
## [1552] "serbia"
## [1553] "new york, ny"
## [1554] "glasgow, uk"
## [1555] "netherlands"
## [1556] "asdf"
## [1557] "san francisco"
## [1558] "portland, or"
## [1559] "cambridge, ma"
## [1560] "rolla, mo"
## [1561] "shillington pa"
## [1562] "british columbia"
## [1563] "menlo park"
## [1564] "madison, wi"
## [1565] "taiwan"
## [1566] "augusta, me"
## [1567] "san francisco, ca"
## [1568] "baltimore"
## [1569] "radford, va"
## [1570] "redmond"
## [1571] "san francisco"
## [1572] "mountain view"
## [1573] "san francisco"
## [1574] "seattle, wa"
## [1575] "aveiro, portugal"
## [1576] "palo alto, ca"
## [1577] "culver city, ca"
## [1578] "mountain view"
## [1579] "usa"
## [1580] "menlo park, ca"
## [1581] "sunnyvale"
## [1582] "philadelphia, pa"
## [1583] "san jose"
## [1584] "san francisco"
## [1585] "greer"
## [1586] "greer"
## [1587] "new york"
## [1588] "cupertino"
## [1589] "chicago, il"
## [1590] "sewttle"
## [1591] "pisa, italy"
## [1592] "nj, minutes from nyc"
## [1593] "baltimore, md"
## [1594] "bangalore"
## [1595] "fort lauderdale"
## [1596] "\u008eç÷\u008eýã"
## [1597] "eire"
## [1598] "berlin, germany"
## [1599] "frankfurt, germany"
## [1600] "austin, tx"
## [1601] "netherlands"
## [1602] "bellevue, wa, usa"
## [1603] "ireland"
## [1604] "ireland"
## [1605] "provo"
## [1606] "south san francisco, ca"
## [1607] "ireland"
## [1608] "raleigh, nc"
## [1609] "boston, ma"
## [1610] "uk"
## [1611] "boston, ma"
## [1612] "ottawa, canada"
## [1613] "madrid es"
## [1614] "product engineer"
## [1615] "sf"
## [1616] "san jose"
## [1617] "san jose"
## [1618] "seattle"
## [1619] "sydney, australia"
## [1620] "san francisco, ca"
## [1621] "bangalore,india"
## [1622] "seattle, wa"
## [1623] "redmond, wa"
## [1624] "san francisco"
## [1625] "beijing"
## [1626] "toronto"
## [1627] "san francisco"
## [1628] "buffalo"
## [1629] "san francisco"
## [1630] "sf"
## [1631] "san jose"
## [1632] "kitchener, on"
## [1633] "boulder, co"
## [1634] "los angeles ca"
## [1635] "menlo park"
## [1636] "zurich"
## [1637] "poland, warsaw"
## [1638] "vienna"
## [1639] "pune, india"
## [1640] "\u0093ãï\u0093ü"
## [1641] "\u0093\u009dã\u0093¤û\u0091áï"
## [1642] "atlanta, ga"
## [1643] "\u0092î\u0090\u0090µ\u0090"
## [1644] "khi"
## [1645] "cambridge, ma"
## [1646] "\u0091ã´\u0093ã"
## [1647] "exton, pa"
## [1648] "philadelphia, pa"
## [1649] "menlo park"
## [1650] "beaverton, or"
## [1651] "boulder, co"
## [1652] "europe"
## [1653] "nyc"
## [1654] "college park, md"
## [1655] "sunnyvale"
df_lat_long_sal <- select(df,location_latitude,location_longitude,annual_base_pay) %>%
na.omit()
df_lat_long_sal
library(ggmap)
## ℹ Google's Terms of Service: <]8;;https://mapsplatform.google.comhttps://mapsplatform.google.com]8;;>
## ℹ Please cite ggmap if you use it! Use `citation("ggmap")` for details.
# Get the map tile for the area of interest
map_tile <- get_stamenmap(location = c(lon = mean(df_lat_long_sal$location_longitude), lat = mean(df_lat_long_sal$location_latitude)),
zoom = 10)#, source = "stamen")#maptype = "terrain",
## Warning: `location` is not a valid argument to
## `]8;;ide:help:ggmap::get_stamenmapggmap::get_stamenmap]8;;()`; it is ignored.
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
# Plot the map tile using ggmap
ggmap(map_tile) +
geom_point(aes(x = location_longitude, y = location_latitude, color = annual_base_pay), data = df_lat_long_sal)
## Warning: Removed 788 rows containing missing values (`geom_point()`).
x= df_lat_long_sal %>%
filter(annual_base_pay > quantile(annual_base_pay, 0.25) - 1.5 * IQR(annual_base_pay) &
annual_base_pay < quantile(annual_base_pay, 0.75) + 1.5 * IQR(annual_base_pay) )
ggplot(x,aes(annual_base_pay)) + geom_boxplot()
#Histogram of annual base pay
ggplot(df_lat_long_sal%>% filter(annual_base_pay < quantile(annual_base_pay,0.95)) ,aes(annual_base_pay)) + geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
length(x)
## [1] 3
ggplot(x, aes(x=annual_base_pay)) +
geom_histogram(binwidth=5000, fill='grey',color='black') +
geom_vline(aes(xintercept = mean(annual_base_pay), color = "mean"), linetype = "dashed", size = 1) +
ggtitle("Annual base pay distribution after removing outliers" ) +
xlab("Annual Base Pay (in USD)") +
ylab("Frequency") +
theme(plot.title = element_text(hjust = 0.5))+
scale_color_manual(name = "", values = c(mean = "blue"), labels = c("Mean"))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
# Get the map tile for the area of interest
map_tile <- get_stamenmap(location = c(lon = mean(x$location_longitude), lat = mean(x$location_latitude)),
zoom = 10)#, source = "stamen")#maptype = "terrain",
## Warning: `location` is not a valid argument to
## `]8;;ide:help:ggmap::get_stamenmapggmap::get_stamenmap]8;;()`; it is ignored.
## ℹ Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.
# Plot the map tile using ggmap
ggmap(map_tile) +
geom_point(aes(x = location_longitude, y = location_latitude, size = annual_base_pay), data = x)
## Warning: Removed 759 rows containing missing values (`geom_point()`).
# load leaflet package
library(leaflet)
# create the map
leaflet(data = df_lat_long_sal) %>%
addTiles() %>%
addCircleMarkers(lng = ~location_longitude, lat = ~location_latitude, color = "YlOrRd",
radius = 4 , data = df_lat_long_sal) %>%
setView(lng = mean(df_lat_long_sal$location_longitude), lat = mean(df_lat_long_sal$location_latitude), zoom = 10)
# load leaflet package
library(leaflet)
# Create a new column for salary groups
df_lat_long_sal$salary_group <- cut(df_lat_long_sal$annual_base_pay,
breaks = c(0, 50000, 75000, 100000, Inf),
labels = c("low: <50k", "medium: <75k", "high: <100k", "very high: >100k"),
include.lowest = TRUE,
right=TRUE)
# define color palette for salary groups
pal <- colorFactor(c("low" = "green", "medium" = "yellow", "high" = "orange", "very high" = "red"),
domain = df_lat_long_sal$salary_group)
#create the map
leaflet(data = df_lat_long_sal) %>%
addTiles() %>%
addCircleMarkers(lng = ~location_longitude, lat = ~location_latitude, color = pal(df_lat_long_sal$salary_group),
radius = 4 , data = df_lat_long_sal) %>%
setView(lng = mean(df_lat_long_sal$location_longitude)+40, lat = mean(df_lat_long_sal$location_latitude), zoom = 1.5)%>%
addLegend(pal = pal, values = df_lat_long_sal$salary_group, title = "Salary Group", position = "bottomright")
# Creating a data set for salary and experience
df_sal_exp <- select(df,annual_base_pay,total_experience_years,employer_experience_years,employer_name) %>%
na.omit()
df_sal_exp
# Create a violin plot of salary by experience level
library(ggplot2)
ggplot(data=df_sal_exp, aes(x=total_experience_years, y=annual_base_pay)) +
geom_violin(fill="blue") +
geom_jitter(width = 0.1, color = "black") +
ggtitle("Salary Distribution by Experience Years") +
xlab("Experience Years") +
ylab("Annual Base Pay")
library(dplyr)
# remove outliers
df_sal_exp_no_outliers <- df_sal_exp %>%
group_by(total_experience_years) %>%
filter(annual_base_pay < quantile(annual_base_pay, 0.95))
# plot violin plot
ggplot(data=df_sal_exp_no_outliers, aes(x=total_experience_years, y=annual_base_pay)) +
geom_violin(trim=FALSE, draw_quantiles = c(0.25, 0.5, 0.75))
df_sal_exp <- df_sal_exp %>%
filter(annual_base_pay < quantile(annual_base_pay, 0.95))
# Divide the total_experience_years into different groups
df_sal_exp$experience_group <- cut(df_sal_exp$total_experience_years, breaks = c(0, 5, 10, 15, 20,Inf),include.lowest = TRUE, right = TRUE, labels = c("0-5", "6-10", "11-15", "16-20",">20"))
# Create violin plots for each group of experience
ggplot(data = df_sal_exp, aes(x = experience_group, y = annual_base_pay)) +
geom_violin(aes(fill = experience_group), color = "black", trim = TRUE,draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(values = c("#0000FF", "#00FF00", "#FFFF00", "#FFA500", "#FF0000")) +
ggtitle("Salary distribution by experience group") +
xlab("Experience Group") + ylab("Annual Base Pay")
ggplot(df_sal_exp,aes(total_experience_years)) + geom_boxplot()
ggplot(df_sal_exp,aes(x=total_experience_years,y=annual_base_pay)) + geom_point() + geom_smooth(method = 'lm')
## `geom_smooth()` using formula = 'y ~ x'
If the line is nearly horizontal, it means that there is little or no
correlation between the two variables.
#Wilcoxon rank-sum test
experience_group = ifelse(df_sal_exp$total_experience_years <= 10, "less than or equal to 10 years", "greater than 10 years")
wilcox.test(df_sal_exp$annual_base_pay ~ experience_group)
##
## Wilcoxon rank sum test with continuity correction
##
## data: df_sal_exp$annual_base_pay by experience_group
## W = 209702, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#Kruskal-Wallis test
kruskal.test(df_sal_exp$annual_base_pay ~ df_sal_exp$experience_group)
##
## Kruskal-Wallis rank sum test
##
## data: df_sal_exp$annual_base_pay by df_sal_exp$experience_group
## Kruskal-Wallis chi-squared = 117.57, df = 4, p-value < 2.2e-16
The Kruskal-Wallis rank sum test is a non-parametric method for comparing the central tendency of two or more groups. It is used when the data is not normally distributed and the groups being compared have different sample sizes.
In this case, the test is being used to compare the salary distributions of different experience groups (df_sal_exp\(annual_base_pay by df_sal_exp\)experience_group). The test statistic, Kruskal-Wallis chi-squared, is calculated as 17.747, with a degree of freedom of 4. The p-value is 0.001383, which is less than 0.05. This suggests that there is a statistically significant difference in the salary distributions of the different experience groups, and that experience level does influence salary.
—– XXX —- The Wilcoxon rank sum test with continuity correction is a non-parametric test that is used to determine whether there are significant differences in the median of two groups. The test statistic is W and the p-value represents the probability of observing a W statistic as extreme or more extreme than the one observed, assuming that the null hypothesis (i.e., no significant difference in median) is true. A small p-value (typically less than 0.05) suggests that there is evidence to reject the null hypothesis and conclude that there is a significant difference in median between the two groups.
————-xxxx——— # Which kind of job gets paid well ?
df_sal_type <- select(df,annual_base_pay,employer_name,job_title_category)
df_sal_type
unique(df_sal_type$job_title_category)
## [1] "Engineering" "Software" "Other" "Web"
## [5] "Data" "Management" "Operations" "Applied Science"
x<- df_sal_type %>%
na.omit() %>%
filter(annual_base_pay < quantile(annual_base_pay,0.95))
ggplot(x,aes( y=job_title_category,x= annual_base_pay )) + geom_boxplot()
——— xxxxx —————— # Trying to use 1.5*IQR for Experience years
# remove outliers
df_sal_exp_no_outliers_n <- df_sal_exp %>%
group_by(total_experience_years)%>%
filter(annual_base_pay > quantile(annual_base_pay, 0.25) - 1.5 * IQR(annual_base_pay) &
annual_base_pay < quantile(annual_base_pay, 0.75) + 1.5 * IQR(annual_base_pay) )
# plot violin plot
ggplot(data=df_sal_exp_no_outliers_n, aes(x=total_experience_years, y=annual_base_pay)) +
geom_violin(trim=FALSE, draw_quantiles = c(0.25, 0.5, 0.75))
df_sal_exp_n <- df_sal_exp %>%
filter(annual_base_pay > quantile(annual_base_pay, 0.25) - 1.5 * IQR(annual_base_pay) &
annual_base_pay < quantile(annual_base_pay, 0.75) + 1.5 * IQR(annual_base_pay) )
# Divide the total_experience_years into different groups
df_sal_exp_n$experience_group <- cut(df_sal_exp_n$total_experience_years, breaks = c(0, 5, 10, 15, 20,Inf),include.lowest = TRUE, right = TRUE, labels = c("0-5", "6-10", "11-15", "16-20",">20"))
# Create violin plots for each group of experience
ggplot(data = df_sal_exp_n, aes(x = experience_group, y = annual_base_pay)) +
geom_violin(aes(fill = experience_group), color = "black", trim = TRUE,draw_quantiles = c(0.25, 0.5, 0.75)) +
scale_fill_manual(name = "Experience (years)",values = c("#0000FF", "#00FF00", "#FFFF00", "#FFA500", "#FF0000")) +
ggtitle("Annual Base Pay distribution by experience group") +
xlab("Experience Group") + ylab("Annual Base Pay")
#ggplot(df_sal_exp_n,aes(x=total_experience_years,y=annual_base_pay)) + geom_point() + geom_smooth(method = 'lm')
ggplot(df_sal_exp_n, aes(x=total_experience_years, y=annual_base_pay)) +
geom_point(color = "blue") +
geom_smooth(method = 'lm', color = "red", size = 1) +
ggtitle("Annual Base Pay vs Total Years of Experience") +
xlab("Total Years of Experience") + ylab("Annual Base Pay (in USD)") +
theme(plot.title = element_text(hjust = 0.5))+
scale_color_manual(name = "", values = c("blue" = "point", "red" = "linear regression line"), labels = c("Data Points", "Linear Regression Line"))
## `geom_smooth()` using formula = 'y ~ x'
#Wilcoxon rank-sum test
experience_group = ifelse(df_sal_exp_n$total_experience_years <= 10, "less than or equal to 10 years", "greater than 10 years")
wilcox.test(df_sal_exp_n$annual_base_pay ~ experience_group)
##
## Wilcoxon rank sum test with continuity correction
##
## data: df_sal_exp_n$annual_base_pay by experience_group
## W = 209702, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#Kruskal-Wallis test
kruskal.test(df_sal_exp_n$annual_base_pay ~ df_sal_exp_n$experience_group)
##
## Kruskal-Wallis rank sum test
##
## data: df_sal_exp_n$annual_base_pay by df_sal_exp_n$experience_group
## Kruskal-Wallis chi-squared = 117.57, df = 4, p-value < 2.2e-16
The Kruskal-Wallis rank sum test is a non-parametric statistical test that is used to determine whether there are significant differences in the distribution of a continuous variable among different groups.
In this case, the test is being applied to the annual_base_pay variable, which is a continuous variable, and is being grouped by experience_group. The test calculates a chi-squared statistic, which is used to determine whether there is a significant difference in the annual base pay among the different experience groups.
The df (degree of freedom) is 4, which indicates the number of groups being compared. The p-value is less than 2.2e-16 which means that there is less than a 0.0000000002% chance that the differences in annual base pay among the experience groups is due to chance. Therefore, we can reject the null hypothesis that the annual base pay is the same across all experience groups and conclude that there is a significant difference in annual base pay among the different experience groups.
ggplot(df_sal_exp_n, aes(x=total_experience_years)) +
geom_histogram(fill='orange',color='black') +
geom_vline(aes(xintercept = mean(total_experience_years), color = "mean"), linetype = "dashed", size = 1) +
ggtitle("Distribution of experience (years) of individuals after removing outliers" ) +
xlab("Total Experience (in years)") +
ylab("Frequency") +
theme(plot.title = element_text(hjust = 0.5))+
scale_color_manual(name = "", values = c(mean = "blue"), labels = c("Mean"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Load libraries
library(ggplot2)
# Plot Q-Q plot to visualize normality
ggplot(df_sal_exp_n, aes(sample = total_experience_years)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ experience_group, ncol = 2)
ggplot(df_sal_exp_n, aes(sample = annual_base_pay)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~ experience_group, ncol = 2)
# Run Shapiro-Wilk test for normality
shapiro.test(df_sal_exp_n$total_experience_years)
##
## Shapiro-Wilk normality test
##
## data: df_sal_exp_n$total_experience_years
## W = 0.8465, p-value < 2.2e-16
shapiro.test(df_sal_exp_n$employer_experience_years)
##
## Shapiro-Wilk normality test
##
## data: df_sal_exp_n$employer_experience_years
## W = 0.63553, p-value < 2.2e-16
shapiro.test(df_sal_exp_n$annual_base_pay)
##
## Shapiro-Wilk normality test
##
## data: df_sal_exp_n$annual_base_pay
## W = 0.99037, p-value = 2.126e-08
ggplot(df_sal_exp_n, aes(x=employer_experience_years, y=annual_base_pay)) +
geom_point(color = "black") +
geom_smooth(method = 'lm', color = "red", size = 1) +
ggtitle("Annual Base Pay vs Experience at Current Employer ") +
xlab(" Years of Experience at Current Employer") + ylab("Annual Base Pay (in USD)") +
theme(plot.title = element_text(hjust = 0.5))+
scale_color_manual(name = "", values = c("black" = "point", "red" = "linear regression line"), labels = c("Data Points", "Linear Regression Line"))
## `geom_smooth()` using formula = 'y ~ x'
# Pearson correlation test
cor.test(df_sal_exp_n$total_experience_years, df_sal_exp_n$annual_base_pay,method = "spearman")
## Warning in cor.test.default(df_sal_exp_n$total_experience_years,
## df_sal_exp_n$annual_base_pay, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_sal_exp_n$total_experience_years and df_sal_exp_n$annual_base_pay
## S = 402076136, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.2951122
cor.test(df_sal_exp_n$employer_experience_years, df_sal_exp_n$annual_base_pay,method = "spearman")
## Warning in cor.test.default(df_sal_exp_n$employer_experience_years,
## df_sal_exp_n$annual_base_pay, : Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: df_sal_exp_n$employer_experience_years and df_sal_exp_n$annual_base_pay
## S = 512833984, p-value = 8.662e-05
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.1009404
cor.test(df_sal_exp_n$total_experience_years, df_sal_exp_n$annual_base_pay,method = "kendall")
##
## Kendall's rank correlation tau
##
## data: df_sal_exp_n$total_experience_years and df_sal_exp_n$annual_base_pay
## z = 11.495, p-value < 2.2e-16
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.20528
cor.test(df_sal_exp_n$employer_experience_years, df_sal_exp_n$annual_base_pay,method = "kendall")
##
## Kendall's rank correlation tau
##
## data: df_sal_exp_n$employer_experience_years and df_sal_exp_n$annual_base_pay
## z = 3.9715, p-value = 7.142e-05
## alternative hypothesis: true tau is not equal to 0
## sample estimates:
## tau
## 0.07337516
ggplot(df_sal_exp_n, aes(sample = annual_base_pay)) +
geom_qq() +
geom_qq_line()+
xlab("Theoritical") +
ylab("Sample")+
ggtitle("Q-Q plot for Annual Base Pay")+
theme(plot.title = element_text(hjust = 0.5))
ggplot(df_sal_exp_n, aes(sample = total_experience_years)) +
geom_qq() +
geom_qq_line()+
xlab("Theoritical") +
ylab("Sample")+
ggtitle("Q-Q plot for Total Experience (in years)")+
theme(plot.title = element_text(hjust = 0.5))
ggplot(df_sal_exp_n, aes(sample = employer_experience_years)) +
geom_qq() +
geom_qq_line() +
xlab("Theoritical") +
ylab("Sample")+
ggtitle("Q-Q plot for Experience at Current Employer (in years)")+
theme(plot.title = element_text(hjust = 0.5))
filter(df_sal_exp_n,annual_base_pay == 0)
#df_lat_long_sal$region <- ifelse(location_longitude<-100,"West",ifelse(location_longitude<-80,"Central",ifelse(location_longitude>`-70,"East","Not in USA")) )
df_lat_long_sal$region <- ifelse(df_lat_long_sal$location_latitude > 24 &
df_lat_long_sal$location_latitude < 50 &
df_lat_long_sal$location_longitude > -125 &
df_lat_long_sal$location_longitude < -65,
ifelse(df_lat_long_sal$location_longitude < -110, "West USA",
ifelse(df_lat_long_sal$location_longitude > -90, "East USA", "Central USA")),
"Outside USA")
df_lat_long_sal
library(leaflet)
# define color palette for regions
pal <- colorFactor(c("West USA" = "green", "Central USA" = "yellow","Outside USA" ="grey", "East USA" = "orange"),
domain = df_lat_long_sal$region)
#create the map
leaflet(data = df_lat_long_sal) %>%
addTiles() %>%
addCircleMarkers(lng = ~location_longitude, lat = ~location_latitude, color = pal(df_lat_long_sal$region),
radius = 4 , data = df_lat_long_sal) %>%
setView(lng = mean(df_lat_long_sal$location_longitude)+40, lat = mean(df_lat_long_sal$location_latitude), zoom = 1.5)%>%
addLegend(pal = pal, values = df_lat_long_sal$region, title = "Region", position = "bottomright")
# Check if each of them are normally distributed
library(ggplot2)
ggplot(data = df_lat_long_sal %>%
filter(region != "Outside USA"), aes(sample = annual_base_pay)) +
geom_qq() +
geom_qq_line() +
facet_wrap(~region, ncol = 1) +
ggtitle("Q-Q Plot of Annual Base Pay by Region") +
xlab("Theoretical Quantiles") +
ylab("Sample Quantiles")+
theme(plot.title = element_text(hjust = 0.5))
` #Normality Tests
df_west_usa <- df_lat_long_sal %>%
filter(region == "West USA") %>%
select(annual_base_pay)
df_east_usa <- df_lat_long_sal %>%
filter(region == "East USA") %>%
select(annual_base_pay)
df_central_usa <- df_lat_long_sal %>%
filter(region == "Central USA") %>%
select(annual_base_pay)
shapiro.test(df_west_usa$annual_base_pay)
##
## Shapiro-Wilk normality test
##
## data: df_west_usa$annual_base_pay
## W = 0.077868, p-value < 2.2e-16
#length(df_west_usa$annual_base_pay)
shapiro.test(df_west_usa$annual_base_pay)
##
## Shapiro-Wilk normality test
##
## data: df_west_usa$annual_base_pay
## W = 0.077868, p-value < 2.2e-16
#length(df_east_usa$annual_base_pay)
shapiro.test(df_west_usa$annual_base_pay)
##
## Shapiro-Wilk normality test
##
## data: df_west_usa$annual_base_pay
## W = 0.077868, p-value < 2.2e-16
#length(df_central_usa$annual_base_pay)
library(dplyr)
library(ggplot2)
df_lat_long_sal_sum <- df_lat_long_sal %>%
group_by(region) %>%
filter(region != "Outside USA")%>%
summarise(
mean = mean(annual_base_pay),
se = sd(annual_base_pay)/sqrt(n()),
)
#df_lat_long_sal_sum
ggplot(df_lat_long_sal_sum, aes(x = mean, y = region )) +
geom_errorbarh(aes(xmax=mean+1.96*se, xmin=mean-se*1.96)) +
geom_point() +
geom_label(aes(label=round(mean,3), nudge_y = 0.1))+
ggtitle("Mean Annual Base Pay (Confidence Interval)") +
xlab("Mean Annual Base Pay ") +
ylab("Region")+
theme(plot.title = element_text(hjust = 0.5))
## Warning in geom_label(aes(label = round(mean, 3), nudge_y = 0.1)): Ignoring
## unknown aesthetics: nudge_y
# Count the type of each group in the dataset
count_df <-df_lat_long_sal %>%
count(region,salary_group,sort=FALSE)
count_df <- count_df %>%
filter(region!='Outside USA')
ggplot(count_df, aes(x=salary_group, y=region, fill=n)) +
geom_tile(colour="white") +
scale_fill_gradient(name="No of people",low="white", high="steelblue") +
theme(axis.text.x = element_text(angle=45, vjust=1, hjust=1)) +
geom_text(aes(label=n), color="black", size=5)
filter( df_lat_long_sal, location_longitude < -100)
summary(df_lat_long_sal$annual_base_pay)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 55000 86400 131774 120000 10280000