# First Unique Character in a string.
# Problem. Suppose we have a string and we have to find the first unique character in the string. So if the string is
# like “people”, the first letter whose occurrence is one is ‘o’. So the index will be returned, that is 2 here.
# If there is no such character, then return -1.
# To solve this, we will follow these steps −
# Build a frequency map: for each character c in the string, if c is not yet in the map, insert it with value 1;
# otherwise, increase its count by one.
# Then scan the string from left to right and return the index of the first character whose count is 1;
# if no such character exists, return -1.
class Solution(object):
    """Solver for the "first unique character in a string" problem."""

    def firstUniqChar(self, s):
        """Return the index of the first character occurring exactly once in *s*.

        :type s: str
        :rtype: int
        :returns: zero-based index of the first non-repeating character, or
            -1 when every character repeats (this includes the empty string).
        """
        # First pass: count how many times each character appears.
        frequency = {}
        for ch in s:
            frequency[ch] = frequency.get(ch, 0) + 1
        # Second pass: walk the string in its original order so that the
        # earliest character with a count of one is the one reported.
        for index, ch in enumerate(s):
            if frequency[ch] == 1:
                return index
        return -1
# Demo: run the solver on a sample string and print the index it returns.
ob1 = Solution()
print(ob1.firstUniqChar("hackthegame"))
# Output: 2
# Organization of Data v2
# Import the .csv file with pandas and print the whole table.
import pandas as pd
# The raw-string prefix keeps the backslashes of the Windows path literal.
df = pd.read_csv (r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv')
print(df)
# Select a subset of columns: keep only First_Name.
import pandas as pd
# Load the full CSV first; the column selection happens on the next line.
data = pd.read_csv (r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv')
# Re-wrapping the loaded frame with columns=[...] keeps just the listed column.
df = pd.DataFrame(data, columns= ['First_Name'])
print(df)
# Output of print(df) for the full CSV (the wide table wraps into two column groups):
# Count First_Name Last_Name Address \
# 0 1 John Doe 2440 North Booth Street
# 1 2 Jane Doe NaN
# 2 3 Alan Turing 1 Fairfield Street Sackville Park
# 3 4 Roger Penrose Queen Mary University of London Mile End Road
# City State User_Name User_ID Browser OS OS_Price_Factor
# 0 Milwaukee WI Jdoe 1581 Firefox windows NaN
# 1 New York NY Jadoe 4501 Google mac NaN
# 2 Manchester GB Aturing 1000 Brave linux NaN
# 3 London GB Rpenrose 1001 NaN windows NaN
# Output of print(df) for the First_Name subset:
# First_Name
# 0 John
# 1 Jane
# 2 Alan
# 3 Roger
# Select specified columns: names, user ID, browser and operating system.
import pandas as pd
# Load the full CSV first; the column selection happens on the next line.
data = pd.read_csv (r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv')
# Only the five listed columns are carried into df, in the order given here.
df = pd.DataFrame(data, columns= ['First_Name', 'Last_Name', 'User_ID', 'Browser', 'OS'])
print(df)
# Output:
# First_Name Last_Name User_ID Browser OS
# 0 John Doe 1581 Firefox windows
# 1 Jane Doe 4501 Google mac
# 2 Alan Turing 1000 Brave linux
# 3 Roger Penrose 1001 NaN windows
# Remove specified columns - First_Name and OS - from the previous selection.
import pandas as pd
# Load the full CSV first; the column selection happens on the next line.
data = pd.read_csv (r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv')
# The "removal" is done by listing only the columns to keep, not by calling drop().
df = pd.DataFrame(data, columns = ['Last_Name', 'User_ID', 'Browser'])
print(df)
# Output:
# Last_Name User_ID Browser
# 0 Doe 1581 Firefox
# 1 Doe 4501 Google
# 2 Turing 1000 Brave
# 3 Penrose 1001 NaN
# Import the .csv file and explore its shape.
import pandas as pd
# reading csv file
dataFrame = pd.read_csv(r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv')
# NOTE(review): df is not read again within this snippet; the prints below use dataFrame.
df = pd.DataFrame(dataFrame)
print("DataFrame with some NaN (missing) values...\n",dataFrame)
# count the rows and columns in a DataFrame
print("\nNumber of rows and column in our DataFrame = ",dataFrame.shape)
# Remove NaN from the DataFrame: dropna(axis=1) drops every column that
# contains a NaN and keeps all rows; dataFrame itself is not modified.
print("\nDataFrame after removing NaN values...\n",dataFrame.dropna(axis=1))
# Output:
# DataFrame with some NaN (missing) values...
# Count First_Name Last_Name Address \
# 0 1 John Doe 2440 North Booth Street
# 1 2 Jane Doe NaN
# 2 3 Alan Turing 1 Fairfield Street Sackville Park
# 3 4 Roger Penrose Queen Mary University of London Mile End Road
# City State User_Name User_ID Browser OS OS_Price_Factor
# 0 Milwaukee WI Jdoe 1581 Firefox windows NaN
# 1 New York NY Jadoe 4501 Google mac NaN
# 2 Manchester GB Aturing 1000 Brave linux NaN
# 3 London GB Rpenrose 1001 NaN windows NaN
# Number of rows and column in our DataFrame = (4, 11)
# DataFrame after removing NaN values...
# Count First_Name Last_Name City State User_Name User_ID OS
# 0 1 John Doe Milwaukee WI Jdoe 1581 windows
# 1 2 Jane Doe New York NY Jadoe 4501 mac
# 2 3 Alan Turing Manchester GB Aturing 1000 linux
# 3 4 Roger Penrose London GB Rpenrose 1001 windows
# Organizing the Data v2 - without a .csv file
## Creating the sample records (a list of tuples, one tuple per user)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]
# Build the pandas DataFrame, naming one column per tuple field.
df = pd.DataFrame(
    data=data,
    columns=['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'],
)
## Data Frame (a bare expression: a notebook cell displays it, a script ignores it)
df
# Output:
# |   | First_Name | Last_Name | City       | State | User_Name | User_ID | OS      |
# |---|------------|-----------|------------|-------|-----------|---------|---------|
# | 0 | John       | Doe       | Milwaukee  | WI    | Jdoe      | 1581    | windows |
# | 1 | Jane       | Doe       | New York   | NY    | Jadoe     | 4501    | mac     |
# | 2 | Alan       | Turing    | Manchester | GB    | Aturing   | 1000    | linux   |
# | 3 | Roger      | Penrose   | London     | GB    | Rpenrose  | 1001    | windows |
# Organizing the Data v2 - without .csv file
## Selected Columns
# Import pandas package
import pandas as pd

## Creating the sample records (a list of tuples, one tuple per user)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]
# Build the pandas DataFrame, naming one column per tuple field.
df = pd.DataFrame(
    data=data,
    columns=['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'],
)
# Select last names only by dropping every other column.
# The result is not assigned, so df itself keeps all seven columns.
df.drop(columns=['First_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'])
# Output:
# |   | Last_Name |
# |---|-----------|
# | 0 | Doe       |
# | 1 | Doe       |
# | 2 | Turing    |
# | 3 | Penrose   |
# Organizing the Data v2 - without .csv file
## Combine Selected Columns
# Import pandas package
import pandas as pd

## Creating the sample records (a list of tuples, one tuple per user)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]
# Build the pandas DataFrame, naming one column per tuple field.
df = pd.DataFrame(
    data=data,
    columns=['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'],
)
# Select First_Name, Last_Name and User_ID by dropping the remaining columns.
# The result is not assigned, so df itself keeps all seven columns.
df.drop(columns=['City', 'State', 'User_Name', 'OS'])
# Output:
# |   | First_Name | Last_Name | User_ID |
# |---|------------|-----------|---------|
# | 0 | John       | Doe       | 1581    |
# | 1 | Jane       | Doe       | 4501    |
# | 2 | Alan       | Turing    | 1000    |
# | 3 | Roger      | Penrose   | 1001    |
# Apache PySpark
# Import Libraries
# Connect Apache Spark and Jupyter Notebooks
import findspark
# findspark.init() runs before pyspark is imported
# (presumably so the pyspark package can be located — confirm for your setup).
findspark.init()
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()
from pyspark.sql.types import StructType, StructField, FloatType, BooleanType
from pyspark.sql.types import DoubleType, IntegerType, StringType
from pyspark import SQLContext
# Setup the Configuration
conf = pyspark.SparkConf()
# NOTE(review): despite its name, spark_context is produced by the same
# SparkSession.builder...getOrCreate() chain as `spark` above.
spark_context = SparkSession.builder.config(conf=conf).getOrCreate()
# NOTE(review): SQLContext is a legacy entry point in recent PySpark releases;
# consider spark.createDataFrame(...) instead — confirm against the installed version.
sqlcontext = SQLContext(spark_context)
# Add Data: one tuple per user, in the same field order as schm below.
data = ([(1580, "Barry", "Firefox", "Windows"),
(5820, "Sam", "MS Edge", "Linux"),
(2340, "Harry", "Vivaldi", "Windows"),
(7860, "Albert", "Chrome", "Windows"),
(1123, "May", "Safari", "macOS")
])
# Column names only; no column types are specified here.
schm=["UserID", "Username","Browser","OS"]
# Setup the Data Frame
user_data_df = sqlcontext.createDataFrame(data,schema=schm)
# Bare expression: a notebook cell displays its repr, a script ignores it.
user_data_df
# Output: DataFrame[UserID: bigint, Username: string, Browser: string, OS: string]
# Print the DataFrame rows as a text table.
user_data_df.show()
# Output:
# +------+--------+-------+-------+
# |UserID|Username|Browser|     OS|
# +------+--------+-------+-------+
# |  1580|   Barry|Firefox|Windows|
# |  5820|     Sam|MS Edge|  Linux|
# |  2340|   Harry|Vivaldi|Windows|
# |  7860|  Albert| Chrome|Windows|
# |  1123|     May| Safari|  macOS|
# +------+--------+-------+-------+
# End Hacker Rank Reassessment Based Objective Quality Evidence (OQE) and Lessons Learned.