20220529_Hacker_Rank_Assessment_OQE

# First Unique Character in a string.
# Problem. Suppose we have a string and we have to find the first unique character in the string. So if the string is 
# like “people”, the first letter whose occurrence is one is ‘o’. So the index will be returned, that is 2 here. 
# If there is no such character, then return -1.
# To solve this, we will follow these steps −
# Build a frequency map: for each character c in the string, insert c with a count of 1 if it is not yet in the map,
# otherwise increase its count.
# Then scan the string from left to right and return the index of the first character whose count is 1;
# if there is no such character, return -1.
class Solution(object):
    def firstUniqChar(self, s):
        """Return the index of the first non-repeating character of ``s``.

        :type s: str
        :rtype: int -- index of the first character that occurs exactly
            once in ``s``; -1 when there is no such character (including
            the empty string).
        """
        # First pass: tally how many times each character occurs.
        counts = {}
        for ch in s:
            counts[ch] = counts.get(ch, 0) + 1

        # Second pass: walk the string in order so the earliest character
        # with a tally of one is the one reported.
        for position, ch in enumerate(s):
            if counts[ch] == 1:
                return position
        return -1
# Demo run: in "hackthegame" both 'h' and 'a' repeat, so the first character
# that occurs exactly once is 'c' and the printed index is 2.
ob1 = Solution()
print(ob1.firstUniqChar("hackthegame"))

# Organization of Data v2
# import .csv file with Pandas
import pandas as pd
# Load every column of the CSV into a DataFrame and print it.
df = pd.read_csv(
    r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
)
print(df)

# Select Subset of Columns
import pandas as pd
# Read the full CSV, then build a frame restricted to the First_Name column.
data = pd.read_csv(
    r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
)
df = pd.DataFrame(
    data,
    columns=['First_Name'],
)
print(df)

   Count First_Name Last_Name                                        Address  \
0      1       John       Doe                        2440 North Booth Street   
1      2       Jane       Doe                                            NaN   
2      3       Alan    Turing              1 Fairfield Street Sackville Park   
3      4      Roger   Penrose  Queen Mary University of London Mile End Road   

         City State User_Name  User_ID  Browser       OS  OS_Price_Factor  
0   Milwaukee    WI      Jdoe     1581  Firefox  windows              NaN  
1    New York    NY     Jadoe     4501   Google      mac              NaN  
2  Manchester    GB   Aturing     1000    Brave    linux              NaN  
3      London    GB  Rpenrose     1001      NaN  windows              NaN  
  First_Name
0       John
1       Jane
2       Alan
3      Roger

# Select specified columns
import pandas as pd
# Read the full CSV, then keep only the five listed columns, in this order.
data = pd.read_csv(
    r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
)
df = pd.DataFrame(
    data,
    columns=['First_Name', 'Last_Name', 'User_ID', 'Browser', 'OS'],
)
print(df)

  First_Name Last_Name  User_ID  Browser       OS
0       John       Doe     1581  Firefox  windows
1       Jane       Doe     4501   Google      mac
2       Alan    Turing     1000    Brave    linux
3      Roger   Penrose     1001      NaN  windows

# Remove specified columns (First_Name and OS) from the columns selected above
import pandas as pd
# Read the full CSV, then keep only Last_Name, User_ID and Browser.
data = pd.read_csv(
    r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
)
df = pd.DataFrame(
    data,
    columns=['Last_Name', 'User_ID', 'Browser'],
)
print(df)

  Last_Name  User_ID  Browser
0       Doe     1581  Firefox
1       Doe     4501   Google
2    Turing     1000    Brave
3   Penrose     1001      NaN

# Import the .csv file and explore its shape
import pandas as pd

# Load the CSV; `df` is a second DataFrame constructed from the loaded one.
dataFrame = pd.read_csv(
    r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
)
df = pd.DataFrame(dataFrame)
print("DataFrame with some NaN (missing) values...\n", dataFrame)

# .shape is a (rows, columns) tuple
print("\nNumber of rows and column in our DataFrame = ", dataFrame.shape)

# axis=1 drops every *column* that contains at least one NaN (rows are kept);
# the result is a new frame, dataFrame itself is not modified.
print(
    "\nDataFrame after removing NaN values...\n",
    dataFrame.dropna(axis=1),
)

DataFrame with some NaN (missing) values...
    Count First_Name Last_Name                                        Address  \
0      1       John       Doe                        2440 North Booth Street   
1      2       Jane       Doe                                            NaN   
2      3       Alan    Turing              1 Fairfield Street Sackville Park   
3      4      Roger   Penrose  Queen Mary University of London Mile End Road   

         City State User_Name  User_ID  Browser       OS  OS_Price_Factor  
0   Milwaukee    WI      Jdoe     1581  Firefox  windows              NaN  
1    New York    NY     Jadoe     4501   Google      mac              NaN  
2  Manchester    GB   Aturing     1000    Brave    linux              NaN  
3      London    GB  Rpenrose     1001      NaN  windows              NaN  

Number of rows and column in our DataFrame =  (4, 11)

DataFrame after removing NaN values...
    Count First_Name Last_Name        City State User_Name  User_ID       OS
0      1       John       Doe   Milwaukee    WI      Jdoe     1581  windows
1      2       Jane       Doe    New York    NY     Jadoe     4501      mac
2      3       Alan    Turing  Manchester    GB   Aturing     1000    linux
3      4      Roger   Penrose      London    GB  Rpenrose     1001  windows

# Organizing the Data v2 - without a .csv file
## Row data: one tuple per user (a list of tuples, not a dictionary)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, naming the tuple positions as columns
df = pd.DataFrame(
    data,
    columns=[
        'First_Name', 'Last_Name', 'City', 'State',
        'User_Name', 'User_ID', 'OS',
    ],
)

## Display the DataFrame (shown as cell output when run in a notebook)
df

	First_Name	Last_Name	City	State	User_Name	User_ID	OS
0	John	Doe	Milwaukee	WI	Jdoe	1581	windows
1	Jane	Doe	New York	NY	Jadoe	4501	mac
2	Alan	Turing	Manchester	GB	Aturing	1000	linux
3	Roger	Penrose	London	GB	Rpenrose	1001	windows

# Organizing the Data v2 - without .csv file
## Selected Columns
# Import pandas package 
import pandas as pd

## Row data: one tuple per user (a list of tuples, not a dictionary)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, naming the tuple positions as columns
df = pd.DataFrame(
    data,
    columns=[
        'First_Name', 'Last_Name', 'City', 'State',
        'User_Name', 'User_ID', 'OS',
    ],
)

# Last names only: drop every other column. drop() returns a new frame
# (shown as cell output in a notebook); df itself keeps all seven columns.
df.drop(columns=['First_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'])

	Last_Name
0	Doe
1	Doe
2	Turing
3	Penrose

# Organizing the Data v2 - without .csv file
## Combine Selected Columns
# Import pandas package 
import pandas as pd

## Row data: one tuple per user (a list of tuples, not a dictionary)
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, naming the tuple positions as columns
df = pd.DataFrame(
    data,
    columns=[
        'First_Name', 'Last_Name', 'City', 'State',
        'User_Name', 'User_ID', 'OS',
    ],
)

# Keep First_Name, Last_Name and User_ID by dropping the other four columns.
# drop() returns a new frame (shown as cell output in a notebook); df itself
# keeps all seven columns.
df.drop(columns=['City', 'State', 'User_Name', 'OS'])

	First_Name	Last_Name	User_ID
0	John	Doe	1581
1	Jane	Doe	4501
2	Alan	Turing	1000
3	Roger	Penrose	1001

# Apache PySpark 
# Import Libraries
# Connect Apache Spark and Jupyter Notebooks
# Keep findspark.init() ahead of the pyspark imports: it is what lets the
# `import pyspark` lines below resolve in this environment (presumably by
# locating the local Spark installation -- confirm against the findspark docs).
import findspark
findspark.init()

import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.getOrCreate()

from pyspark.sql.types import StructType, StructField, FloatType, BooleanType
from pyspark.sql.types import DoubleType, IntegerType, StringType
from pyspark import SQLContext

# Setup the Configuration
conf = pyspark.SparkConf()
# NOTE: despite its name, spark_context is a SparkSession (the value returned
# by builder.getOrCreate()), not a SparkContext. The name is kept so that any
# later code referring to it still works.
spark_context = SparkSession.builder.config(conf=conf).getOrCreate()
# SQLContext takes the SparkContext as its first argument. The original code
# passed the SparkSession itself there; hand over the session's real
# SparkContext and attach the existing session explicitly instead.
sqlcontext = SQLContext(spark_context.sparkContext, sparkSession=spark_context)

# Sample rows, one tuple per user: (UserID, Username, Browser, OS)
data = [
    (1580, "Barry", "Firefox", "Windows"),
    (5820, "Sam", "MS Edge", "Linux"),
    (2340, "Harry", "Vivaldi", "Windows"),
    (7860, "Albert", "Chrome", "Windows"),
    (1123, "May", "Safari", "macOS"),
]

# Column names, in the same order as the tuple fields
schm = ["UserID", "Username", "Browser", "OS"]

# Build the Spark DataFrame from the rows; only column names are supplied
user_data_df = sqlcontext.createDataFrame(data, schema=schm)

# Bare expression: in a notebook this displays the DataFrame's summary
user_data_df

DataFrame[UserID: bigint, Username: string, Browser: string, OS: string]

# Print the DataFrame's rows as a text table.
user_data_df.show()

+------+--------+-------+-------+
|UserID|Username|Browser|     OS|
+------+--------+-------+-------+
|  1580|   Barry|Firefox|Windows|
|  5820|     Sam|MS Edge|  Linux|
|  2340|   Harry|Vivaldi|Windows|
|  7860|  Albert| Chrome|Windows|
|  1123|     May| Safari|  macOS|
+------+--------+-------+-------+

# End Hacker Rank Reassessment Based Objective Quality Evidence (OQE) and Lessons Learned.