# First Unique Character in a string.
# Problem. Suppose we have a string and we have to find the first unique character in the string. So if the string is 
# like “people”, the first letter whose occurrence is one is ‘o’. So the index will be returned, that is 2 here. 
# If there is no such character, then return -1.
# To solve this, we will follow these steps:
# Create one frequency map: for each character c in the string, if c is not in frequency, insert it with value 1;
# otherwise, increase its count in frequency.
# Scan the string from left to right; return the index of the first character whose count is 1, otherwise return -1.
class Solution(object):
    """Find the first non-repeating character of a string."""

    def firstUniqChar(self, s):
        """Return the index of the first character that occurs exactly once.

        :type s: str
        :rtype: int
        :returns: zero-based index of the first character whose total count
            in ``s`` is 1, or -1 when no such character exists (including
            when ``s`` is empty).
        """
        # Function-scope import keeps the block self-contained.
        from collections import Counter

        # One pass to count every character.
        frequency = Counter(s)
        # Second pass in string order so the earliest unique character wins.
        for index, char in enumerate(s):
            if frequency[char] == 1:
                return index
        return -1
# Demo run: look up the first unique character of a sample word and print its index.
ob1 = Solution()
first_unique_index = ob1.firstUniqChar("hackthegame")
print(first_unique_index)
2
# Organization of Data v2
# import .csv file with Pandas
import pandas as pd

# Location of the source CSV file
csv_path = r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
# Load the whole file into a DataFrame and print it
df = pd.read_csv(csv_path)
print(df)

# Select Subset of Columns
import pandas as pd

# Load the CSV file, then keep only the First_Name column
csv_path = r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
data = pd.read_csv(csv_path)
selected_columns = ['First_Name']
df = pd.DataFrame(data, columns=selected_columns)
print(df)
   Count First_Name Last_Name                                        Address  \
0      1       John       Doe                        2440 North Booth Street   
1      2       Jane       Doe                                            NaN   
2      3       Alan    Turing              1 Fairfield Street Sackville Park   
3      4      Roger   Penrose  Queen Mary University of London Mile End Road   

         City State User_Name  User_ID  Browser       OS  OS_Price_Factor  
0   Milwaukee    WI      Jdoe     1581  Firefox  windows              NaN  
1    New York    NY     Jadoe     4501   Google      mac              NaN  
2  Manchester    GB   Aturing     1000    Brave    linux              NaN  
3      London    GB  Rpenrose     1001      NaN  windows              NaN  
  First_Name
0       John
1       Jane
2       Alan
3      Roger
# Select specified columns
import pandas as pd

# Load the CSV file, then keep the name, user ID, browser and OS columns
csv_path = r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
data = pd.read_csv(csv_path)
selected_columns = ['First_Name', 'Last_Name', 'User_ID', 'Browser', 'OS']
df = pd.DataFrame(data, columns=selected_columns)
print(df)
  First_Name Last_Name  User_ID  Browser       OS
0       John       Doe     1581  Firefox  windows
1       Jane       Doe     4501   Google      mac
2       Alan    Turing     1000    Brave    linux
3      Roger   Penrose     1001      NaN  windows
# Remove specified columns (First_Name and OS) from the selected columns
import pandas as pd

# Load the CSV file, then keep only Last_Name, User_ID and Browser
csv_path = r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
data = pd.read_csv(csv_path)
remaining_columns = ['Last_Name', 'User_ID', 'Browser']
df = pd.DataFrame(data, columns=remaining_columns)
print(df)
  Last_Name  User_ID  Browser
0       Doe     1581  Firefox
1       Doe     4501   Google
2    Turing     1000    Brave
3   Penrose     1001      NaN
# Import .csv file and explore its shape
import pandas as pd

# Read the CSV file into a DataFrame
csv_path = r'C:\Users\Micha\OneDrive\14_Python files\Python_CSV_files\Organization of Data_v2.csv'
dataFrame = pd.read_csv(csv_path)
df = pd.DataFrame(dataFrame)
print("DataFrame with some NaN (missing) values...\n", dataFrame)

# Report the (rows, columns) shape of the DataFrame
row_and_column_count = dataFrame.shape
print("\nNumber of rows and column in our DataFrame = ", row_and_column_count)

# Drop every column that contains a NaN (axis=1 targets columns, not rows)
columns_without_nan = dataFrame.dropna(axis=1)
print("\nDataFrame after removing NaN values...\n", columns_without_nan)
DataFrame with some NaN (missing) values...
    Count First_Name Last_Name                                        Address  \
0      1       John       Doe                        2440 North Booth Street   
1      2       Jane       Doe                                            NaN   
2      3       Alan    Turing              1 Fairfield Street Sackville Park   
3      4      Roger   Penrose  Queen Mary University of London Mile End Road   

         City State User_Name  User_ID  Browser       OS  OS_Price_Factor  
0   Milwaukee    WI      Jdoe     1581  Firefox  windows              NaN  
1    New York    NY     Jadoe     4501   Google      mac              NaN  
2  Manchester    GB   Aturing     1000    Brave    linux              NaN  
3      London    GB  Rpenrose     1001      NaN  windows              NaN  

Number of rows and column in our DataFrame =  (4, 11)

DataFrame after removing NaN values...
    Count First_Name Last_Name        City State User_Name  User_ID       OS
0      1       John       Doe   Milwaukee    WI      Jdoe     1581  windows
1      2       Jane       Doe    New York    NY     Jadoe     4501      mac
2      3       Alan    Turing  Manchester    GB   Aturing     1000    linux
3      4      Roger   Penrose      London    GB  Rpenrose     1001  windows
# Organizing the Data v2 - without a .csv file
## Create the sample records as a list of row tuples
column_names = ['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS']
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, one row per tuple
df = pd.DataFrame(data, columns=column_names)

## Data Frame (bare expression: displayed when run as a notebook cell)
df
First_Name Last_Name City State User_Name User_ID OS
0 John Doe Milwaukee WI Jdoe 1581 windows
1 Jane Doe New York NY Jadoe 4501 mac
2 Alan Turing Manchester GB Aturing 1000 linux
3 Roger Penrose London GB Rpenrose 1001 windows
# Organizing the Data v2 - without .csv file
## Selected Columns
# Import pandas package 
import pandas as pd

## Create the sample records as a list of row tuples
column_names = ['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS']
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, one row per tuple
df = pd.DataFrame(data, columns=column_names)

# Keep Last_Name only by dropping every other column.
# drop() returns a new frame and leaves df unchanged; the result is shown only as notebook cell output.
df.drop(columns=['First_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS'])
Last_Name
0 Doe
1 Doe
2 Turing
3 Penrose
# Organizing the Data v2 - without .csv file
## Combine Selected Columns
# Import pandas package 
import pandas as pd

## Create the sample records as a list of row tuples
column_names = ['First_Name', 'Last_Name', 'City', 'State', 'User_Name', 'User_ID', 'OS']
data = [
    ("John", "Doe", "Milwaukee", "WI", "Jdoe", 1581, "windows"),
    ("Jane", "Doe", "New York", "NY", "Jadoe", 4501, "mac"),
    ("Alan", "Turing", "Manchester", "GB", "Aturing", 1000, "linux"),
    ("Roger", "Penrose", "London", "GB", "Rpenrose", 1001, "windows"),
]

# Build the pandas DataFrame, one row per tuple
df = pd.DataFrame(data, columns=column_names)

# Keep First_Name, Last_Name and User_ID by dropping the remaining columns.
# drop() returns a new frame and leaves df unchanged; the result is shown only as notebook cell output.
df.drop(columns=['City', 'State', 'User_Name', 'OS'])
First_Name Last_Name User_ID
0 John Doe 1581
1 Jane Doe 4501
2 Alan Turing 1000
3 Roger Penrose 1001
# Apache PySpark 
# Import Libraries
# Connect Apache Spark and Jupyter Notebooks
# NOTE(review): findspark.init() presumably has to run before `import pyspark`
# so that the package can be found; keep this statement order and confirm
# against the local environment.
import findspark
findspark.init()

import pyspark 
from pyspark.sql import SparkSession
# Get (or create) the SparkSession bound to the name `spark`.
spark = SparkSession.builder.getOrCreate()

from pyspark.sql.types import StructType, StructField, FloatType, BooleanType
from pyspark.sql.types import DoubleType, IntegerType, StringType
from pyspark import SQLContext
# Setup the Configuration
conf = pyspark.SparkConf()
# NOTE: despite its name, `spark_context` holds a SparkSession, because that is
# what SparkSession.builder...getOrCreate() returns. The name is kept as-is.
spark_context = SparkSession.builder.config(conf=conf).getOrCreate()
# SQLContext takes a SparkContext as its first argument, so pass the session's
# underlying context (plus the session itself) rather than the session object.
sqlcontext = SQLContext(spark_context.sparkContext, spark_context)
# Add Data
# Sample rows, one tuple per user: (UserID, Username, Browser, OS)
data = [
    (1580, "Barry", "Firefox", "Windows"),
    (5820, "Sam", "MS Edge", "Linux"),
    (2340, "Harry", "Vivaldi", "Windows"),
    (7860, "Albert", "Chrome", "Windows"),
    (1123, "May", "Safari", "macOS"),
]
schm = ["UserID", "Username", "Browser", "OS"]

# Build the Spark DataFrame from the rows, using schm as the column names
user_data_df = sqlcontext.createDataFrame(data, schema=schm)
# Bare expression: shows the DataFrame's schema summary when run as a notebook cell
user_data_df
DataFrame[UserID: bigint, Username: string, Browser: string, OS: string]
# Print the DataFrame's rows as a formatted text table
user_data_df.show()
+------+--------+-------+-------+
|UserID|Username|Browser|     OS|
+------+--------+-------+-------+
|  1580|   Barry|Firefox|Windows|
|  5820|     Sam|MS Edge|  Linux|
|  2340|   Harry|Vivaldi|Windows|
|  7860|  Albert| Chrome|Windows|
|  1123|     May| Safari|  macOS|
+------+--------+-------+-------+
# End Hacker Rank Reassessment Based Objective Quality Evidence (OQE) and Lessons Learned.