import csv
import codecs
from itertools import islice
with codecs.open('ZTransPropertyInfo.txt', 'r', encoding='utf-8', errors='ignore') as zd, open('outfile.txt', 'w') as w:
    for line in zd:
        if 'MARICOPA' in line:
            w.write(line)
with codecs.open('ZTransMain.txt', 'r', encoding='utf-8', errors='ignore') as zd, open('outfile2.txt', 'w') as w:
    for line in zd:
        if 'MARICOPA' in line:
            w.write(line)
import pandas as pd
# reading two csv files
data1 = pd.read_csv('outfile.txt', delimiter = '|')
data2 = pd.read_csv('outfile2.txt', delimiter = '|')
/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3165: DtypeWarning: Columns (2,4,7,8,11,12,13,19,20,21,24,26,30,31,32,33,34,36,37,42,43,45,47,49,50,51,54,59,60,61) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
/opt/anaconda3/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3165: DtypeWarning: Columns (5,7,8,9,10,17,22,30,31,32,33,34,35,37,39,40,50,54,55,56,58,60,63,67,71,74,75,82,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,100,103,105,106,107,109,111,112,113,114,116,117,119) have mixed types.Specify dtype option on import or set low_memory=False.
  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
len(data1)
142869
len(data2)
9318789
coloutfile = pd.read_csv('colnames_outfile.csv')
coloutfile2 = pd.read_csv('colnames_outfile2.csv')
coloutfile2
Unnamed: 0 x
0 1 TransId
1 2 FIPS
2 3 State
3 4 County
4 5 DataClassStndCode
126 127 TransactionTypeStndCode
127 128 BatchID
128 129 BKFSPID
129 130 ZVendorStndCode
130 131 SourceChkSum

131 rows × 2 columns

print(coloutfile.columns)
Index(['Unnamed: 0', 'x'], dtype='object')
cname1 = coloutfile['x'].values
cname2 = coloutfile2['x'].values
cname1
array(['TransId', 'AssessorParcelNumber', 'APNIndicatorStndCode',
       'TaxIDNumber', 'TaxIDIndicatorStndCode',
       'UnformattedAssessorParcelNumber', 'AlternateParcelNumber',
       'HawaiiCondoCPRCode', 'PropertyHouseNumber',
       'PropertyHouseNumberExt', 'PropertyStreetPreDirectional',
       'PropertyStreetName', 'PropertyStreetSuffix',
       'PropertyStreetPostDirectional', 'PropertyBuildingNumber',
       'PropertyFullStreetAddress', 'PropertyCity', 'PropertyState',
       'PropertyZip', 'PropertyZip4', 'OriginalPropertyFullStreetAddress',
       'OriginalPropertyAddressLastline', 'PropertyAddressStndCode',
       'LegalLot', 'LegalOtherLot', 'LegalLotCode', 'LegalBlock',
       'LegalSubdivisionName', 'LegalCondoProjectPUDDevName',
       'LegalBuildingNumber', 'LegalUnit', 'LegalSection', 'LegalPhase',
       'LegalTract', 'LegalDistrict', 'LegalMunicipality', 'LegalCity',
       'LegalTownship', 'LegalSTRSection', 'LegalSTRTownship',
       'LegalSTRRange', 'LegalSTRMeridian', 'LegalSecTwnRngMer',
       'LegalRecordersMapReference', 'LegalDescription', 'LegalLotSize',
       'PropertySequenceNumber', 'PropertyAddressMatchcode',
       'PropertyAddressUnitDesignator', 'PropertyAddressUnitNumber',
       'PropertyAddressCarrierRoute', 'PropertyAddressGeoCodeMatchCode',
       'PropertyAddressLatitude', 'PropertyAddressLongitude',
       'PropertyAddressCensusTractAndBlock',
       'PropertyAddressConfidenceScore', 'PropertyAddressCBSACode',
       'PropertyAddressCBSADivisionCode', 'PropertyAddressMatchType',
       'PropertyAddressDPV', 'PropertyGeocodeQualityCode',
       'PropertyAddressQualityCode', 'FIPS', 'LoadID', 'ImportParcelID',
       'BKFSPID', 'AssessmentRecordMatchFlag', 'BatchID'], dtype=object)
data1.columns = cname1
data1.head
<bound method NDFrame.head of           TransId AssessorParcelNumber APNIndicatorStndCode  TaxIDNumber  \
0        31447596           104-03-029                  NaN          NaN   
1        31450688          104-07-002A                  NaN          NaN   
2        31451247          104-41-002B                  NaN          NaN   
3        31455254          104-07-004K                  NaN          NaN   
4        31455269          104-07-004K                  NaN          NaN   
...           ...                  ...                  ...          ...   
142864  443923230           306-08-004                  NaN          NaN   
142865  444307662          107-14-046B                  NaN          NaN   
142866  447601053           306-08-042                               NaN   
142867   44476352                  NaN                  NaN          NaN   
142868   44562101                  NaN                  NaN          NaN   

       TaxIDIndicatorStndCode UnformattedAssessorParcelNumber  \
0                         NaN                        10403029   
1                         NaN                       10407002A   
2                         NaN                       10441002B   
3                         NaN                       10407004K   
4                         NaN                       10407004K   
...                       ...                             ...   
142864                    NaN                        30608004   
142865                    NaN                       10714046B   
142866                                               30608042   
142867                    NaN                             NaN   
142868                    NaN                             NaN   

        AlternateParcelNumber HawaiiCondoCPRCode PropertyHouseNumber  \
0                         NaN                NaN                 NaN   
1                         NaN                NaN                 NaN   
2                         NaN                NaN                 NaN   
3                         NaN                NaN                 NaN   
4                         NaN                NaN                 NaN   
...                       ...                ...                 ...   
142864                    NaN                                   2100   
142865                    NaN                NaN                 NaN   
142866                    NaN                                    NaN   
142867                    NaN                NaN                 NaN   
142868                    NaN                                    NaN   

        PropertyHouseNumberExt  ... PropertyAddressMatchType  \
0                          NaN  ...                      NaN   
1                          NaN  ...                      NaN   
2                          NaN  ...                      NaN   
3                          NaN  ...                      NaN   
4                          NaN  ...                      NaN   
...                        ...  ...                      ...   
142864                     NaN  ...                      NaN   
142865                     NaN  ...                      NaN   
142866                     NaN  ...                      NaN   
142867                     NaN  ...                      NaN   
142868                     NaN  ...                      NaN   

       PropertyAddressDPV PropertyGeocodeQualityCode  \
0                     NaN                        NaN   
1                     NaN                        NaN   
2                     NaN                        NaN   
3                     NaN                        NaN   
4                     NaN                        NaN   
...                   ...                        ...   
142864                NaN                        NaN   
142865                NaN                        NaN   
142866                NaN                        NaN   
142867                NaN                        NaN   
142868                NaN                        NaN   

       PropertyAddressQualityCode  FIPS        LoadID ImportParcelID  \
0                             NaN  4001  4.176882e+09      3471129.0   
1                             NaN  4001  4.176880e+09      3471280.0   
2                             NaN  4001  4.176885e+09    160724094.0   
3                             NaN  4001  4.288529e+09      3471293.0   
4                             NaN  4001  4.288529e+09      3471293.0   
...                           ...   ...           ...            ...   
142864                        NaN  4025  4.227734e+09      6467503.0   
142865                        NaN  4025  4.245149e+09      6420274.0   
142866                        NaN  4025  4.325089e+09      6467541.0   
142867                        NaN  4027  4.251788e+09            NaN   
142868                        NaN  4027  4.251693e+09            NaN   

         BKFSPID  AssessmentRecordMatchFlag  BatchID  
0          26719                          1  4381728  
1          30505                          1  4381728  
2          31294                          1  4381728  
3          36088                          1  4585783  
4          36089                          1  4585783  
...          ...                        ...      ...  
142864    146591                          1  4480589  
142865    581706                          1  4510412  
142866  19257490                          1  4671832  
142867    158159                          0  4537074  
142868     50303                          0  4537078  

[142869 rows x 68 columns]>
data2.columns = cname2
data2.head
<bound method NDFrame.head of           TransId  FIPS State   County DataClassStndCode RecordTypeStndCode  \
0        31466602  4001    AZ   APACHE                 D                  P   
1        31490181  4001    AZ   APACHE                 M                  P   
2        31490639  4001    AZ   APACHE                 M                  P   
3        31494701  4001    AZ   APACHE                 M                  P   
4        31614926  4003    AZ  COCHISE                 M                  P   
...           ...   ...   ...      ...               ...                ...   
9318784  44250889  4025    AZ  YAVAPAI                 M                  P   
9318785  44257768  4025    AZ  YAVAPAI                 M                  P   
9318786  44261774  4025    AZ  YAVAPAI                 M                  P   
9318787  44291305  4025    AZ  YAVAPAI                 M                  P   
9318788  44568103  4027    AZ     YUMA                 M                  P   

        RecordingDate RecordingDocumentNumber RecordingBookNumber  \
0          2007-11-05             2007-010777                 NaN   
1          2005-07-29             2005-007363                 NaN   
2          2006-01-30             2006-001267                 NaN   
3          2012-11-01             2012-005919                 NaN   
4          2009-07-10              2009-16844                 NaN   
...               ...                     ...                 ...   
9318784    2007-05-03                 4133816                4503   
9318785    2007-11-06                 4188215                4552   
9318786    2008-03-27                 4222598                4584   
9318787    2013-04-12            2013-0021421                4950   
9318788    2012-10-29              2012-27912                 NaN   

        RecordingPageNumber  ... MatchStndCode  REOStndCode  \
0                       NaN  ...            NM          NaN   
1                       NaN  ...            NM          NaN   
2                       NaN  ...            MA          NaN   
3                       NaN  ...            NM          NaN   
4                       NaN  ...            MA          NaN   
...                     ...  ...           ...          ...   
9318784                 811  ...            MA          NaN   
9318785                 855  ...            NM          NaN   
9318786                 295  ...            NM          NaN   
9318787                 159  ...            MA          NaN   
9318788                 NaN  ...            MA          NaN   

         UpdateOwnershipFlag      LoadID  StatusInd  TransactionTypeStndCode  \
0                             4176950433        NaN                            
1                             4176785559        NaN                            
2                             4176784953        NaN                            
3                             4176785341        NaN                            
4                             4240489498        NaN                            
...                      ...         ...        ...                      ...   
9318784                       3873932235        NaN                            
9318785                       3873963458        NaN                            
9318786                       3873963871        NaN                            
9318787                       3874013809        NaN                            
9318788                       4251574354        NaN                            

         BatchID BKFSPID  ZVendorStndCode  SourceChkSum  
0        4381728   24330              BKF -8.397281e+08  
1        4381724    3768              BKF  1.084165e+09  
2        4381724    4234              BKF  1.789306e+08  
3        4381724    2182              BKF -1.926040e+09  
4        4503023   23484              BKF  1.796548e+09  
...          ...     ...              ...           ...  
9318784  3819760   73366              BKF  5.891026e+07  
9318785  3819760   80246              BKF  1.754581e+07  
9318786  3819760   84271              BKF  9.791578e+07  
9318787  3819760  114215              BKF -9.256134e+08  
9318788  4537078   56570              BKF -8.043522e+08  

[9318789 rows x 131 columns]>
# using merge function by setting how='inner'
output1 = pd.merge(data1, data2, 
                   on='TransId', 
                   how='inner')
output1.to_csv('merge1.csv')
databuyer = pd.read_csv('ZTransBuyerName.txt', sep = '|', header=None, error_bad_lines=False)
dataseller = pd.read_csv('ZTransSellerName.txt', sep = '|', header=None, error_bad_lines=False)
b'Skipping line 2083791: expected 10 fields, saw 13\n'
dataseller
TransId Name N2 3 4 5 6 7 8 9
0 31996253 DONALD S MERRELL DONALD S MERRELL NaN 1 NaN 3.847747e+09 4011 1000485622
1 31996254 DONALD S MERRELL DONALD S MERRELL NaN 1 NaN 3.847747e+09 4011 1000485622
2 31996255 DONALD S MERRELL DONALD S MERRELL NaN 1 NaN 3.847747e+09 4011 1000485622
3 31996256 DONALD S MERRELL DONALD S MERRELL NaN 1 NaN 3.847747e+09 4011 1000485622
4 31996257 KATHY ANN CHANDLER KATHY ANN CHANDLER NaN 1 NaN 3.847747e+09 4011 1000485622
14804494 448919808 JESUS MANUEL GOMEZ JESUS MANUEL GOMEZ NaN 1 NaN NaN 4023 1000505493
14804495 448919808 ANA CECILIA GOMEZ ANA CECILIA GOMEZ NaN 2 NaN NaN 4023 1000505493
14804496 448919808 JESUS M GOMEZ JESUS M GOMEZ NaN 3 NaN NaN 4023 1000505493
14804497 448919808 ANA C GOMEZ ANA C GOMEZ NaN 4 NaN NaN 4023 1000505493
14804498 448919809 JOSE A CEDENO JOSE A CEDENO NaN 1 NaN NaN 4023 1000505493

14804499 rows × 10 columns

databuyer.columns = ['TransId', 'Name', 'N2', '3', '4', '5', '6', '7', '8', '9']
dataseller.columns = ['TransId', 'Name', 'N2', '3', '4', '5', '6', '7', '8', '9']
# using merge function by setting how='inner'
output2 = pd.merge(output1, databuyer, 
                   on='TransId', 
                   how='inner')
output2.to_csv('merge_buyer.csv')
# using merge function by setting how='inner'
output3 = pd.merge(output1, dataseller, 
                   on='TransId', 
                   how='inner')
output3.to_csv('merge_seller.csv')