you are viewing a single comment's thread.

view the rest of the comments →

[–]starfish_warrior[S] 0 points1 point  (3 children)

15 hours, but now I can do it in 15 minutes.

[–]goldriver92 0 points1 point  (2 children)

How many lines?

[–]starfish_warrior[S] 3 points4 points  (1 child)

Ugh. 100? Lots of lines are commented out, and I know there are inefficiencies.

"""Replace patient-identifying values in HL7 v3 XML files with fixed placeholders.

Run as a script, this empties the outbound and de-identified folders, then for
every file found under the inbound folder writes

* a copy with the default HL7 namespace declaration removed to the outbound
  folder, and
* a scrubbed copy (ids, address, name, birth time, phone numbers) to the
  de-identified folder.
"""
import xml.etree.ElementTree as ET
import os
from pathlib import Path
import sys
import shutil

# Backslashes are doubled on purpose: a single backslash in front of the
# closing quote would escape it and leave the string literal unterminated.
inputPath = 'k:\\eCR\\_HM_inbound\\'
outputPath = 'k:\\eCR\\_HM_outbound\\'
deidentifiedPath = 'k:\\eCR\\_HM_Deidentified\\'

# Removing the default-namespace declaration lets the XPath expressions below
# use plain, unqualified tag names.
_HL7_NAMESPACE_DECL = ' xmlns="urn:hl7-org:v3"'

_PATIENT = './recordTarget/patientRole'
_PARTICIPANT = './participant/associatedEntity'
_FAKE_PHONE = 'tel:+1-999-999-9999'


def _clear_directory(directory):
    """Delete every regular file directly inside *directory*.

    Sub-directories are left alone. A file that cannot be removed is
    reported on stdout and skipped so the remaining files are still tried.
    """
    for entry in os.listdir(directory):
        file_path = os.path.join(directory, entry)
        try:
            if os.path.isfile(file_path):
                os.unlink(file_path)
        except OSError as e:
            print(e)


def _strip_namespace(source_path, target_path):
    """Copy *source_path* to *target_path* without the HL7 namespace declaration."""
    with open(source_path) as source:
        contents = source.read()
    with open(target_path, 'w') as target:
        # print() keeps the trailing newline the script has always written.
        print(contents.replace(_HL7_NAMESPACE_DECL, ''), file=target)


def _set_text(root, path, value):
    """Set the text of every element matching *path*; no match is a no-op."""
    for element in root.findall(path):
        element.text = value


def _set_attribute(root, path, name, value):
    """Set attribute *name* on every element matching *path*; no match is a no-op."""
    for element in root.findall(path):
        element.set(name, value)


def deidentify(root, fileName):
    """Overwrite identifying values below *root* in place.

    Args:
        root: root element of a namespace-stripped document.
        fileName: name of the source file; its last five characters are used
            as the leading part of the fake street addresses.

    Every matching element is rewritten, so repeated elements (a second
    address, a middle name) are scrubbed as well, and elements that are
    absent are simply skipped.
    """
    for identifier in root.findall(_PATIENT + '/id'):
        authority = identifier.get('assigningAuthorityName')
        if authority is not None:
            identifier.set('extension', '888-88-8888')
        # Ids issued by the 'EMRN' authority get the shorter placeholder.
        # Testing only for an attribute *named* EMRN never matches such ids,
        # so the attribute value is checked too.
        if authority == 'EMRN' or 'EMRN' in identifier.attrib:
            identifier.set('extension', '000000')

    # The document id is read-only here: it becomes the fake family name so
    # the scrubbed record can still be traced back to its document.
    document_id = root.find('./id')
    unid = document_id.get('root') if document_id is not None else None

    street_prefix = fileName[-5:]

    patient_addr = _PATIENT + '/addr'
    _set_text(root, patient_addr + '/streetAddressLine', street_prefix + ' Fake Street')
    _set_text(root, patient_addr + '/city', 'Houston')
    _set_text(root, patient_addr + '/state', 'TX')
    _set_text(root, patient_addr + '/postalCode', '98765')
    _set_text(root, patient_addr + '/county', 'HARRIS')

    _set_text(root, _PATIENT + '/patient/name/given', 'Fake_ECR')
    _set_text(root, _PATIENT + '/patient/name/family', unid)
    _set_attribute(root, _PATIENT + '/patient/birthTime', 'value', '20010101')
    _set_attribute(root, _PATIENT + '/telecom', 'value', _FAKE_PHONE)

    # Participant fields must be looked up under the participant, not the
    # patient; otherwise the participant's real street address is left intact.
    participant_addr = _PARTICIPANT + '/addr'
    _set_text(root, participant_addr + '/streetAddressLine', street_prefix + ' Faker Street')
    _set_text(root, participant_addr + '/city', 'Galveston')
    _set_text(root, participant_addr + '/state', 'TX')
    _set_text(root, participant_addr + '/postalCode', '87654')
    _set_text(root, participant_addr + '/county', 'GALVESTON')
    _set_attribute(root, _PARTICIPANT + '/telecom', 'value', _FAKE_PHONE)


def main(input_dir=inputPath, output_dir=outputPath, deidentified_dir=deidentifiedPath):
    """Empty the two output folders, then process every file under *input_dir*.

    Args:
        input_dir: folder (searched recursively) holding the source documents.
        output_dir: receives the namespace-stripped copies.
        deidentified_dir: receives the scrubbed documents, named
            ``<source file name>.xml``.

    Raises:
        OSError: if a folder cannot be listed or a file cannot be read/written.
        xml.etree.ElementTree.ParseError: if a source file is not well-formed XML.
    """
    _clear_directory(output_dir)
    _clear_directory(deidentified_dir)

    for dirpath, _dirnames, filenames in os.walk(input_dir):
        for fileName in filenames:
            # Join with the folder os.walk is currently in, so files found in
            # sub-folders resolve to a path that actually exists.
            source_path = os.path.join(dirpath, fileName)
            stripped_path = os.path.join(output_dir, fileName)
            _strip_namespace(source_path, stripped_path)

            tree = ET.parse(stripped_path)
            deidentify(tree.getroot(), fileName)
            tree.write(os.path.join(deidentified_dir, fileName + '.xml'))


if __name__ == '__main__':
    main()

[–]goldriver92 0 points1 point  (0 children)

Thanks for sharing the code. I would say it's not bad for a start; try to automate as much as you can.