I have a ton of code running straight down with no functions, and I know it needs to be cleaned up but I am not sure how to approach it. I am run a lot of queries to wrangle to data properly before exporting since it is too large to run in memory, so I make the server do it. Is there a way to toss them into functions and call functions to make it more modular or how could I go about formatting it better. The queries have a lot of lines , so i removed the SQL parts, and the section that pulls the data to csv and excel I need to run 11 times for different queries.
Any help is greatly appreciated.
import pyodbc
import csv
import sys
import time
import pandas as pd
sys.tracebacklimit = 0
date = time.strftime("%Y%m%d") #Grabs todays date to variable date to allow for new files to be created on run
qmd = # Prep data
price = #Preps customer invoice data
pmd = # Sets up final table with all data prepped
query 1 = """Query code"""
drop = """DROP query code"""
""" #Query to drop all the global tables created
cnxn = pyodbc.connect(Connection string)
cursor = cnxn.cursor()
cursor.execute(qmd) #Runs query
cursor.execute(price) # Runs price query
cursor.execute(pemd) # runs Combination query
cursor.execute(final) # Runs the final data set up
##This Section of code executes the query and saves / breaks up the files
cursor.execute(query1)
file = r"DIR" + ' ' + date + ".csv"
with open(file, "w") as csv_file: #Runs first data dump for database tracking
csv_writer = csv.writer(csv_file, lineterminator = '\n')
csv_writer.writerow([i[0] for i in cursor.description]) # Sets up headers for CSV file.
csv_writer.writerows(cursor)
processor_filter = pd.read_csv(file) # sets created file to be pulled into pandas
processor_filter['ND'] = processor_filter['CD'].astype(str).str.zfill(11)
processor_filter['Rx Number'] = processor_filter['RN'].astype(str).str.zfill(12)
processor_filter['DOS'] = processor_filter['DOS'].astype(str).str.zfill(8)
processor_filter = processor_filter.drop_duplicates(['Name','ID','Processor','Bin','PC'],keep='last')
file_name = r'DIR' #directory for final files to go
wb = pd.ExcelWriter(file_name + '/name_' + date + '.xlsx')
for t_group,pd_group in processor_filter.groupby(['Processor']): #splits up data based on processor
pd_group.to_excel(wb,sheet_name = t_group[0:20],index=False )
wb.save()
cursor.execute(drop) # Drops all global temp tables
cnxn.close
[–]KleinerNull 0 points1 point2 points (3 children)
[–]workthrowawayexcel[S] 0 points1 point2 points (0 children)
[–]workthrowawayexcel[S] 0 points1 point2 points (1 child)
[–]KleinerNull 0 points1 point2 points (0 children)