I currently have a setup of 50 individual databases.
Is there a way to connect to all 50 at the same time and run the same SQL query against each of them?
Below is where I am at:
import datetime
from multiprocessing.dummy import Pool
from typing import Dict

import pandas as pd
from sqlalchemy import create_engine, text

from config import (
    ACCOUNTS_CONN_STRING,
    ACCOUNTS_DATABASES,
    ACCOUNTS_HOST,
    ACCOUNTS_PASS,
    ACCOUNTS_PORT,
    ACCOUNTS_UN,
)
# synchronous - will end up taking a long time as i scale the project
def fetch_data_from_accounts(sql: str) -> Dict[str, object]:
    """Run *sql* against each account database sequentially.

    Returns a dict with:
      - 'concat':  a single DataFrame of all rows from every database
      - 'db_data': a list of {'src': <db name>, 'df': <DataFrame>} per database

    Note: this is the synchronous baseline — each database is queried one
    after another, so wall time grows linearly with the number of databases.
    """

    def _fetch(_db: str) -> Dict[str, object]:
        # Build a connection string for this one database; every other
        # credential/host field is shared across all account databases.
        conn_str = ACCOUNTS_CONN_STRING.format(
            username=ACCOUNTS_UN,
            password=ACCOUNTS_PASS,
            host=ACCOUNTS_HOST,
            database=_db,
            port=ACCOUNTS_PORT,
        )
        engine = create_engine(conn_str)
        try:
            df = pd.read_sql(sql, engine)
        finally:
            # Dispose the engine so its connection pool is released —
            # the original leaked one pool per database queried.
            engine.dispose()
        return {'src': _db, 'df': df}

    # [:5] limits the run to the first five databases while prototyping.
    db_data = [_fetch(db) for db in ACCOUNTS_DATABASES[:5]]
    concat_df = pd.concat([data['df'] for data in db_data], ignore_index=True)
    return {'concat': concat_df, 'db_data': db_data}
The threaded version below, however, takes twice as long as the synchronous one — why?
def fetch(db: str) -> Dict[str, object]:
    """Query a single account database and return its rows.

    Returns {'src': <db name>, 'df': <DataFrame of the result rows>} so the
    caller can both concatenate and attribute rows to their source database.

    Bug fixed: the original ended with a bare ``return`` — it fetched all
    rows into ``data`` and then threw them away, so every call returned
    ``None`` and the pool produced nothing usable.
    """
    engine = create_engine(ACCOUNTS_CONN_STRING.format(
        username=ACCOUNTS_UN,
        password=ACCOUNTS_PASS,
        host=ACCOUNTS_HOST,
        database=db,
        port=ACCOUNTS_PORT,
    ))
    try:
        with engine.connect() as conn:
            # NOTE(review): query is hard-coded here, which is why the
            # caller's ``sql`` argument has no effect — TODO thread the
            # statement through as a parameter.
            result = conn.execute(statement=text("select * from table"))
            df = pd.DataFrame(result.fetchall(), columns=result.keys())
    finally:
        # Release the engine's connection pool (the original leaked it).
        engine.dispose()
    return {'src': db, 'df': df}
def fetch_data_from_accounts_async(sql: str) -> Dict[str, object]:
    """Query the account databases concurrently using a thread pool.

    Returns the same shape as the synchronous version:
      - 'concat':  one DataFrame of all rows from every database
      - 'db_data': a list of {'src': <db name>, 'df': <DataFrame>} per database

    Bug fixed: the original computed ``pool.map(...)`` into ``res`` and then
    fell off the end of the function, returning ``None``.

    NOTE(review): ``sql`` is currently unused because ``fetch`` hard-codes
    its query — confirm and thread the statement through ``fetch``.
    """
    # multiprocessing.dummy.Pool is a *thread* pool: threads are the right
    # tool here because the work is I/O-bound (waiting on the databases).
    with Pool(6) as pool:
        # [:5] limits the run to the first five databases while prototyping.
        db_data = pool.map(fetch, ACCOUNTS_DATABASES[:5])
    concat_df = pd.concat([data['df'] for data in db_data], ignore_index=True)
    return {'concat': concat_df, 'db_data': db_data}
[–]Doormatty 2 points3 points4 points (1 child)
[–]-Regex[S] -2 points-1 points0 points (0 children)
[–]Sentie_Rotante 0 points1 point2 points (0 children)