Content is user-generated and unverified.
import pandas as pd import sys import os import glob from datetime import datetime def find_fry9c_files(directory): """ Find all CSV files starting with FRY9C in the given directory and subdirectories Args: directory (str): Root directory to search Returns: list: List of file paths matching the pattern """ pattern = os.path.join(directory, "**", "FRY9C*.csv") files = glob.glob(pattern, recursive=True) return files def extract_trading_revenue_from_file(file_path): """ Extract trading revenue data from a single FRY9C CSV file Args: file_path (str): Path to the CSV file Returns: list: List of dictionaries containing extracted data """ try: # Read the CSV file df = pd.read_csv(file_path) # Extract institution name and report date institution_row = df[df['ItemName'] == 'Institution Name'] report_date_row = df[df['ItemName'] == 'Report Date'] if institution_row.empty or report_date_row.empty: print(f"Warning: Could not find institution name or report date in {file_path}") return [] institution_name = institution_row['Value'].iloc[0] report_date_raw = report_date_row['Value'].iloc[0] # Convert report date from YYYYMMDD to a readable format report_date_str = str(report_date_raw) report_date = datetime.strptime(report_date_str, '%Y%m%d').strftime('%Y-%m-%d') # Filter rows where Description starts with "Trading Revenue" or "TRADING REVENUE" trading_revenue_mask = df['Description'].fillna('').str.startswith(('Trading Revenue', 'TRADING REVENUE')) trading_revenue_rows = df[trading_revenue_mask].copy() # Create the output data for this file file_output_data = [] for _, row in trading_revenue_rows.iterrows(): # Convert value from thousands to actual USD (multiply by 1,000) value_in_thousands = float(row['Value']) value_in_usd = value_in_thousands * 1000 # Clean up the measure name by removing "EXPOSURES" and "(BHC CONSOLIDATED)" measure_name = row['Description'] measure_name = measure_name.replace(' EXPOSURES', '').replace(' (BHC CONSOLIDATED)', '') file_output_data.append({ 'InstitutionName': institution_name, 'Report Date': report_date, 'Measure': measure_name, 'Value': int(value_in_usd) # Convert to int to avoid decimals }) print(f"Extracted {len(file_output_data)} trading revenue records from {os.path.basename(file_path)}") return file_output_data except Exception as e: print(f"Error processing file {file_path}: {str(e)}") return [] def extract_trading_revenue_data(directory, output_file): """ Extract trading revenue data from all FRY9C CSV files in a directory and subdirectories Args: directory (str): Root directory to search for FRY9C files output_file (str): Path to output CSV file """ try: # Find all FRY9C CSV files csv_files = find_fry9c_files(directory) if not csv_files: print(f"No FRY9C*.csv files found in directory: {directory}") return None print(f"Found {len(csv_files)} FRY9C CSV file(s):") for file_path in csv_files: print(f" - {file_path}") # Process all files and combine data all_output_data = [] for file_path in csv_files: file_data = extract_trading_revenue_from_file(file_path) all_output_data.extend(file_data) if not all_output_data: print("No trading revenue data found in any of the files") return None # Create output DataFrame output_df = pd.DataFrame(all_output_data) # Sort by Institution Name and Report Date for better organization output_df = output_df.sort_values(['InstitutionName', 'Report Date', 'Measure']) # Save to CSV output_df.to_csv(output_file, index=False) print(f"\nSuccessfully extracted {len(all_output_data)} total trading revenue records") print(f"Output saved to: {output_file}") # Display summary statistics print(f"\nSummary:") print(f"Total records: {len(output_df)}") print(f"Unique institutions: {output_df['InstitutionName'].nunique()}") print(f"Unique report dates: {output_df['Report Date'].nunique()}") print(f"Unique measures: {output_df['Measure'].nunique()}") # Display preview of extracted data print("\nPreview of extracted data:") print(output_df.head(10).to_string(index=False)) return output_df except Exception as e: print(f"Error processing directory: {str(e)}") return None def main(): """ Main function to run the extraction script """ if len(sys.argv) != 3: print("Usage: python trading_revenue_extractor.py <directory> <output_file.csv>") print("Example: python trading_revenue_extractor.py ./data/ trading_revenue_output.csv") print("The script will search for all FRY9C*.csv files in the directory and subdirectories") sys.exit(1) directory = sys.argv[1] output_file = sys.argv[2] result = extract_trading_revenue_data(directory, output_file) if result is not None: print(f"\n✓ Processing completed successfully!") else: print(f"\n✗ Processing failed!") sys.exit(1) if __name__ == "__main__": main()
Content is user-generated and unverified.
    Trading Revenue Data Extractor | Claude