Content is user-generated and unverified.
#!/usr/bin/env cctbx.python import os import sys import glob import numpy as np from collections import defaultdict import cctbx.miller from iotbx import mtz from cctbx import crystal from cctbx.array_family import flex def process_mtz_files(directory_path, output_filename="averaged.mtz", amplitude_label="F", phase_label="PHI"): """ Process MTZ files in a directory to compute mean structure factors. Args: directory_path (str): Path to directory containing MTZ files output_filename (str): Name of output MTZ file amplitude_label (str): Column label for amplitudes (e.g., 'F', 'FP', 'FOBS') phase_label (str): Column label for phases (e.g., 'PHI', 'PHIC', 'PHIFOM') """ # Find all MTZ files in directory mtz_pattern = os.path.join(directory_path, "*.mtz") mtz_files = glob.glob(mtz_pattern) if not mtz_files: print(f"No MTZ files found in {directory_path}") return print(f"Found {len(mtz_files)} MTZ files") # Storage for complex structure factors indexed by Miller indices structure_factors = defaultdict(list) crystal_symmetry = None # Process each MTZ file for i, mtz_file in enumerate(mtz_files): print(f"Processing file {i+1}/{len(mtz_files)}: {os.path.basename(mtz_file)}") try: # Read MTZ file mtz_object = mtz.object(file_name=mtz_file) # Get crystal symmetry from first file if crystal_symmetry is None: crystal_symmetry = mtz_object.crystal_symmetry() print(f"Crystal symmetry: {crystal_symmetry.space_group_info()}") print(f"Unit cell: {crystal_symmetry.unit_cell()}") # Extract Miller array with amplitudes and phases miller_arrays = mtz_object.as_miller_arrays() amplitude_array = None phase_array = None # Find amplitude and phase arrays for array in miller_arrays: if array.info().label_string() == amplitude_label: amplitude_array = array elif array.info().label_string() == phase_label: phase_array = array # Alternative: search for arrays containing the labels elif amplitude_label in array.info().label_string(): amplitude_array = array elif phase_label in array.info().label_string(): phase_array = array if amplitude_array is None or phase_array is None: print(f"Warning: Could not find {amplitude_label} and/or {phase_label} in {mtz_file}") print("Available labels:") for array in miller_arrays: print(f" {array.info().label_string()}") continue # Ensure both arrays have the same Miller indices amplitude_array, phase_array = amplitude_array.common_sets(phase_array) # Convert phases to radians phases_rad = phase_array.data() * (np.pi / 180.0) # Calculate complex structure factors complex_sf = amplitude_array.data() * flex.exp(1j * phases_rad) # Store structure factors by Miller index for hkl, sf in zip(amplitude_array.indices(), complex_sf): structure_factors[hkl].append(sf) except Exception as e: print(f"Error processing {mtz_file}: {str(e)}") continue if not structure_factors: print("No structure factors were successfully extracted") return print(f"Computing averages for {len(structure_factors)} unique reflections") # Compute mean structure factors averaged_indices = [] averaged_amplitudes = [] averaged_phases = [] for hkl, sf_list in structure_factors.items(): if len(sf_list) > 0: # Compute mean of complex numbers mean_sf = np.mean(sf_list) # Extract amplitude and phase from complex number amplitude = abs(mean_sf) phase_rad = np.angle(mean_sf) phase_deg = phase_rad * (180.0 / np.pi) averaged_indices.append(hkl) averaged_amplitudes.append(amplitude) averaged_phases.append(phase_deg) # Convert to flex arrays miller_indices = flex.miller_index(averaged_indices) amplitudes = flex.double(averaged_amplitudes) phases = flex.double(averaged_phases) # Create Miller arrays miller_set = cctbx.miller.set( crystal_symmetry=crystal_symmetry, indices=miller_indices, anomalous_flag=False ) amplitude_array = miller_set.array(data=amplitudes).set_info( mtz.label_decorator(label=f"{amplitude_label}_MEAN") ) phase_array = miller_set.array(data=phases).set_info( mtz.label_decorator(label=f"{phase_label}_MEAN") ) # Write output MTZ file output_path = os.path.join(directory_path, output_filename) mtz_dataset = amplitude_array.as_mtz_dataset(column_root_label=f"{amplitude_label}_MEAN") mtz_dataset.add_miller_array(phase_array, column_root_label=f"{phase_label}_MEAN") mtz_object = mtz_dataset.mtz_object() mtz_object.write(file_name=output_path) print(f"Averaged structure factors written to: {output_path}") print(f"Number of reflections: {len(averaged_indices)}") print(f"Resolution range: {miller_set.d_max_min()}") def main(): if len(sys.argv) < 2: print("Usage: python average_mtz.py <directory_path> [output_filename] [amplitude_label] [phase_label]") print("Example: python average_mtz.py /path/to/mtz/files averaged.mtz F PHI") sys.exit(1) directory_path = sys.argv[1] output_filename = sys.argv[2] if len(sys.argv) > 2 else "averaged.mtz" amplitude_label = sys.argv[3] if len(sys.argv) > 3 else "F" phase_label = sys.argv[4] if len(sys.argv) > 4 else "PHI" if not os.path.isdir(directory_path): print(f"Error: {directory_path} is not a valid directory") sys.exit(1) process_mtz_files(directory_path, output_filename, amplitude_label, phase_label) if __name__ == "__main__": main()
Content is user-generated and unverified.
    MTZ Structure Factor Averaging Script | Claude