#!/usr/bin/env cctbx.python
import os
import sys
import glob
import numpy as np
from collections import defaultdict
import cctbx.miller
from iotbx import mtz
from cctbx import crystal
from cctbx.array_family import flex
def process_mtz_files(directory_path, output_filename="averaged.mtz",
amplitude_label="F", phase_label="PHI"):
"""
Process MTZ files in a directory to compute mean structure factors.
Args:
directory_path (str): Path to directory containing MTZ files
output_filename (str): Name of output MTZ file
amplitude_label (str): Column label for amplitudes (e.g., 'F', 'FP', 'FOBS')
phase_label (str): Column label for phases (e.g., 'PHI', 'PHIC', 'PHIFOM')
"""
# Find all MTZ files in directory
mtz_pattern = os.path.join(directory_path, "*.mtz")
mtz_files = glob.glob(mtz_pattern)
if not mtz_files:
print(f"No MTZ files found in {directory_path}")
return
print(f"Found {len(mtz_files)} MTZ files")
# Storage for complex structure factors indexed by Miller indices
structure_factors = defaultdict(list)
crystal_symmetry = None
# Process each MTZ file
for i, mtz_file in enumerate(mtz_files):
print(f"Processing file {i+1}/{len(mtz_files)}: {os.path.basename(mtz_file)}")
try:
# Read MTZ file
mtz_object = mtz.object(file_name=mtz_file)
# Get crystal symmetry from first file
if crystal_symmetry is None:
crystal_symmetry = mtz_object.crystal_symmetry()
print(f"Crystal symmetry: {crystal_symmetry.space_group_info()}")
print(f"Unit cell: {crystal_symmetry.unit_cell()}")
# Extract Miller array with amplitudes and phases
miller_arrays = mtz_object.as_miller_arrays()
amplitude_array = None
phase_array = None
# Find amplitude and phase arrays
for array in miller_arrays:
if array.info().label_string() == amplitude_label:
amplitude_array = array
elif array.info().label_string() == phase_label:
phase_array = array
# Alternative: search for arrays containing the labels
elif amplitude_label in array.info().label_string():
amplitude_array = array
elif phase_label in array.info().label_string():
phase_array = array
if amplitude_array is None or phase_array is None:
print(f"Warning: Could not find {amplitude_label} and/or {phase_label} in {mtz_file}")
print("Available labels:")
for array in miller_arrays:
print(f" {array.info().label_string()}")
continue
# Ensure both arrays have the same Miller indices
amplitude_array, phase_array = amplitude_array.common_sets(phase_array)
# Convert phases to radians
phases_rad = phase_array.data() * (np.pi / 180.0)
# Calculate complex structure factors
complex_sf = amplitude_array.data() * flex.exp(1j * phases_rad)
# Store structure factors by Miller index
for hkl, sf in zip(amplitude_array.indices(), complex_sf):
structure_factors[hkl].append(sf)
except Exception as e:
print(f"Error processing {mtz_file}: {str(e)}")
continue
if not structure_factors:
print("No structure factors were successfully extracted")
return
print(f"Computing averages for {len(structure_factors)} unique reflections")
# Compute mean structure factors
averaged_indices = []
averaged_amplitudes = []
averaged_phases = []
for hkl, sf_list in structure_factors.items():
if len(sf_list) > 0:
# Compute mean of complex numbers
mean_sf = np.mean(sf_list)
# Extract amplitude and phase from complex number
amplitude = abs(mean_sf)
phase_rad = np.angle(mean_sf)
phase_deg = phase_rad * (180.0 / np.pi)
averaged_indices.append(hkl)
averaged_amplitudes.append(amplitude)
averaged_phases.append(phase_deg)
# Convert to flex arrays
miller_indices = flex.miller_index(averaged_indices)
amplitudes = flex.double(averaged_amplitudes)
phases = flex.double(averaged_phases)
# Create Miller arrays
miller_set = cctbx.miller.set(
crystal_symmetry=crystal_symmetry,
indices=miller_indices,
anomalous_flag=False
)
amplitude_array = miller_set.array(data=amplitudes).set_info(
mtz.label_decorator(label=f"{amplitude_label}_MEAN")
)
phase_array = miller_set.array(data=phases).set_info(
mtz.label_decorator(label=f"{phase_label}_MEAN")
)
# Write output MTZ file
output_path = os.path.join(directory_path, output_filename)
mtz_dataset = amplitude_array.as_mtz_dataset(column_root_label=f"{amplitude_label}_MEAN")
mtz_dataset.add_miller_array(phase_array, column_root_label=f"{phase_label}_MEAN")
mtz_object = mtz_dataset.mtz_object()
mtz_object.write(file_name=output_path)
print(f"Averaged structure factors written to: {output_path}")
print(f"Number of reflections: {len(averaged_indices)}")
print(f"Resolution range: {miller_set.d_max_min()}")
def main():
if len(sys.argv) < 2:
print("Usage: python average_mtz.py <directory_path> [output_filename] [amplitude_label] [phase_label]")
print("Example: python average_mtz.py /path/to/mtz/files averaged.mtz F PHI")
sys.exit(1)
directory_path = sys.argv[1]
output_filename = sys.argv[2] if len(sys.argv) > 2 else "averaged.mtz"
amplitude_label = sys.argv[3] if len(sys.argv) > 3 else "F"
phase_label = sys.argv[4] if len(sys.argv) > 4 else "PHI"
if not os.path.isdir(directory_path):
print(f"Error: {directory_path} is not a valid directory")
sys.exit(1)
process_mtz_files(directory_path, output_filename, amplitude_label, phase_label)
if __name__ == "__main__":
main()