Source code for packing_defect.core.interactions

"""
interactions.py

Tools for detecting protein–defect interactions in lipid membrane simulations.
This module can process multiple lipid types, identify protein residues that 
interact with packing defects within a distance cutoff, and write results to a CSV.
"""


import os
import csv
import numpy as np
import MDAnalysis as mda
import argparse


[docs] class ProteinDefectInteractions: """ Tracks protein–defect interactions in a single trajectory. Parameters ---------- cutoff : float, optional Minimum distance in Å between a protein residue center of mass and any defect atom for the residue to be counted as interacting. Default is 0.1 Å. Attributes ---------- cutoff : float Interaction distance threshold in Å. results : dict Mapping from frame index → list of (resid, resname) tuples for interacting residues. """ def __init__(self, cutoff=0.1): self.cutoff = cutoff self.results = {}
[docs] def track_frame(self, universe, frame_index): """ Analyze a single frame and record protein residues near defects. Parameters ---------- universe : MDAnalysis.Universe Universe containing the current frame. frame_index : int or str Index or identifier for the current frame. Notes ----- - All protein residues whose center of mass lies within `cutoff` Å of any defect atom are recorded. - Defect atoms are defined as all atoms not belonging to the protein. """ protein = universe.select_atoms("protein") defect_atoms = universe.atoms.difference(protein) defect_positions = defect_atoms.positions hits = [] for residue in protein.residues: com = residue.atoms.center_of_mass() if defect_positions.size == 0: continue dists = np.linalg.norm(defect_positions - com, axis=1) if np.any(dists < self.cutoff): hits.append((residue.resid, residue.resname)) self.results[frame_index] = hits
[docs] def process_multiple(input_dirs, output_csv, cutoff): """ Process multiple lipid-type directories and record protein–defect interactions. Parameters ---------- input_dirs : list of str Paths to directories containing `.gro` or `.pdb` files for each lipid type. Each directory is treated as a separate lipid type, and its basename is used as the lipid label in the output. output_csv : str Path to the CSV file to be written. Parent directories will be created if necessary. cutoff : float Distance cutoff in Å for protein–defect interactions. Output CSV Format ----------------- type, frame, resid, resname - type : lipid type (directory name) - frame : frame index (int or str extracted from filename) - resid : residue ID of interacting protein residue - resname : residue name of interacting protein residue Notes ----- - Frame index is parsed from the filename by extracting the last underscore-delimited component before the extension. If parsing fails, the filename stem is used as-is. - The CSV rows are sorted by lipid type (order given in `input_dirs`), then frame index, then residue ID. """ combined = [] lipid_order = [os.path.basename(d) for d in input_dirs] for input_dir in input_dirs: lipid = os.path.basename(input_dir) tracker = ProteinDefectInteractions(cutoff=cutoff) for fname in sorted(os.listdir(input_dir)): if not (fname.endswith(".gro") or fname.endswith(".pdb")): continue path = os.path.join(input_dir, fname) print(f"Processing {lipid} frame {fname}") u = mda.Universe(path) try: frame_index = int(os.path.splitext(fname)[0].rsplit('_', 1)[-1]) except ValueError: frame_index = os.path.splitext(fname)[0] tracker.track_frame(u, frame_index) # Collect hits with lipid tag for frame_idx, hits in tracker.results.items(): for resid, resname in hits: combined.append((lipid, frame_idx, resid, resname)) combined.sort(key=lambda x: (lipid_order.index(x[0]), x[1], x[2])) os.makedirs(os.path.dirname(output_csv) or ".", exist_ok=True) with open(output_csv, 'w', newline='') as f: writer = csv.writer(f) writer.writerow(["type", "frame", "resid", "resname"]) for lipid, frame, resid, resname in combined: writer.writerow([lipid, frame, resid, resname]) print(f"Combined CSV written to {output_csv}")
[docs] def run(input_dirs, cutoff=1.0, output_csv="combined_interactions.csv", as_dataframe=True): """ Compute protein–defect interactions, write the CSV, and return results. Parameters ---------- input_dirs : list of str Directories containing .gro/.pdb frames, one per lipid type. cutoff : float, optional Distance cutoff in Å. Default 1.0. output_csv : str, optional Output CSV path. Default "combined_interactions.csv". as_dataframe : bool, optional If True and pandas is available, return a pandas DataFrame. Otherwise return a list of dicts. Returns ------- pandas.DataFrame or list[dict] Rows with columns: type, frame, resid, resname. """ # Write CSV using the existing logic os.makedirs(os.path.dirname(output_csv) or ".", exist_ok=True) process_multiple(input_dirs, output_csv, cutoff) if as_dataframe: try: import pandas as pd return pd.read_csv(output_csv) except Exception: pass rows = [] with open(output_csv, newline='') as f: reader = csv.DictReader(f) for row in reader: try: row["frame"] = int(row["frame"]) except Exception: pass try: row["resid"] = int(row["resid"]) except Exception: pass rows.append(row) return rows
if __name__ == "__main__": parser = argparse.ArgumentParser( description="Track protein–defect interactions for multiple lipid types" ) parser.add_argument( "-i", "--input-dirs", required=True, nargs='+', help="List of directories containing .gro/.pdb frames (one per lipid)" ) parser.add_argument( "-o", "--output-csv", default=None, help="Path for the combined CSV (default: ./combined_interactions.csv)" ) parser.add_argument( "-c", "--cutoff", type=float, default=1.0, help="Distance cutoff in Å for interaction" ) args = parser.parse_args() output_csv = args.output_csv or "combined_interactions.csv" process_multiple(args.input_dirs, output_csv, args.cutoff)