"""Statistics utilities for packing_defect."""
import os
import argparse
import statistics as stats
from collections import Counter, defaultdict
import numpy as np
from MDAnalysis import Universe
_NEIGHBORS = [
(dx, dy)
for dx in (-1, 0, 1)
for dy in (-1, 0, 1)
if (dx, dy) != (0, 0)
]
[docs]
def open_gro(gro_path):
"""
Open a GRO file and return a set of integer (i, j) grid positions.
"""
u = Universe(gro_path)
xy = u.atoms.positions[:, :2]
ij = np.rint(xy).astype(int)
return set(map(tuple, ij))
[docs]
def cluster_positions(positions):
"""
Return a list of clusters, each as a set of (i, j) tuples.
"""
visited = set()
clusters = []
for pos in positions:
if pos in visited:
continue
stack = [pos]
visited.add(pos)
cluster = {pos}
while stack:
i0, j0 = stack.pop()
for di, dj in _NEIGHBORS:
nb = (i0 + di, j0 + dj)
if nb in positions and nb not in visited:
visited.add(nb)
stack.append(nb)
cluster.add(nb)
clusters.append(cluster)
return clusters
[docs]
def cluster_sizes(positions):
"""
Return cluster sizes from a set of positions.
"""
return [len(c) for c in cluster_positions(positions)]
[docs]
def count_defect_types(gro_dir, cutoff):
"""
Count number of clusters ≥ cutoff in each frame GRO file.
Returns list of (frame_name, count).
"""
results = []
for fn in sorted(os.listdir(gro_dir)):
if not fn.endswith('.gro'):
continue
path = os.path.join(gro_dir, fn)
positions = open_gro(path)
sizes = cluster_sizes(positions)
count = sum(1 for s in sizes if s >= cutoff)
frame_name = fn[:-4]
results.append((frame_name, count))
return results
[docs]
def return_stats(gro_dir, cutoff):
"""
Return summary statistics (mean, median, stdev) of defect counts.
"""
results = count_defect_types(gro_dir, cutoff)
counts = [cnt for (_name, cnt) in results]
return {
'mean': stats.mean(counts),
'median': stats.median(counts),
'stdev': stats.stdev(counts),
'per_type': results,
}
[docs]
def defect_distribution(gro_dir):
"""
Returns dict mapping frame_name to Counter of defect sizes.
"""
distribution = {}
for fn in sorted(os.listdir(gro_dir)):
if not fn.endswith('.gro'):
continue
path = os.path.join(gro_dir, fn)
positions = open_gro(path)
sizes = cluster_sizes(positions)
distribution[fn[:-4]] = Counter(sizes)
return distribution
[docs]
def defect_distro_stats(distros):
"""
Compute mean and stdev for each defect size across frames.
"""
size_timeseries = defaultdict(list)
for distro in distros.values():
for size, count in distro.items():
size_timeseries[size].append(count)
output = {}
for size, counts in size_timeseries.items():
output[size] = {
'mean': stats.mean(counts),
'stdev': stats.stdev(counts) if len(counts) > 1 else 0.0,
}
return output
[docs]
def average_std_per_frame(distros):
"""
Return dict mapping frame_name to (mean, stdev) of all defects in that frame.
"""
result = {}
for frame, distro in distros.items():
all_sizes = []
for size, count in distro.items():
all_sizes.extend([size] * count)
if not all_sizes:
result[frame] = (0.0, 0.0)
elif len(all_sizes) == 1:
result[frame] = (all_sizes[0], 0.0)
else:
result[frame] = (stats.mean(all_sizes), stats.stdev(all_sizes))
return result
[docs]
def global_avg_std_defect_size(distros):
"""
Return global (mean, stdev) across all frames and sizes.
"""
all_sizes = []
for distro in distros.values():
for size, count in distro.items():
all_sizes.extend([size] * count)
if not all_sizes:
return 0.0, 0.0
if len(all_sizes) == 1:
return all_sizes[0], 0.0
return stats.mean(all_sizes), stats.stdev(all_sizes)
[docs]
def main():
parser = argparse.ArgumentParser(
description="Count defect clusters and compute stats"
)
parser.add_argument('gro_dir', help="Directory with .gro files")
parser.add_argument('-c', '--cutoff', type=int, default=5,
help="Minimum cluster size to count")
parser.add_argument('--summary', action='store_true',
help="Print summary stats of counts")
parser.add_argument('--stats', action='store_true',
help="Print per-frame and global defect stats")
args = parser.parse_args()
counts = count_defect_types(args.gro_dir, args.cutoff)
for frame, cnt in counts:
print(f"{frame:10s} clusters ≥ {args.cutoff:3d} → {cnt}")
if args.summary:
summary = return_stats(args.gro_dir, args.cutoff)
print("\nSummary:")
print(f" Mean : {summary['mean']:.2f}")
print(f" Median : {summary['median']:.2f}")
print(f" Stdev : {summary['stdev']:.2f}")
if args.stats:
distros = defect_distribution(args.gro_dir)
print("\nAverage defect size per frame:")
frame_stats = average_std_per_frame(distros)
for frame, (mean_, std_) in sorted(frame_stats.items()):
print(f"{frame:20s} mean: {mean_:.2f}, stdev: {std_:.2f}")
global_mean, global_std = global_avg_std_defect_size(distros)
print("\nGlobal average defect size:")
print(f" Mean : {global_mean:.2f}")
print(f" Stdev : {global_std:.2f}")
if __name__ == '__main__':
main()