LLM4HEP / util /inspect_root.py
ho22joshua's picture
initial commit
cfcbbc8
#!/usr/bin/env python3
"""
Simple ROOT file inspector using uproot
Usage: python inspect_root.py [filepath] [options]
Default data directory: /global/cfs/projectdirs/atlas/eligd/llm_for_analysis_copy/data/
"""
import sys
import os
import uproot
import numpy as np
import argparse
# Default data directory
DEFAULT_DATA_DIR = '/global/cfs/projectdirs/atlas/eligd/llm_for_analysis_copy/data/'
def inspect_root_file(filepath, show_stats=True, max_entries=None):
    """Inspect a ROOT file and print a summary of its contents.

    Prints the keys stored in the file, selects one tree (``mini`` when it is
    present, otherwise the first tree-like object found) and prints its entry
    count, its branch names, per-branch details and, optionally, basic
    statistics for numerical branches.

    Args:
        filepath: Path to the ROOT file to inspect.
        show_stats: When true, print mean/std/min/max for numerical branches.
        max_entries: When a positive integer, only the first ``max_entries``
            entries of each branch are read. ``None``, zero or a negative
            value means "no limit".

    Returns:
        None. Everything is written to stdout; problems are reported as
        printed messages rather than raised.
    """
    if not os.path.exists(filepath):
        print("Error: File '{}' does not exist!".format(filepath))
        return

    # A missing, zero or negative limit means "read every entry"; a negative
    # value would otherwise slice entries off the END of each branch.
    entry_limit = max_entries if max_entries and max_entries > 0 else None

    print("Inspecting ROOT file: {}".format(filepath))
    print("=" * 80)

    try:
        with uproot.open(filepath) as file:
            print("File opened successfully!")
            print("Keys in file: {}".format(list(file.keys())))
            print()

            tree_keys = _tree_keys(file)
            print("Available trees/objects: {}".format(tree_keys))
            print()

            tree_name = _select_tree_name(file, tree_keys)
            if not tree_name:
                print("No tree found in the file.")
                return

            print("Inspecting tree: {}".format(tree_name))
            tree = file[tree_name]

            num_entries = tree.num_entries
            print("Number of entries: {}".format(num_entries))
            print("Branches: {}".format(list(tree.keys())))
            print()

            if entry_limit is not None:
                print("Limiting analysis to first {} entries".format(
                    min(num_entries, entry_limit)))
                print()

            _print_branch_details(tree, entry_limit)
            print()

            if show_stats:
                _print_branch_statistics(tree, entry_limit)
    except Exception as e:
        # Top-level boundary: this tool reports problems instead of crashing.
        print("Error opening file: {}".format(e))


def _tree_keys(file):
    """Return the keys of all tree-like objects in an open uproot directory."""
    # Class names come from the directory's key records, so nothing has to be
    # deserialized just to find out which objects are trees.
    tree_classnames = ('TTree', 'TNtuple', 'TNtupleD')
    return [key for key, classname in file.classnames().items()
            if classname in tree_classnames]


def _select_tree_name(file, tree_keys):
    """Pick the tree to inspect: ``mini`` if present, else the first tree key."""
    for preferred in ('mini;1', 'mini'):
        if preferred in file:
            return preferred
    return tree_keys[0] if tree_keys else None


def _read_branch(branch, entry_limit):
    """Read a branch as an array, stopping after ``entry_limit`` entries if set."""
    if entry_limit is None:
        return branch.array()
    # Limit the read itself rather than loading everything and slicing.
    return branch.array(entry_stop=entry_limit)


def _print_branch_details(tree, entry_limit):
    """Print shape/dtype (or type) and a few sample values for every branch."""
    print("Branch details:")
    for branch_name in tree.keys():
        try:
            branch = tree[branch_name]
            if not hasattr(branch, 'array'):
                print(" {}: {}".format(branch_name, type(branch)))
                continue

            arr = _read_branch(branch, entry_limit)
            if hasattr(arr, 'shape'):
                print(" {}: shape={}, dtype={}".format(
                    branch_name, arr.shape, arr.dtype))
            else:
                # Array types that do not expose a NumPy-style shape.
                value_type = arr.dtype if hasattr(arr, 'dtype') else 'unknown'
                print(" {}: type={}, value_type={}".format(
                    branch_name, type(arr), value_type))

            if len(arr) > 0:
                sample_size = min(5, len(arr))
                print(" Sample values: {}".format(arr[:sample_size]))
        except Exception as e:
            # One unreadable branch must not stop the rest of the report.
            print(" {}: Error reading - {}".format(branch_name, e))


def _print_branch_statistics(tree, entry_limit):
    """Print mean/std/min/max for every non-empty numerical branch."""
    print("Basic statistics for numerical branches:")
    for branch_name in tree.keys():
        try:
            branch = tree[branch_name]
            if not hasattr(branch, 'array'):
                continue

            arr = _read_branch(branch, entry_limit)
            dtype = getattr(arr, 'dtype', None)
            # Skip arrays without a dtype and non-numerical dtypes
            # ('iufc' = integer, unsigned, float, complex).
            if dtype is None or dtype.kind not in 'iufc':
                continue
            # min/max of an empty array is undefined.
            if len(arr) == 0:
                continue

            # Compute everything before printing so that a failure cannot
            # leave a half-printed statistics entry behind.
            mean, std = np.mean(arr), np.std(arr)
            low, high = np.min(arr), np.max(arr)
            print(" {}:".format(branch_name))
            print(" Mean: {:.3f}".format(mean))
            print(" Std: {:.3f}".format(std))
            print(" Min: {:.3f}".format(low))
            print(" Max: {:.3f}".format(high))
        except Exception as e:
            # Not a bare ``except``: Ctrl-C must still be able to abort.
            print(" {}: Error computing statistics - {}".format(branch_name, e))
def list_files_in_directory(directory):
    """Return the sorted paths of the ``.root`` files directly inside a directory.

    Args:
        directory: Path of the directory to scan (not searched recursively).

    Returns:
        A sorted list of paths, each formed by joining ``directory`` with a
        file name ending in ``.root``. An empty list is returned, after
        printing a message, when ``directory`` is not an existing directory.
    """
    # ``isdir`` rather than ``exists``: a path to a regular file would pass an
    # existence check and then make ``os.listdir`` raise NotADirectoryError.
    if not os.path.isdir(directory):
        print("Directory '{}' does not exist!".format(directory))
        return []

    candidates = (
        os.path.join(directory, filename)
        for filename in os.listdir(directory)
        if filename.endswith('.root')
    )
    # Keep real files only; a sub-directory that happens to be named
    # ``something.root`` is not an inspectable ROOT file.
    return sorted(path for path in candidates if os.path.isfile(path))
def main():
    """Command-line entry point: list or inspect ROOT files.

    Modes, checked in this order:

    * ``--list-files``: print the ROOT files found in ``--data-dir``.
    * ``--inspect-all``: inspect every ROOT file found in ``--data-dir``.
    * otherwise: inspect a single file, taken from the positional
      ``filepath`` argument or, when that is missing, from an interactive
      prompt.

    Returns:
        None.
    """
    parser = argparse.ArgumentParser(description='Inspect ROOT files using uproot')
    parser.add_argument('filepath', nargs='?', help='Path to the ROOT file to inspect')
    parser.add_argument('--data-dir', default=DEFAULT_DATA_DIR,
                        help='Default data directory (default: {})'.format(DEFAULT_DATA_DIR))
    parser.add_argument('--list-files', action='store_true',
                        help='List all ROOT files in the data directory')
    parser.add_argument('--inspect-all', action='store_true',
                        help='Inspect all ROOT files in the data directory')
    parser.add_argument('--no-stats', action='store_true',
                        help='Skip statistical analysis')
    parser.add_argument('--max-entries', type=int,
                        help='Limit analysis to first N entries')
    args = parser.parse_args()
    show_stats = not args.no_stats

    if args.list_files:
        root_files = list_files_in_directory(args.data_dir)
        print("ROOT files in {}:".format(args.data_dir))
        _print_file_list(root_files)
        return

    if args.inspect_all:
        root_files = list_files_in_directory(args.data_dir)
        if not root_files:
            print("No ROOT files found in {}".format(args.data_dir))
            return
        print("Inspecting all {} ROOT files in {}".format(len(root_files), args.data_dir))
        print("=" * 80)
        for i, filepath in enumerate(root_files, 1):
            filename = os.path.basename(filepath)
            print("\n[{}/{}] Inspecting: {}".format(i, len(root_files), filename))
            print("-" * 40)
            inspect_root_file(filepath, show_stats=show_stats,
                              max_entries=args.max_entries)
        return

    # Single file inspection: fall back to an interactive prompt when no
    # path was given on the command line.
    filepath = args.filepath or _prompt_for_filepath(args.data_dir)
    if not filepath:
        return

    filepath = _resolve_filepath(filepath, args.data_dir)
    inspect_root_file(filepath, show_stats=show_stats, max_entries=args.max_entries)


def _print_file_list(root_files):
    """Print the base name of each path as a 1-based numbered list."""
    for i, filepath in enumerate(root_files, 1):
        print(" {}. {}".format(i, os.path.basename(filepath)))


def _prompt_for_filepath(data_dir):
    """Ask the user which file to inspect; return a path or None to abort."""
    root_files = list_files_in_directory(data_dir)
    if root_files:
        print("Available ROOT files in {}:".format(data_dir))
        _print_file_list(root_files)
        prompt = "\nEnter file number or full path (or 'q' to quit): "
    else:
        prompt = "Enter path to ROOT file: "

    try:
        choice = input(prompt).strip()
    except (KeyboardInterrupt, EOFError):
        # EOFError covers a closed or non-interactive stdin.
        print("\nExiting...")
        return None

    if not choice:
        # An empty answer would otherwise resolve to the data directory itself.
        print("No file specified.")
        return None

    if root_files:
        # Numbers and 'q' only have a special meaning when a list was shown.
        if choice.lower() == 'q':
            return None
        # ``isdecimal`` accepts only characters that ``int`` can parse.
        if choice.isdecimal():
            idx = int(choice) - 1
            if 0 <= idx < len(root_files):
                return root_files[idx]
            print("Invalid choice!")
            return None

    return choice


def _resolve_filepath(filepath, data_dir):
    """Turn a user-supplied path into the path that should be opened."""
    # The prompt does not go through a shell, so expand ``~`` here.
    filepath = os.path.expanduser(filepath)
    if os.path.isabs(filepath):
        return filepath

    in_data_dir = os.path.join(data_dir, filepath)
    # The data directory keeps priority; the current working directory is
    # used only when the file is missing there but exists locally.
    if not os.path.exists(in_data_dir) and os.path.exists(filepath):
        return filepath
    return in_data_dir
if __name__ == "__main__":
    # Run the command-line interface only when this file is executed
    # directly, not when it is imported as a module.
    main()