"""
File utilities for update_lib.
This module provides functions for:
- Safe file reading with error handling
- Safe AST parsing with error handling
- Iterating over Python files
- Parsing and converting library paths
- Detecting test paths vs library paths
- Comparing files or directories for equality
"""
from __future__ import annotations
import ast
import filecmp
import pathlib
from collections.abc import Callable, Iterator
# === I/O utilities ===
def safe_read_text(path: pathlib.Path) -> str | None:
"""Read file content with UTF-8 encoding, returning None on error."""
try:
return path.read_text(encoding="utf-8")
except (OSError, UnicodeDecodeError):
return None
def safe_parse_ast(content: str) -> ast.Module | None:
"""Parse Python content into AST, returning None on syntax error."""
try:
return ast.parse(content)
except SyntaxError:
return None
def iter_python_files(path: pathlib.Path) -> Iterator[pathlib.Path]:
"""Yield Python files from a file or directory."""
if path.is_file():
yield path
else:
yield from path.glob("**/*.py")
def read_python_files(path: pathlib.Path) -> Iterator[tuple[pathlib.Path, str]]:
"""Read all Python files from a path, yielding (path, content) pairs."""
for py_file in iter_python_files(path):
content = safe_read_text(py_file)
if content is not None:
yield py_file, content
# === Path utilities ===
def parse_lib_path(path: pathlib.Path | str) -> pathlib.Path:
"""
Extract the Lib/... portion from a path containing /Lib/.
Example:
parse_lib_path("cpython/Lib/test/foo.py") -> Path("Lib/test/foo.py")
"""
path_str = str(path).replace("\\", "/")
lib_marker = "/Lib/"
if lib_marker not in path_str:
raise ValueError(f"Path must contain '/Lib/' or '\\Lib\\' (got: {path})")
idx = path_str.index(lib_marker)
return pathlib.Path(path_str[idx + 1 :])
def is_lib_path(path: pathlib.Path) -> bool:
"""Check if path starts with Lib/"""
path_str = str(path).replace("\\", "/")
return path_str.startswith("Lib/") or path_str.startswith("./Lib/")
def is_test_path(path: pathlib.Path) -> bool:
"""Check if path is a test path (contains /Lib/test/ or starts with Lib/test/)"""
path_str = str(path).replace("\\", "/")
return "/Lib/test/" in path_str or path_str.startswith("Lib/test/")
def lib_to_test_path(src_path: pathlib.Path) -> pathlib.Path:
"""
Convert library path to test path.
Examples:
cpython/Lib/dataclasses.py -> cpython/Lib/test/test_dataclasses/
cpython/Lib/json/__init__.py -> cpython/Lib/test/test_json/
"""
path_str = str(src_path).replace("\\", "/")
lib_marker = "/Lib/"
if lib_marker in path_str:
lib_path = parse_lib_path(src_path)
lib_name = lib_path.stem if lib_path.suffix == ".py" else lib_path.name
if lib_name == "__init__":
lib_name = lib_path.parent.name
prefix = path_str[: path_str.index(lib_marker)]
dir_path = pathlib.Path(f"{prefix}/Lib/test/test_{lib_name}/")
if dir_path.exists():
return dir_path
file_path = pathlib.Path(f"{prefix}/Lib/test/test_{lib_name}.py")
if file_path.exists():
return file_path
return dir_path
else:
lib_name = src_path.stem if src_path.suffix == ".py" else src_path.name
if lib_name == "__init__":
lib_name = src_path.parent.name
dir_path = pathlib.Path(f"Lib/test/test_{lib_name}/")
if dir_path.exists():
return dir_path
file_path = pathlib.Path(f"Lib/test/test_{lib_name}.py")
if file_path.exists():
return file_path
return dir_path
def get_test_files(path: pathlib.Path) -> list[pathlib.Path]:
"""Get all .py test files in a path (file or directory)."""
if path.is_file():
return [path]
return sorted(path.glob("**/*.py"))
def get_test_module_name(test_path: pathlib.Path) -> str:
"""
Extract test module name from a test file path.
Examples:
Lib/test/test_foo.py -> test_foo
Lib/test/test_ctypes/test_bar.py -> test_ctypes.test_bar
"""
test_path = pathlib.Path(test_path)
if test_path.parent.name.startswith("test_"):
return f"{test_path.parent.name}.{test_path.stem}"
return test_path.stem
def resolve_module_path(
name: str, prefix: str = "cpython", prefer: str = "file"
) -> pathlib.Path:
"""
Resolve module path, trying file or directory.
Args:
name: Module name (e.g., "dataclasses", "json")
prefix: CPython directory prefix
prefer: "file" to try .py first, "dir" to try directory first
"""
file_path = pathlib.Path(f"{prefix}/Lib/{name}.py")
dir_path = pathlib.Path(f"{prefix}/Lib/{name}")
if prefer == "file":
if file_path.exists():
return file_path
if dir_path.exists():
return dir_path
return file_path
else:
if dir_path.exists():
return dir_path
if file_path.exists():
return file_path
return dir_path
def construct_lib_path(prefix: str, *parts: str) -> pathlib.Path:
"""Build a path under prefix/Lib/."""
return pathlib.Path(prefix) / "Lib" / pathlib.Path(*parts)
def resolve_test_path(
test_name: str, prefix: str = "cpython", prefer: str = "dir"
) -> pathlib.Path:
"""Resolve a test module path under Lib/test/."""
return resolve_module_path(f"test/{test_name}", prefix, prefer=prefer)
def cpython_to_local_path(
cpython_path: pathlib.Path,
cpython_prefix: str,
lib_prefix: str,
) -> pathlib.Path | None:
"""Convert CPython path to local Lib path."""
try:
rel_path = cpython_path.relative_to(cpython_prefix)
return pathlib.Path(lib_prefix) / rel_path.relative_to("Lib")
except ValueError:
return None
def get_module_name(path: pathlib.Path) -> str:
"""Extract module name from path, handling __init__.py."""
if path.suffix == ".py":
name = path.stem
if name == "__init__":
return path.parent.name
return name
return path.name
def get_cpython_dir(src_path: pathlib.Path) -> pathlib.Path:
"""Extract CPython directory from a path containing /Lib/."""
path_str = str(src_path).replace("\\", "/")
lib_marker = "/Lib/"
if lib_marker in path_str:
idx = path_str.index(lib_marker)
return pathlib.Path(path_str[:idx])
return pathlib.Path("cpython")
# === Comparison utilities ===
def _dircmp_is_same(dcmp: filecmp.dircmp) -> bool:
"""Recursively check if two directories are identical."""
if dcmp.diff_files or dcmp.left_only or dcmp.right_only:
return False
for subdir in dcmp.subdirs.values():
if not _dircmp_is_same(subdir):
return False
return True
def compare_paths(cpython_path: pathlib.Path, local_path: pathlib.Path) -> bool:
"""Compare a CPython path with a local path (file or directory)."""
if not local_path.exists():
return False
if cpython_path.is_file():
return filecmp.cmp(cpython_path, local_path, shallow=False)
dcmp = filecmp.dircmp(cpython_path, local_path)
return _dircmp_is_same(dcmp)
def compare_file_contents(
cpython_path: pathlib.Path,
local_path: pathlib.Path,
*,
local_filter: Callable[[str], str] | None = None,
encoding: str = "utf-8",
) -> bool:
"""Compare two files as text, optionally filtering local content."""
try:
cpython_content = cpython_path.read_text(encoding=encoding)
local_content = local_path.read_text(encoding=encoding)
except (OSError, UnicodeDecodeError):
return False
if local_filter is not None:
local_content = local_filter(local_content)
return cpython_content == local_content
def compare_dir_contents(
cpython_dir: pathlib.Path,
local_dir: pathlib.Path,
*,
pattern: str = "*.py",
local_filter: Callable[[str], str] | None = None,
encoding: str = "utf-8",
) -> bool:
"""Compare directory contents for matching files and text."""
cpython_files = {f.relative_to(cpython_dir) for f in cpython_dir.rglob(pattern)}
local_files = {f.relative_to(local_dir) for f in local_dir.rglob(pattern)}
if cpython_files != local_files:
return False
for rel_path in cpython_files:
if not compare_file_contents(
cpython_dir / rel_path,
local_dir / rel_path,
local_filter=local_filter,
encoding=encoding,
):
return False
return True