diff --git a/oracletrace/tracer.py b/oracletrace/tracer.py
index 31fcbe1..6a885c4 100644
--- a/oracletrace/tracer.py
+++ b/oracletrace/tracer.py
@@ -1,4 +1,6 @@
 import sys
+import sysconfig
+import site
 import os
 import time
 from collections import defaultdict
@@ -76,9 +78,54 @@ def _is_ignored(self, filename: str) -> bool:
         return False
 
     def _is_user_code(self, filename: str) -> bool:
-        # Filter out files not in the project root
-        if not filename.startswith(str(self._root_path)):
+        """Return True if *filename* is project code, not library code.
+
+        A file counts as user code when, with symlinks resolved, it lives
+        inside the project root and is not part of the standard library, an
+        interpreter site-packages directory, or an environment / installed
+        package directory nested inside the project.
+        """
+        # Synthetic code objects ("<string>", "<frozen ...>") have no file on
+        # disk; realpath() would anchor them to the CWD and could make them
+        # look like project files.
+        if not filename or filename.startswith("<"):
+            return False
+
+        # Normalize all paths to resolve symlinks
+        filename = os.path.realpath(filename)
+        root_path = os.path.realpath(self._root_path)
+
+        def is_within(parent: str) -> bool:
+            # commonpath() raises ValueError for paths on different drives
+            # (Windows); such a file cannot be inside *parent*.
+            try:
+                return os.path.commonpath([parent, filename]) == parent
+            except ValueError:
+                return False
+
+        # Check project root
+        if not is_within(root_path):
             return False
-        # Filter out third-party libraries
-        if "site-packages" in filename or "dist-packages" in filename:
+
+        # Exclude the stdlib and the interpreter's site-packages directories.
+        # site.getsitepackages() is missing under some older virtualenv
+        # releases, so look both helpers up defensively.
+        library_roots = [sysconfig.get_path("stdlib")]
+        get_site_packages = getattr(site, "getsitepackages", None)
+        if get_site_packages is not None:
+            library_roots.extend(get_site_packages())
+        get_user_site = getattr(site, "getusersitepackages", None)
+        if get_user_site is not None:
+            library_roots.append(get_user_site())
+        if any(is_within(os.path.realpath(p)) for p in library_roots if p):
+            return False
+
+        # Exclude environments and installed packages nested inside the
+        # project root, whichever interpreter they belong to.
+        excluded_dirs = {
+            "venv", ".venv", "env", ".env", "virtualenv",
+            "site-packages", "dist-packages",
+        }
+        relative_parts = os.path.relpath(filename, root_path).split(os.sep)
+        if any(part in excluded_dirs for part in relative_parts):
             return False
diff --git a/tests/test_tracer.py b/tests/test_tracer.py
new file mode 100644
index 0000000..07e1889
--- /dev/null
+++ b/tests/test_tracer.py
@@ -0,0 +1,131 @@
+import os
+import shutil
+import sysconfig
+import tempfile
+import pytest
+from pathlib import Path
+
+from oracletrace.tracer import Tracer
+
+
+@pytest.fixture
+def tracer():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        yield Tracer(tmpdir)
+
+
+@pytest.fixture
+def stdlib_path():
+    return os.path.realpath(sysconfig.get_path("stdlib"))
+
+
+@pytest.fixture
+def site_paths():
+    import site
+    paths = site.getsitepackages() + [site.getusersitepackages()]
+    return [os.path.realpath(p) for p in paths]
+
+
+def test_user_code_under_root(tracer):
+    user_file = os.path.join(tracer._root_path, "my_module.py")
+    Path(user_file).touch()
+    assert tracer._is_user_code(user_file)
+
+
+def test_user_code_nested_under_root(tracer):
+    nested_dir = os.path.join(tracer._root_path, "subdir", "deep")
+    os.makedirs(nested_dir, exist_ok=True)
+    user_file = os.path.join(nested_dir, "my_module.py")
+    Path(user_file).touch()
+    assert tracer._is_user_code(user_file)
+
+
+def test_external_code_outside_root(tracer):
+    with tempfile.TemporaryDirectory() as tmpdir:
+        external_file = os.path.join(tmpdir, "external.py")
+        Path(external_file).touch()
+        assert not tracer._is_user_code(external_file)
+
+
+def test_stdlib_excluded(tracer, stdlib_path):
+    stdlib_file = os.path.join(stdlib_path, "os.py")
+    assert not tracer._is_user_code(stdlib_file)
+
+
+def test_site_packages_excluded(tracer, site_paths):
+    for sp in site_paths:
+        if os.path.exists(sp):
+            site_file = os.path.join(sp, "requests", "__init__.py")
+            assert not tracer._is_user_code(site_file)
+
+
+def test_venv_site_packages_excluded(tracer):
+    venv_path = os.path.join(tracer._root_path, "venv", "lib", "python", "site-packages", "numpy", "__init__.py")
+    assert not tracer._is_user_code(venv_path)
+
+
+def test_prefix_false_positive(tracer):
+    # A sibling directory sharing the root's name as a prefix is not the root.
+    similar_path = tracer._root_path + "_backup"
+    os.makedirs(similar_path, exist_ok=True)
+    try:
+        similar_file = os.path.join(similar_path, "module.py")
+        Path(similar_file).touch()
+        assert not tracer._is_user_code(similar_file)
+    finally:
+        # The sibling lives outside the fixture's tmpdir; remove it here.
+        shutil.rmtree(similar_path, ignore_errors=True)
+
+
+def test_symlinked_user_code(tracer):
+    real_file = os.path.join(tracer._root_path, "real_module.py")
+    Path(real_file).touch()
+
+    with tempfile.TemporaryDirectory() as symlink_dir:
+        symlink_file = os.path.join(symlink_dir, "linked_module.py")
+        try:
+            os.symlink(real_file, symlink_file)
+        except (OSError, NotImplementedError):
+            pytest.skip("symlinks are not available on this platform")
+        assert tracer._is_user_code(symlink_file)
+
+
+def test_dist_packages_excluded(tracer):
+    dist_pkg_path = "/usr/lib/python3/dist-packages/numpy/__init__.py"
+    assert not tracer._is_user_code(dist_pkg_path)
+
+
+def test_nested_site_packages_excluded(tracer):
+    # Installed packages under an arbitrarily named environment in the root.
+    pkg_path = os.path.join(tracer._root_path, ".tox", "py", "lib", "site-packages", "attr", "__init__.py")
+    assert not tracer._is_user_code(pkg_path)
+
+
+def test_synthetic_filename_excluded(tracer, monkeypatch):
+    # realpath("<string>") resolves against the CWD, so run from the root.
+    monkeypatch.chdir(tracer._root_path)
+    assert not tracer._is_user_code("<string>")
+    assert not tracer._is_user_code("<frozen importlib._bootstrap>")
+
+
+def test_dot_venv_excluded(tracer):
+    venv_path = os.path.join(tracer._root_path, ".venv", "lib", "python", "site-packages", "flask", "__init__.py")
+    assert not tracer._is_user_code(venv_path)
+
+
+def test_env_excluded(tracer):
+    env_path = os.path.join(tracer._root_path, "env", "lib", "python", "site-packages", "django", "__init__.py")
+    assert not tracer._is_user_code(env_path)
+
+
+def test_dot_env_excluded(tracer):
+    env_path = os.path.join(tracer._root_path, ".env", "lib", "python", "site-packages", "pandas", "__init__.py")
+    assert not tracer._is_user_code(env_path)
+
+
+def test_root_path_with_trailing_slash():
+    with tempfile.TemporaryDirectory() as tmpdir:
+        slash_tracer = Tracer(tmpdir + os.sep)
+        user_file = os.path.join(tmpdir, "module.py")
+        Path(user_file).touch()
+        assert slash_tracer._is_user_code(user_file)