From 6ba4d2a998a19bd5532f397a0109e953963dcd2d Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Thu, 11 Jun 2026 11:51:49 -0400 Subject: [PATCH 1/6] align file finding functionality --- README.md | 22 +- docs/byoc_runtime_contract.md | 67 ++++++ src/datacustomcode/client.py | 35 +++- src/datacustomcode/file/path/default.py | 69 ++++--- tests/file/test_path_default.py | 260 +++++++++++++++++------- 5 files changed, 344 insertions(+), 109 deletions(-) create mode 100644 docs/byoc_runtime_contract.md diff --git a/README.md b/README.md index f5b27c6..ca7a243 100644 --- a/README.md +++ b/README.md @@ -148,7 +148,7 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing Your entry point script will define logic using the `Client` object which wraps data access layers. You should only need the following methods: -* `find_file_path(file_name)` - Returns a file path +* `find_file_path(file_name)` – Resolve a bundled file (placed under `payload/files/`) to a `pathlib.Path` that exists. Works the same locally and inside Data Cloud — see [Bundled file resolution](#bundled-file-resolution) below for the full lookup order. Raises `FileNotFoundError` if the file isn't found. * `read_dlo(name)` – Read from a Data Lake Object by name * `read_dmo(name)` – Read from a Data Model Object by name * `write_to_dlo(name, spark_dataframe, write_mode)` – Write to a Data Model Object by name with a Spark dataframe @@ -169,6 +169,25 @@ client.write_to_dlo('output_DLO') > [!WARNING] > Currently we only support reading from DMOs and writing to DMOs or reading from DLOs and writing to DLOs, but they cannot mix. +### Bundled file resolution + +Place bundled files (CSVs, prompt files, etc.) under `payload/files/`. 
The same `client.find_file_path("data.csv")` call resolves consistently across all three runtimes: + +- `datacustomcode run` (local) → `/payload/files/data.csv` +- Data Cloud script package → `$LIBRARY_PATH/files/data.csv` +- Data Cloud function package → `$LIBRARY_PATH/files/data.csv` + +Resolution order (first existing path wins): + +1. `/files/`, then `/` — when the SDK is constructed with an explicit `base_path`. +2. `$LIBRARY_PATH/files/`, then `$LIBRARY_PATH/` — when `LIBRARY_PATH` is set. Data Cloud sets this for you to the package root. +3. `payload/files/` relative to the current working directory. +4. `/files/` where `` is the directory of the nearest `config.json` discoverable by walking down from cwd. + +If none of these exist, `find_file_path` raises `FileNotFoundError` with the list of paths it tried. + +`LIBRARY_PATH` MUST point to the directory that *contains* `files/` — i.e., the package root (the directory that holds `config.json` and `entrypoint.py`). See [BYOC runtime contract](./docs/byoc_runtime_contract.md) for the full runtime contract between the SDK and Data Cloud. + ## CLI @@ -445,4 +464,5 @@ If you're using OAuth Tokens authentication, the initial configure will retrieve ## Other docs - [Troubleshooting](./docs/troubleshooting.md) +- [BYOC runtime contract](./docs/byoc_runtime_contract.md) - [For Contributors](./FOR_CONTRIBUTORS.md) diff --git a/docs/byoc_runtime_contract.md b/docs/byoc_runtime_contract.md new file mode 100644 index 0000000..bb9bcb4 --- /dev/null +++ b/docs/byoc_runtime_contract.md @@ -0,0 +1,67 @@ +# BYOC Runtime Contract + +This document defines the contract between `salesforce-data-customcode` (the SDK) +and the Data Cloud Code Extension runtimes — script packages and function packages — for filesystem-bundled assets +referenced via `Client.find_file_path`. 
+ +## LIBRARY_PATH + +The runtime MUST set `LIBRARY_PATH` to the directory that *contains* the +extracted package's `files/` directory — i.e., the package root, the same +directory that holds `config.json` and `entrypoint.py`. + +Concretely: + +``` +$LIBRARY_PATH/ +├── config.json +├── entrypoint.py +└── files/ + └── +``` + +Given this layout, `client.find_file_path("data.csv")` resolves to +`$LIBRARY_PATH/files/data.csv`. Relative subpaths under `files/` are supported: +`client.find_file_path("dir/data.csv")` resolves to +`$LIBRARY_PATH/files/dir/data.csv`. + +## WORKDIR + +The customer-function `WORKDIR` MAY be `/app` (or anything else). The SDK does +not depend on `cwd` for file resolution when `LIBRARY_PATH` is set. Calling +`os.chdir($LIBRARY_PATH)` from runtime code is no longer required and should be +removed when the runtime is updated to a `salesforce-data-customcode` release +that includes this contract. + +## File layout + +Customer packages place bundled files under `payload/files/`. After +extraction in the runtime, the file lives at `$LIBRARY_PATH/files/`. +Files placed directly at the package root (`payload/`, extracted to +`$LIBRARY_PATH/`) are still resolvable but the canonical location is +`files/`. + +## Resolution order + +`Client.find_file_path` tries candidate paths in this order, returning the +first one that exists: + +1. `$BASE_PATH/files/`, then `$BASE_PATH/` — when the SDK is + constructed with an explicit `base_path`. +2. `$LIBRARY_PATH/files/`, then `$LIBRARY_PATH/` — when + `LIBRARY_PATH` is set. +3. `payload/files/` relative to cwd — the local `datacustomcode run` + flow from a project root. +4. `/files/` discovered by walking the cwd subtree for + `config.json`. + +If no candidate exists, `FileNotFoundError` is raised; the message lists every +path that was tried. + +## Versioning + +This contract applies to `salesforce-data-customcode` releases that include +the `$LIBRARY_PATH/files/` lookup step. 
Older runtime images pinned to +prior SDK releases must continue to set `cwd = $LIBRARY_PATH` (script +runtime's existing behavior) or `os.chdir($LIBRARY_PATH)` (function runtime +workaround) until they upgrade. diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 9ad95be..fb0d868 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -221,8 +221,41 @@ def write_to_dmo( return self._writer.write_to_dmo(name, dataframe, write_mode, **kwargs) # type: ignore[no-any-return] def find_file_path(self, file_name: str) -> Path: - """Return a file path""" + """Resolve a bundled file shipped in the package to a path that exists. + + Resolution order (first existing path wins): + + 1. ``base_path//`` then ``base_path/`` + — when the underlying ``DefaultFindFilePath`` was constructed with + an explicit ``base_path``. + 2. ``$LIBRARY_PATH//`` then + ``$LIBRARY_PATH/`` — when the ``LIBRARY_PATH`` environment + variable is set. The Data Cloud runtime sets this to the + directory containing the extracted package. + 3. ``//`` relative to the current + working directory — the default ``payload/files/`` layout + used by ``datacustomcode run`` from a project root. + 4. ``//`` where ```` is + the directory containing the nearest ``config.json`` discoverable + by walking the cwd subtree. + + ``LIBRARY_PATH`` must point to the directory that *contains* + ``files/`` — i.e., the package root, the same directory that holds + ``config.json`` and ``entrypoint.py``. See + ``docs/byoc_runtime_contract.md`` for the full runtime contract. + Args: + file_name: A file under the package's ``files/`` folder. Relative + subpaths (e.g., ``"file/data2.csv"``) are supported. + + Returns: + A ``pathlib.Path`` that exists. + + Raises: + FileNotFoundError: If the file does not exist at any of the + resolution-order locations. The message lists every candidate + path that was tried. 
+ """ return self._file.find_file_path(file_name) # type: ignore[no-any-return] def _validate_data_layer_history_does_not_contain( diff --git a/src/datacustomcode/file/path/default.py b/src/datacustomcode/file/path/default.py index 96d2f51..0e7733a 100644 --- a/src/datacustomcode/file/path/default.py +++ b/src/datacustomcode/file/path/default.py @@ -16,7 +16,7 @@ import os from pathlib import Path -from typing import Optional +from typing import Iterator, Optional, Union from datacustomcode.file.base import BaseDataAccessLayer @@ -44,6 +44,7 @@ class DefaultFindFilePath(BaseDataAccessLayer): def __init__( self, + base_path: Optional[Union[Path, str]] = None, code_package: Optional[str] = None, file_folder: Optional[str] = None, config_file: Optional[str] = None, @@ -51,10 +52,16 @@ def __init__( """Initialize the file reader with configuration. Args: + base_path: Optional explicit anchor for resolution. When provided, + ``base_path//`` and ``base_path/`` are + tried before any environment- or cwd-based lookups. Used by + runtime hosts that know the package root and don't want to + rely on ``LIBRARY_PATH`` or the current working directory. 
code_package: The default code package directory to search file_folder: The folder containing files relative to the code package config_file: The configuration file to use for path resolution """ + self.base_path = Path(base_path) if base_path is not None else None self.code_package = code_package or self.DEFAULT_CODE_PACKAGE self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER self.config_file = config_file or self.DEFAULT_CONFIG_FILE @@ -66,7 +73,7 @@ def find_file_path(self, file_name: str) -> Path: file_name: The name of the file to open Returns: - A file path + A file path that exists Raises: FileNotFoundError: If the file cannot be found @@ -74,46 +81,45 @@ def find_file_path(self, file_name: str) -> Path: if not file_name: raise ValueError("file_name cannot be empty") - file_path = self._resolve_file_path(file_name) + tried: list[Path] = [] + for candidate in self._candidate_paths(file_name): + tried.append(candidate) + if candidate.exists(): + return candidate - if not file_path.exists(): - raise FileNotFoundError( - f"File '{file_name}' not found in any search location" - ) + raise FileNotFoundError( + f"File '{file_name}' not found in any search location. " + f"Tried: {[str(p) for p in tried]}" + ) - return file_path - - def _resolve_file_path(self, file_name: str) -> Path: - """Resolve the full path to a file. + def _candidate_paths(self, file_name: str) -> Iterator[Path]: + """Yield candidate paths for ``file_name`` in resolution order. Args: file_name: The name of the file to resolve Returns: - The full path to the file + An iterator of candidate paths """ - # First check if environment variable is set + # 1. base_path//, then base_path/ + if self.base_path is not None: + yield self.base_path / self.file_folder / file_name + yield self.base_path / file_name + + # 2. 
$LIBRARY_PATH//, then $LIBRARY_PATH/ env_path = os.getenv(self.DEFAULT_ENV_VAR) if env_path: - file_path = Path(env_path) / file_name - if file_path.exists(): - return file_path + yield Path(env_path) / self.file_folder / file_name + yield Path(env_path) / file_name - # First try the default code package location + # 3. // relative to cwd if self._code_package_exists(): - file_path = self._get_code_package_file_path(file_name) - if file_path.exists(): - return file_path + yield self._get_code_package_file_path(file_name) - # Fall back to config.json-based location + # 4. // via config.json discovery config_path = self._find_config_file() - if config_path: - file_path = self._get_config_based_file_path(file_name, config_path) - if file_path.exists(): - return file_path - - # Return the file name as a Path if not found in any location - return Path(file_name) + if config_path is not None: + yield self._get_config_based_file_path(file_name, config_path) def _code_package_exists(self) -> bool: """Check if the default code package directory exists. @@ -146,6 +152,10 @@ def _find_config_file(self) -> Optional[Path]: def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path: """Get the file path relative to the config file location. + Anchors on the directory containing the discovered ``config.json`` so a + package found by walking down from cwd resolves files relative to its own + root, not the caller's cwd. + Args: file_name: The name of the file config_path: The path to the config file @@ -153,8 +163,7 @@ def _get_config_based_file_path(self, file_name: str, config_path: Path) -> Path Returns: The full path to the file """ - relative_path = f"{self.file_folder}/{file_name}" - return Path(relative_path) + return config_path.parent / self.file_folder / file_name def _find_file_in_tree(self, filename: str, search_path: Path) -> Optional[Path]: """Find a file within a directory tree. 
diff --git a/tests/file/test_path_default.py b/tests/file/test_path_default.py index 8350122..c63987e 100644 --- a/tests/file/test_path_default.py +++ b/tests/file/test_path_default.py @@ -50,6 +50,127 @@ def test_init_with_custom_values(self): assert finder.code_package == "custom_package" assert finder.file_folder == "custom_files" assert finder.config_file == "custom_config.json" + assert finder.base_path is None + + def test_init_with_base_path(self): + """base_path is stored as a Path even when given a string.""" + finder = DefaultFindFilePath(base_path="/extracted") + assert finder.base_path == Path("/extracted") + + finder_path = DefaultFindFilePath(base_path=Path("/extracted")) + assert finder_path.base_path == Path("/extracted") + + def test_resolve_with_base_path_files_subdir(self, tmp_path, monkeypatch): + """base_path// is preferred over base_path/.""" + monkeypatch.delenv("LIBRARY_PATH", raising=False) + files_dir = tmp_path / "files" + files_dir.mkdir() + target = files_dir / "data1.csv" + target.write_text("hello") + # Also a stray at the root to prove files/ wins. + (tmp_path / "data1.csv").write_text("stray") + + finder = DefaultFindFilePath(base_path=tmp_path) + assert finder.find_file_path("data1.csv") == target + + def test_resolve_with_base_path_root_fallback(self, tmp_path, monkeypatch): + """When base_path/files/ is missing, fall back to base_path/.""" + monkeypatch.delenv("LIBRARY_PATH", raising=False) + target = tmp_path / "data1.csv" + target.write_text("hello") + + finder = DefaultFindFilePath(base_path=tmp_path) + assert finder.find_file_path("data1.csv") == target + + def test_resolve_base_path_takes_precedence_over_env(self, tmp_path, monkeypatch): + """base_path is checked before $LIBRARY_PATH.""" + # base_path holds the file; LIBRARY_PATH points elsewhere. 
+ base_files = tmp_path / "base" / "files" + base_files.mkdir(parents=True) + target = base_files / "data1.csv" + target.write_text("from-base") + + env_dir = tmp_path / "env" + env_dir.mkdir() + (env_dir / "data1.csv").write_text("from-env") + monkeypatch.setenv("LIBRARY_PATH", str(env_dir)) + + finder = DefaultFindFilePath(base_path=tmp_path / "base") + assert finder.find_file_path("data1.csv") == target + + def test_resolve_library_path_files_subdir(self, tmp_path, monkeypatch): + """$LIBRARY_PATH// resolves the BYOC layout.""" + files_dir = tmp_path / "files" + files_dir.mkdir() + target = files_dir / "data1.csv" + target.write_text("hello") + monkeypatch.setenv("LIBRARY_PATH", str(tmp_path)) + + finder = DefaultFindFilePath() + assert finder.find_file_path("data1.csv") == target + + def test_resolve_library_path_root_fallback(self, tmp_path, monkeypatch): + """When $LIBRARY_PATH/files/ is missing, fall back to $LIBRARY_PATH/.""" + target = tmp_path / "data1.csv" + target.write_text("hello") + monkeypatch.setenv("LIBRARY_PATH", str(tmp_path)) + + finder = DefaultFindFilePath() + assert finder.find_file_path("data1.csv") == target + + def test_resolve_library_path_subpath_under_files(self, tmp_path, monkeypatch): + """Relative subpaths like 'file/data2.csv' resolve under $LIBRARY_PATH/files/.""" + nested = tmp_path / "files" / "file" + nested.mkdir(parents=True) + target = nested / "data2.csv" + target.write_text("hello") + monkeypatch.setenv("LIBRARY_PATH", str(tmp_path)) + + finder = DefaultFindFilePath() + assert finder.find_file_path("file/data2.csv") == target + + def test_local_run_payload_files_default_layout(self, tmp_path, monkeypatch): + """AC1: local-run resolves payload/files/ with no env, no base_path. + + Mirrors ``datacustomcode run payload/entrypoint.py`` from a freshly + ``init``ed package where neither ``LIBRARY_PATH`` nor ``base_path`` is + in play. 
+ """ + monkeypatch.delenv("LIBRARY_PATH", raising=False) + package_dir = tmp_path / "my_package" + files_dir = package_dir / "payload" / "files" + files_dir.mkdir(parents=True) + target = files_dir / "data1.csv" + target.write_text("hello") + monkeypatch.chdir(package_dir) + + finder = DefaultFindFilePath() + result = finder.find_file_path("data1.csv") + + assert result.resolve() == target.resolve() + + def test_resolve_config_based_anchors_on_config_dir(self, tmp_path, monkeypatch): + """config.json discovery anchors on the config's parent, not cwd. + + ``_find_config_file`` walks down from cwd via ``rglob``, so we put cwd + at an ancestor of the package. The file lives only under + ``/files/`` — a cwd-relative ``files/`` would miss it. + """ + monkeypatch.delenv("LIBRARY_PATH", raising=False) + package_dir = tmp_path / "pkg" + files_dir = package_dir / "files" + files_dir.mkdir(parents=True) + (package_dir / "config.json").write_text("{}") + target = files_dir / "data1.csv" + target.write_text("hello") + + monkeypatch.chdir(tmp_path) + + # Use a code_package that doesn't exist relative to tmp_path, so step 3 + # is skipped and resolution falls through to config.json discovery. 
+ finder = DefaultFindFilePath(code_package="nonexistent_pkg") + result = finder.find_file_path("data1.csv") + assert result.resolve() == target.resolve() def test_find_file_path_empty_filename(self): """Test find_file_path with empty filename raises ValueError.""" @@ -65,10 +186,10 @@ def test_find_file_path_file_not_found(self): """Test find_file_path when file doesn't exist raises FileNotFoundError.""" finder = DefaultFindFilePath() - with patch.object(finder, "_resolve_file_path") as mock_resolve: + with patch.object(finder, "_candidate_paths") as mock_candidates: mock_path = MagicMock() mock_path.exists.return_value = False - mock_resolve.return_value = mock_path + mock_candidates.return_value = iter([mock_path]) with pytest.raises( FileNotFoundError, @@ -80,33 +201,34 @@ def test_find_file_path_success(self): """Test find_file_path when file exists returns Path.""" finder = DefaultFindFilePath() - with patch.object(finder, "_resolve_file_path") as mock_resolve: + with patch.object(finder, "_candidate_paths") as mock_candidates: mock_path = MagicMock() mock_path.exists.return_value = True - mock_resolve.return_value = mock_path + mock_candidates.return_value = iter([mock_path]) result = finder.find_file_path("test.txt") assert result == mock_path - mock_resolve.assert_called_once_with("test.txt") + mock_candidates.assert_called_once_with("test.txt") - def test_resolve_file_path_env_var_set_file_exists(self): - """Test _resolve_file_path when environment variable is set and file exists.""" + def test_find_file_path_env_var_set_file_exists(self): + """find_file_path returns $LIBRARY_PATH/files/ when present.""" finder = DefaultFindFilePath() with tempfile.TemporaryDirectory() as temp_dir: - test_file = Path(temp_dir) / "test.txt" + files_dir = Path(temp_dir) / "files" + files_dir.mkdir() + test_file = files_dir / "test.txt" test_file.write_text("test content") with patch.dict(os.environ, {finder.DEFAULT_ENV_VAR: str(temp_dir)}): - result = 
finder._resolve_file_path("test.txt") + result = finder.find_file_path("test.txt") assert result == test_file assert result.exists() - def test_resolve_file_path_env_var_set_file_not_found(self): - """Test _resolve_file_path when environment variable is set but file not found, - falls back to code package.""" + def test_find_file_path_env_var_set_falls_through_to_code_package(self): + """When $LIBRARY_PATH has no match, resolution falls through to code_package.""" finder = DefaultFindFilePath() with tempfile.TemporaryDirectory() as temp_dir: @@ -122,68 +244,33 @@ def test_resolve_file_path_env_var_set_file_not_found(self): mock_path.exists.return_value = True mock_get_path.return_value = mock_path - result = finder._resolve_file_path("test.txt") + result = finder.find_file_path("test.txt") assert result == mock_path mock_exists.assert_called_once() mock_get_path.assert_called_once_with("test.txt") - def test_resolve_file_path_env_var_not_set(self): - """Test _resolve_file_path when environment variable is not set, - uses normal flow.""" + def test_find_file_path_env_var_not_set_uses_code_package(self, monkeypatch): + """With LIBRARY_PATH unset, code_package is the next candidate.""" + monkeypatch.delenv("LIBRARY_PATH", raising=False) finder = DefaultFindFilePath() - # Ensure env var is not set - env_backup = os.environ.pop(finder.DEFAULT_ENV_VAR, None) - try: - with patch.object( - finder, "_code_package_exists", return_value=True - ) as mock_exists: - with patch.object( - finder, "_get_code_package_file_path" - ) as mock_get_path: - mock_path = MagicMock() - mock_path.exists.return_value = True - mock_get_path.return_value = mock_path - - result = finder._resolve_file_path("test.txt") - - assert result == mock_path - mock_exists.assert_called_once() - mock_get_path.assert_called_once_with("test.txt") - finally: - if env_backup is not None: - os.environ[finder.DEFAULT_ENV_VAR] = env_backup - - def test_resolve_file_path_code_package_exists(self): - """Test 
_resolve_file_path when code package exists and file is found.""" - finder = DefaultFindFilePath() + with patch.object( + finder, "_code_package_exists", return_value=True + ) as mock_exists: + with patch.object(finder, "_get_code_package_file_path") as mock_get_path: + mock_path = MagicMock() + mock_path.exists.return_value = True + mock_get_path.return_value = mock_path - # Ensure env var is not set to test normal flow - env_backup = os.environ.pop(finder.DEFAULT_ENV_VAR, None) - try: - with patch.object( - finder, "_code_package_exists", return_value=True - ) as mock_exists: - with patch.object( - finder, "_get_code_package_file_path" - ) as mock_get_path: - mock_path = MagicMock() - mock_path.exists.return_value = True - mock_get_path.return_value = mock_path - - result = finder._resolve_file_path("test.txt") - - assert result == mock_path - mock_exists.assert_called_once() - mock_get_path.assert_called_once_with("test.txt") - finally: - if env_backup is not None: - os.environ[finder.DEFAULT_ENV_VAR] = env_backup - - def test_resolve_file_path_code_package_exists_file_not_found(self): - """Test _resolve_file_path when code package exists but file not found, - falls back to config.""" + result = finder.find_file_path("test.txt") + + assert result == mock_path + mock_exists.assert_called_once() + mock_get_path.assert_called_once_with("test.txt") + + def test_find_file_path_code_package_exists_falls_through_to_config(self): + """When code_package candidate is missing, config.json discovery runs.""" finder = DefaultFindFilePath() with patch.object(finder, "_code_package_exists", return_value=True): @@ -205,7 +292,7 @@ def test_resolve_file_path_code_package_exists_file_not_found(self): mock_config_file_path.exists.return_value = True mock_get_config_path.return_value = mock_config_file_path - result = finder._resolve_file_path("test.txt") + result = finder.find_file_path("test.txt") assert result == mock_config_file_path mock_find_config.assert_called_once() @@ 
-213,16 +300,35 @@ def test_resolve_file_path_code_package_exists_file_not_found(self): "test.txt", mock_config_path ) - def test_resolve_file_path_fallback_to_filename(self): - """Test _resolve_file_path falls back to Path(filename) - when no other location works.""" + def test_find_file_path_no_candidates_raises(self, monkeypatch): + """When no candidate paths exist, find_file_path raises FileNotFoundError.""" + monkeypatch.delenv("LIBRARY_PATH", raising=False) finder = DefaultFindFilePath() with patch.object(finder, "_code_package_exists", return_value=False): with patch.object(finder, "_find_config_file", return_value=None): - result = finder._resolve_file_path("test.txt") - - assert result == Path("test.txt") + with pytest.raises(FileNotFoundError): + finder.find_file_path("test.txt") + + def test_find_file_path_error_lists_tried_locations(self, tmp_path, monkeypatch): + """FileNotFoundError lists every candidate location that was tried.""" + env_dir = tmp_path / "env" + env_dir.mkdir() + monkeypatch.setenv("LIBRARY_PATH", str(env_dir)) + + finder = DefaultFindFilePath(base_path=tmp_path / "base") + with pytest.raises(FileNotFoundError) as exc_info: + finder.find_file_path("missing.txt") + + message = str(exc_info.value) + assert "missing.txt" in message + assert "Tried:" in message + # base_path candidates should appear + assert str(tmp_path / "base" / "files" / "missing.txt") in message + assert str(tmp_path / "base" / "missing.txt") in message + # LIBRARY_PATH candidates should appear + assert str(env_dir / "files" / "missing.txt") in message + assert str(env_dir / "missing.txt") in message def test_code_package_exists_true(self): """Test _code_package_exists returns True when directory exists.""" @@ -281,23 +387,23 @@ def test_find_config_file_not_found(self): assert result is None def test_get_config_based_file_path(self): - """Test _get_config_based_file_path constructs correct path.""" + """_get_config_based_file_path anchors on the discovered config 
dir.""" finder = DefaultFindFilePath() config_path = Path("/some/path/config.json") result = finder._get_config_based_file_path("test.txt", config_path) - expected = Path("files/test.txt") + expected = Path("/some/path/files/test.txt") assert result == expected def test_get_config_based_file_path_custom_folder(self): - """Test _get_config_based_file_path with custom file folder.""" + """_get_config_based_file_path uses custom file_folder under config dir.""" finder = DefaultFindFilePath(file_folder="custom_files") config_path = Path("/some/path/config.json") result = finder._get_config_based_file_path("test.txt", config_path) - expected = Path("custom_files/test.txt") + expected = Path("/some/path/custom_files/test.txt") assert result == expected def test_find_file_in_tree_found(self): From db3cb04cc72804eb55753d7e73b42f3d58c1271c Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Thu, 11 Jun 2026 12:18:10 -0400 Subject: [PATCH 2/6] remove unnecessary details --- docs/byoc_runtime_contract.md | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/docs/byoc_runtime_contract.md b/docs/byoc_runtime_contract.md index bb9bcb4..bc2c5a4 100644 --- a/docs/byoc_runtime_contract.md +++ b/docs/byoc_runtime_contract.md @@ -25,14 +25,6 @@ Given this layout, `client.find_file_path("data.csv")` resolves to `client.find_file_path("dir/data.csv")` resolves to `$LIBRARY_PATH/files/dir/data.csv`. -## WORKDIR - -The customer-function `WORKDIR` MAY be `/app` (or anything else). The SDK does -not depend on `cwd` for file resolution when `LIBRARY_PATH` is set. Calling -`os.chdir($LIBRARY_PATH)` from runtime code is no longer required and should be -removed when the runtime is updated to a `salesforce-data-customcode` release -that includes this contract. - ## File layout Customer packages place bundled files under `payload/files/`. After @@ -57,11 +49,3 @@ first one that exists: If no candidate exists, `FileNotFoundError` is raised; the message lists every path that was tried. 
- -## Versioning - -This contract applies to `salesforce-data-customcode` releases that include -the `$LIBRARY_PATH/files/` lookup step. Older runtime images pinned to -prior SDK releases must continue to set `cwd = $LIBRARY_PATH` (script -runtime's existing behavior) or `os.chdir($LIBRARY_PATH)` (function runtime -workaround) until they upgrade. From acf5d3f3abbf7c3a8c4c628f3510cf04a0213b5c Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Thu, 11 Jun 2026 12:57:53 -0400 Subject: [PATCH 3/6] remove base_path and clean up docs --- README.md | 22 +++++----- docs/byoc_runtime_contract.md | 51 ---------------------- src/datacustomcode/client.py | 13 +++--- src/datacustomcode/file/path/default.py | 20 ++------- tests/file/test_path_default.py | 57 ++----------------------- 5 files changed, 22 insertions(+), 141 deletions(-) delete mode 100644 docs/byoc_runtime_contract.md diff --git a/README.md b/README.md index ca7a243..4f0f367 100644 --- a/README.md +++ b/README.md @@ -124,8 +124,8 @@ The SDK automatically handles all dependency packaging for Data Cloud deployment ├── payload │ ├── config.json │ ├── entrypoint.py -├── files -│ ├── data.csv +│ ├── files +│ │ ├── data.csv ``` ## py-files directory @@ -137,10 +137,10 @@ Your Python dependencies can be packaged as .py files, .zip archives (containing ├── payload │ ├── config.json │ ├── entrypoint.py -├── py-files -│ ├── moduleA -│ │ ├── __init__.py -│ │ ├── moduleA.py +│ ├── py-files +│ │ ├── moduleA +│ │ │ ├── __init__.py +│ │ │ ├── moduleA.py ``` ## API @@ -179,14 +179,13 @@ Place bundled files (CSVs, prompt files, etc.) under `payload/files/`. The same Resolution order (first existing path wins): -1. `/files/`, then `/` — when the SDK is constructed with an explicit `base_path`. -2. `$LIBRARY_PATH/files/`, then `$LIBRARY_PATH/` — when `LIBRARY_PATH` is set. Data Cloud sets this for you to the package root. -3. `payload/files/` relative to the current working directory. -4. 
`/files/` where `` is the directory of the nearest `config.json` discoverable by walking down from cwd. +1. `$LIBRARY_PATH/files/`, then `$LIBRARY_PATH/` — when `LIBRARY_PATH` is set. Data Cloud sets this for you to the package root. +2. `payload/files/` relative to the current working directory. +3. `/files/` where `` is the directory of the nearest `config.json` discoverable by walking down from cwd. If none of these exist, `find_file_path` raises `FileNotFoundError` with the list of paths it tried. -`LIBRARY_PATH` MUST point to the directory that *contains* `files/` — i.e., the package root (the directory that holds `config.json` and `entrypoint.py`). See [BYOC runtime contract](./docs/byoc_runtime_contract.md) for the full runtime contract between the SDK and Data Cloud. +`$LIBRARY_PATH` is set automatically to the root of the package at runtime inside Data Cloud. ## CLI @@ -464,5 +463,4 @@ If you're using OAuth Tokens authentication, the initial configure will retrieve ## Other docs - [Troubleshooting](./docs/troubleshooting.md) -- [BYOC runtime contract](./docs/byoc_runtime_contract.md) - [For Contributors](./FOR_CONTRIBUTORS.md) diff --git a/docs/byoc_runtime_contract.md b/docs/byoc_runtime_contract.md deleted file mode 100644 index bc2c5a4..0000000 --- a/docs/byoc_runtime_contract.md +++ /dev/null @@ -1,51 +0,0 @@ -# BYOC Runtime Contract - -This document defines the contract between `salesforce-data-customcode` (the SDK) -and the Data Cloud Code Extension runtimes — script packages and function packages — for filesystem-bundled assets -referenced via `Client.find_file_path`. - -## LIBRARY_PATH - -The runtime MUST set `LIBRARY_PATH` to the directory that *contains* the -extracted package's `files/` directory — i.e., the package root, the same -directory that holds `config.json` and `entrypoint.py`. 
- -Concretely: - -``` -$LIBRARY_PATH/ -├── config.json -├── entrypoint.py -└── files/ - └── -``` - -Given this layout, `client.find_file_path("data.csv")` resolves to -`$LIBRARY_PATH/files/data.csv`. Relative subpaths under `files/` are supported: -`client.find_file_path("dir/data.csv")` resolves to -`$LIBRARY_PATH/files/dir/data.csv`. - -## File layout - -Customer packages place bundled files under `payload/files/`. After -extraction in the runtime, the file lives at `$LIBRARY_PATH/files/`. -Files placed directly at the package root (`payload/`, extracted to -`$LIBRARY_PATH/`) are still resolvable but the canonical location is -`files/`. - -## Resolution order - -`Client.find_file_path` tries candidate paths in this order, returning the -first one that exists: - -1. `$BASE_PATH/files/`, then `$BASE_PATH/` — when the SDK is - constructed with an explicit `base_path`. -2. `$LIBRARY_PATH/files/`, then `$LIBRARY_PATH/` — when - `LIBRARY_PATH` is set. -3. `payload/files/` relative to cwd — the local `datacustomcode run` - flow from a project root. -4. `/files/` discovered by walking the cwd subtree for - `config.json`. - -If no candidate exists, `FileNotFoundError` is raised; the message lists every -path that was tried. diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index fb0d868..596eeb7 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -225,17 +225,14 @@ def find_file_path(self, file_name: str) -> Path: Resolution order (first existing path wins): - 1. ``base_path//`` then ``base_path/`` - — when the underlying ``DefaultFindFilePath`` was constructed with - an explicit ``base_path``. - 2. ``$LIBRARY_PATH//`` then + 1. ``$LIBRARY_PATH//`` then ``$LIBRARY_PATH/`` — when the ``LIBRARY_PATH`` environment - variable is set. The Data Cloud runtime sets this to the - directory containing the extracted package. - 3. ``//`` relative to the current + variable is set. 
The Data Cloud runtime sets this to the directory + containing the extracted package. + 2. ``//`` relative to the current working directory — the default ``payload/files/`` layout used by ``datacustomcode run`` from a project root. - 4. ``//`` where ```` is + 3. ``//`` where ```` is the directory containing the nearest ``config.json`` discoverable by walking the cwd subtree. diff --git a/src/datacustomcode/file/path/default.py b/src/datacustomcode/file/path/default.py index 0e7733a..3c120d7 100644 --- a/src/datacustomcode/file/path/default.py +++ b/src/datacustomcode/file/path/default.py @@ -16,7 +16,7 @@ import os from pathlib import Path -from typing import Iterator, Optional, Union +from typing import Iterator, Optional from datacustomcode.file.base import BaseDataAccessLayer @@ -44,7 +44,6 @@ class DefaultFindFilePath(BaseDataAccessLayer): def __init__( self, - base_path: Optional[Union[Path, str]] = None, code_package: Optional[str] = None, file_folder: Optional[str] = None, config_file: Optional[str] = None, @@ -52,16 +51,10 @@ def __init__( """Initialize the file reader with configuration. Args: - base_path: Optional explicit anchor for resolution. When provided, - ``base_path//`` and ``base_path/`` are - tried before any environment- or cwd-based lookups. Used by - runtime hosts that know the package root and don't want to - rely on ``LIBRARY_PATH`` or the current working directory. code_package: The default code package directory to search file_folder: The folder containing files relative to the code package config_file: The configuration file to use for path resolution """ - self.base_path = Path(base_path) if base_path is not None else None self.code_package = code_package or self.DEFAULT_CODE_PACKAGE self.file_folder = file_folder or self.DEFAULT_FILE_FOLDER self.config_file = config_file or self.DEFAULT_CONFIG_FILE @@ -101,22 +94,17 @@ def _candidate_paths(self, file_name: str) -> Iterator[Path]: Returns: An iterator of candidate paths """ - # 1. 
base_path//, then base_path/ - if self.base_path is not None: - yield self.base_path / self.file_folder / file_name - yield self.base_path / file_name - - # 2. $LIBRARY_PATH//, then $LIBRARY_PATH/ + # 1. $LIBRARY_PATH//, then $LIBRARY_PATH/ env_path = os.getenv(self.DEFAULT_ENV_VAR) if env_path: yield Path(env_path) / self.file_folder / file_name yield Path(env_path) / file_name - # 3. // relative to cwd + # 2. // relative to cwd if self._code_package_exists(): yield self._get_code_package_file_path(file_name) - # 4. // via config.json discovery + # 3. // via config.json discovery config_path = self._find_config_file() if config_path is not None: yield self._get_config_based_file_path(file_name, config_path) diff --git a/tests/file/test_path_default.py b/tests/file/test_path_default.py index c63987e..2a52228 100644 --- a/tests/file/test_path_default.py +++ b/tests/file/test_path_default.py @@ -50,53 +50,6 @@ def test_init_with_custom_values(self): assert finder.code_package == "custom_package" assert finder.file_folder == "custom_files" assert finder.config_file == "custom_config.json" - assert finder.base_path is None - - def test_init_with_base_path(self): - """base_path is stored as a Path even when given a string.""" - finder = DefaultFindFilePath(base_path="/extracted") - assert finder.base_path == Path("/extracted") - - finder_path = DefaultFindFilePath(base_path=Path("/extracted")) - assert finder_path.base_path == Path("/extracted") - - def test_resolve_with_base_path_files_subdir(self, tmp_path, monkeypatch): - """base_path// is preferred over base_path/.""" - monkeypatch.delenv("LIBRARY_PATH", raising=False) - files_dir = tmp_path / "files" - files_dir.mkdir() - target = files_dir / "data1.csv" - target.write_text("hello") - # Also a stray at the root to prove files/ wins. 
- (tmp_path / "data1.csv").write_text("stray") - - finder = DefaultFindFilePath(base_path=tmp_path) - assert finder.find_file_path("data1.csv") == target - - def test_resolve_with_base_path_root_fallback(self, tmp_path, monkeypatch): - """When base_path/files/ is missing, fall back to base_path/.""" - monkeypatch.delenv("LIBRARY_PATH", raising=False) - target = tmp_path / "data1.csv" - target.write_text("hello") - - finder = DefaultFindFilePath(base_path=tmp_path) - assert finder.find_file_path("data1.csv") == target - - def test_resolve_base_path_takes_precedence_over_env(self, tmp_path, monkeypatch): - """base_path is checked before $LIBRARY_PATH.""" - # base_path holds the file; LIBRARY_PATH points elsewhere. - base_files = tmp_path / "base" / "files" - base_files.mkdir(parents=True) - target = base_files / "data1.csv" - target.write_text("from-base") - - env_dir = tmp_path / "env" - env_dir.mkdir() - (env_dir / "data1.csv").write_text("from-env") - monkeypatch.setenv("LIBRARY_PATH", str(env_dir)) - - finder = DefaultFindFilePath(base_path=tmp_path / "base") - assert finder.find_file_path("data1.csv") == target def test_resolve_library_path_files_subdir(self, tmp_path, monkeypatch): """$LIBRARY_PATH// resolves the BYOC layout.""" @@ -130,11 +83,10 @@ def test_resolve_library_path_subpath_under_files(self, tmp_path, monkeypatch): assert finder.find_file_path("file/data2.csv") == target def test_local_run_payload_files_default_layout(self, tmp_path, monkeypatch): - """AC1: local-run resolves payload/files/ with no env, no base_path. + """AC1: local-run resolves payload/files/ with no LIBRARY_PATH set. Mirrors ``datacustomcode run payload/entrypoint.py`` from a freshly - ``init``ed package where neither ``LIBRARY_PATH`` nor ``base_path`` is - in play. + ``init``ed package. 
""" monkeypatch.delenv("LIBRARY_PATH", raising=False) package_dir = tmp_path / "my_package" @@ -316,16 +268,13 @@ def test_find_file_path_error_lists_tried_locations(self, tmp_path, monkeypatch) env_dir.mkdir() monkeypatch.setenv("LIBRARY_PATH", str(env_dir)) - finder = DefaultFindFilePath(base_path=tmp_path / "base") + finder = DefaultFindFilePath() with pytest.raises(FileNotFoundError) as exc_info: finder.find_file_path("missing.txt") message = str(exc_info.value) assert "missing.txt" in message assert "Tried:" in message - # base_path candidates should appear - assert str(tmp_path / "base" / "files" / "missing.txt") in message - assert str(tmp_path / "base" / "missing.txt") in message # LIBRARY_PATH candidates should appear assert str(env_dir / "files" / "missing.txt") in message assert str(env_dir / "missing.txt") in message From a54e4421375b08e5b59dbb46f4d032e550de5721 Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Thu, 11 Jun 2026 13:07:31 -0400 Subject: [PATCH 4/6] fix linting errors --- tests/file/test_path_default.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/file/test_path_default.py b/tests/file/test_path_default.py index 2a52228..f6f8f78 100644 --- a/tests/file/test_path_default.py +++ b/tests/file/test_path_default.py @@ -63,7 +63,7 @@ def test_resolve_library_path_files_subdir(self, tmp_path, monkeypatch): assert finder.find_file_path("data1.csv") == target def test_resolve_library_path_root_fallback(self, tmp_path, monkeypatch): - """When $LIBRARY_PATH/files/ is missing, fall back to $LIBRARY_PATH/.""" + """Fall back to $LIBRARY_PATH/ when files/ is missing.""" target = tmp_path / "data1.csv" target.write_text("hello") monkeypatch.setenv("LIBRARY_PATH", str(tmp_path)) @@ -72,7 +72,7 @@ def test_resolve_library_path_root_fallback(self, tmp_path, monkeypatch): assert finder.find_file_path("data1.csv") == target def test_resolve_library_path_subpath_under_files(self, tmp_path, monkeypatch): - """Relative subpaths like 
'file/data2.csv' resolve under $LIBRARY_PATH/files/.""" + """Relative subpaths like 'file/data2.csv' resolve under $LIBRARY_PATH/files.""" nested = tmp_path / "files" / "file" nested.mkdir(parents=True) target = nested / "data2.csv" From 7f3b33efa72c39f47410f97098d9d1a02a056542 Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Fri, 12 Jun 2026 13:08:02 -0400 Subject: [PATCH 5/6] fix documentation --- src/datacustomcode/client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/datacustomcode/client.py b/src/datacustomcode/client.py index 596eeb7..003e389 100644 --- a/src/datacustomcode/client.py +++ b/src/datacustomcode/client.py @@ -238,8 +238,8 @@ def find_file_path(self, file_name: str) -> Path: ``LIBRARY_PATH`` must point to the directory that *contains* ``files/`` — i.e., the package root, the same directory that holds - ``config.json`` and ``entrypoint.py``. See - ``docs/byoc_runtime_contract.md`` for the full runtime contract. + ``config.json`` and ``entrypoint.py``. See ``Bundled file resolution`` + in the ``README.md`` for more details. Args: file_name: A file under the package's ``files/`` folder. Relative From 39f7c1a83fc60e1d23530480290339b1c743ea46 Mon Sep 17 00:00:00 2001 From: Zach Maddox Date: Fri, 12 Jun 2026 13:48:52 -0400 Subject: [PATCH 6/6] no-op commit