From 0bb19e51ccf86e4fbbb527a18fe2b3295e5b9489 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Fri, 12 Jun 2026 06:44:06 +0000 Subject: [PATCH 1/3] fix: validate genome_fasta content at schema level (fixes #8) genome_fasta had no content validation, so workspace paths or header-only strings would pass request parsing and only fail inside the Celery task with a confusing ValueError. Add a field_validator that requires at least one >header line followed by sequence data; when the check fails it raises ValueError during request parsing, so the API responds with a 422 Unprocessable Entity at request time. Co-Authored-By: Claude Sonnet 4.6 --- src/modelseed_api/schemas/jobs.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/modelseed_api/schemas/jobs.py b/src/modelseed_api/schemas/jobs.py index 6519e44..1851859 100644 --- a/src/modelseed_api/schemas/jobs.py +++ b/src/modelseed_api/schemas/jobs.py @@ -7,7 +7,7 @@ from typing import Literal, Optional -from pydantic import BaseModel, Field, model_validator +from pydantic import BaseModel, Field, field_validator, model_validator # Valid template_type values. Mirrors TEMPLATE_FILES in jobs/tasks.py @@ -87,6 +87,29 @@ class ReconstructionRequest(BaseModel): media: Optional[str] = None output_path: Optional[str] = None + @field_validator("genome_fasta") + @classmethod + def _validate_genome_fasta_content(cls, v): + if v is None: + return v + has_seq = False + in_record = False + for line in v.splitlines(): + stripped = line.strip() + if stripped.startswith(">"): + in_record = True + elif stripped and in_record: + has_seq = True + break + if not has_seq: + raise ValueError( + "genome_fasta must contain at least one FASTA record " + "(a >header line followed by sequence data); got no valid " + "records. Pass actual FASTA content, not a file path or " + "workspace reference." + ) + return v + @model_validator(mode="after") def _validate_input_modes(self) -> "ReconstructionRequest": """At most ONE of (genome_fasta, rast_job_id) may be set. 
From d7a645ae25cf9bfeb71a86127e4b3c7cf7fea0e0 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Fri, 12 Jun 2026 06:44:26 +0000 Subject: [PATCH 2/3] test: add route-level tests for genome_fasta validation (issue #8) --- tests/routes/test_job_routes.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tests/routes/test_job_routes.py b/tests/routes/test_job_routes.py index 5772f65..614490a 100644 --- a/tests/routes/test_job_routes.py +++ b/tests/routes/test_job_routes.py @@ -57,6 +57,36 @@ def test_with_dna_fasta(self, local_client, auth_headers): assert resp.status_code == 200 + def test_invalid_genome_fasta_no_sequence_returns_422(self, local_client, auth_headers): + # Headers-only FASTA (no sequence data) must be rejected at request + # time with a 422, not bubble up as a Celery task failure. + resp = local_client.post( + "/api/jobs/reconstruct", + json={"genome": "custom", "genome_fasta": ">contig1\n>contig2\n"}, + headers=auth_headers, + ) + assert resp.status_code == 422 + assert "FASTA" in resp.text + + def test_genome_fasta_as_path_returns_422(self, local_client, auth_headers): + # A workspace path passed as genome_fasta content should be rejected. 
+ resp = local_client.post( + "/api/jobs/reconstruct", + json={"genome": "custom", "genome_fasta": "/username/data/genome.fasta"}, + headers=auth_headers, + ) + assert resp.status_code == 422 + assert "FASTA" in resp.text + + def test_whitespace_only_genome_fasta_returns_422(self, local_client, auth_headers): + resp = local_client.post( + "/api/jobs/reconstruct", + json={"genome": "custom", "genome_fasta": " \n \n"}, + headers=auth_headers, + ) + assert resp.status_code == 422 + + class TestGapfillJob: def test_dispatch(self, local_client, auth_headers): resp = local_client.post( From 90030d711a5c13a889a52016ab413f09b7798e10 Mon Sep 17 00:00:00 2001 From: jplfaria Date: Fri, 12 Jun 2026 06:45:29 +0000 Subject: [PATCH 3/3] test: add unit tests for ReconstructionRequest genome_fasta validator (issue #8) --- tests/unit/test_reconstruction_schema.py | 30 ++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tests/unit/test_reconstruction_schema.py diff --git a/tests/unit/test_reconstruction_schema.py b/tests/unit/test_reconstruction_schema.py new file mode 100644 index 0000000..3c477a1 --- /dev/null +++ b/tests/unit/test_reconstruction_schema.py @@ -0,0 +1,30 @@ +"""Unit tests for ReconstructionRequest schema validation.""" +import pytest +from pydantic import ValidationError +from modelseed_api.schemas.jobs import ReconstructionRequest + + +class TestGenomeFastaValidation: + def test_none_is_valid(self): + req = ReconstructionRequest(genome="83333.1", genome_fasta=None) + assert req.genome_fasta is None + + def test_valid_protein_fasta(self): + req = ReconstructionRequest(genome="custom", genome_fasta=">p1\nMKKLVAV") + assert req.genome_fasta is not None + + def test_valid_dna_fasta(self): + req = ReconstructionRequest(genome="custom", genome_fasta=">contig1\nACGTACGT") + assert req.genome_fasta is not None + + def test_headers_only_rejected(self): + with pytest.raises(ValidationError, match="FASTA"): + ReconstructionRequest(genome="custom", 
genome_fasta=">c1\n>c2\n") + + def test_path_string_rejected(self): + with pytest.raises(ValidationError, match="FASTA"): + ReconstructionRequest(genome="custom", genome_fasta="/user/data/genome.fasta") + + def test_whitespace_only_rejected(self): + with pytest.raises(ValidationError, match="FASTA"): + ReconstructionRequest(genome="custom", genome_fasta=" \n ")