From 7c5cc1a52d0ac9f9c4c2a31f66f84c75d28dfb1a Mon Sep 17 00:00:00 2001 From: Ben Davies Date: Tue, 16 Jun 2026 22:43:28 +0100 Subject: [PATCH] feat: positional schema definition mutations Add methods to Flow\ETL\Schema for controlling where definitions land, instead of add() only ever appending: - prepend(): insert at the beginning - insertAt(): insert at an explicit index - addBefore() / addAfter(): insert relative to an existing column - moveTo(): relocate an existing column to an index - moveBefore() / moveAfter(): relocate relative to another column - reorder(): reorder by name, keeping unlisted columns in relative order Closes #2453 --- src/core/etl/src/Flow/ETL/Schema.php | 218 ++++++++++++ .../Flow/ETL/Tests/Unit/Schema/SchemaTest.php | 316 ++++++++++++++++++ 2 files changed, 534 insertions(+) diff --git a/src/core/etl/src/Flow/ETL/Schema.php b/src/core/etl/src/Flow/ETL/Schema.php index ac9bf75e19..9b8e342460 100644 --- a/src/core/etl/src/Flow/ETL/Schema.php +++ b/src/core/etl/src/Flow/ETL/Schema.php @@ -15,8 +15,11 @@ use Flow\ETL\Schema\Metadata; use function array_key_exists; +use function array_keys; use function array_map; use function array_merge; +use function array_search; +use function array_splice; use function array_values; use function count; use function Flow\ETL\DSL\definition_from_array; @@ -121,6 +124,34 @@ public function add(Definition ...$definitions): self return $this; } + /** + * Inserts definitions immediately after an existing column. + * + * @param Definition ...$definitions + * + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function addAfter(string|Reference $reference, Definition ...$definitions): self + { + return $this->insertAt($this->indexOf($reference) + 1, ...$definitions); + } + + /** + * Inserts definitions immediately before an existing column. + * + * @param Definition ...$definitions + * + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function addBefore(string|Reference $reference, Definition ...$definitions): self + { + return $this->insertAt($this->indexOf($reference), ...$definitions); + } + /** * Adds metadata to a given definition. * @@ -200,6 +231,35 @@ public function gracefulRemove(string|Reference ...$entries): self return $this; } + /** + * Inserts definitions at an explicit position. Index 0 prepends, an index equal to the + * number of definitions appends. + * + * @param Definition ...$definitions + * + * @throws InvalidArgumentException + * + * @return Schema + */ + public function insertAt(int $index, Definition ...$definitions): self + { + $definitionsList = array_values($this->definitions); + + if ($index < 0 || $index > count($definitionsList)) { + throw new InvalidArgumentException(sprintf( + 'Cannot insert definitions at index %d, schema has %d definitions', + $index, + count($definitionsList), + )); + } + + array_splice($definitionsList, $index, 0, $definitions); + + $this->setDefinitions(...$definitionsList); + + return $this; + } + public function isSame(self $schema): bool { if (count($this->definitions) !== count($schema->definitions)) { @@ -300,6 +360,66 @@ public function merge(self $schema): self return $this; } + /** + * Moves an existing column to immediately after another column, preserving its definition. + * + * @throws InvalidArgumentException + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function moveAfter(string|Reference $name, string|Reference $reference): self + { + return $this->moveRelative($name, $reference, 1); + } + + /** + * Moves an existing column to immediately before another column, preserving its definition. + * + * @throws InvalidArgumentException + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function moveBefore(string|Reference $name, string|Reference $reference): self + { + return $this->moveRelative($name, $reference, 0); + } + + /** + * Moves an existing column to an explicit position, preserving its definition. + * The index is the final position of the column in the resulting schema. + * + * @throws InvalidArgumentException + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function moveTo(string|Reference $name, int $index): self + { + $from = $this->indexOf($name); + $definitionsList = array_values($this->definitions); + + if ($index < 0 || $index >= count($definitionsList)) { + throw new InvalidArgumentException(sprintf( + 'Cannot move entry "%s" to index %d, schema has %d definitions', + (string) $name, + $index, + count($definitionsList), + )); + } + + // Mago infers array_splice()'s return as array, dropping the element type, + // so we restore it explicitly. See https://github.com/carthage-software/mago/issues/1982 + /** @var list> $moved */ + $moved = array_splice($definitionsList, $from, 1); + array_splice($definitionsList, $index, 0, $moved); + + $this->setDefinitions(...$definitionsList); + + return $this; + } + /** * @return array> */ @@ -314,6 +434,20 @@ public function normalize(): array return $definitions; } + /** + * Inserts definitions at the beginning of the schema. + * + * @param Definition ...$definitions + * + * @return Schema + */ + public function prepend(Definition ...$definitions): self + { + $this->setDefinitions(...$definitions, ...array_values($this->definitions)); + + return $this; + } + public function references(): References { $refs = []; @@ -375,6 +509,42 @@ public function rename(string|Reference $entry, string $newName): self return $this; } + /** + * Reorders columns by name. Any columns not listed keep their relative order and are appended. + * + * @throws InvalidArgumentException + * @throws SchemaDefinitionNotFoundException + * + * @return Schema + */ + public function reorder(string|Reference ...$names): self + { + $definitions = []; + $reordered = []; + + foreach ($names as $name) { + $definition = $this->findDefinition($name) ?: throw new SchemaDefinitionNotFoundException((string) $name); + $key = $definition->entry()->name(); + + if (array_key_exists($key, $reordered)) { + throw new InvalidArgumentException(sprintf('Cannot reorder entry "%s" more than once', (string) $name)); + } + + $reordered[$key] = true; + $definitions[] = $definition; + } + + foreach ($this->definitions as $key => $definition) { + if (!array_key_exists($key, $reordered)) { + $definitions[] = $definition; + } + } + + $this->setDefinitions(...$definitions); + + return $this; + } + /** * @param Definition $definition * @@ -415,6 +585,54 @@ public function setMetadata(string $definition, Metadata $metadata): self return $this; } + private function indexOf(string|Reference $reference): int + { + $index = array_search(EntryReference::init($reference)->name(), array_keys($this->definitions), true); + + if ($index === false) { + throw new SchemaDefinitionNotFoundException((string) $reference); + } + + return $index; + } + + private function moveRelative(string|Reference $name, string|Reference $reference, int $offset): self + { + $from = $this->indexOf($name); + $referenceName = EntryReference::init($reference)->name(); + + if (!$this->findDefinition($reference)) { + throw new SchemaDefinitionNotFoundException((string) $reference); + } + + if (EntryReference::init($name)->name() === $referenceName) { + throw new InvalidArgumentException(sprintf('Cannot move entry "%s" relative to itself', (string) $name)); + } + + $definitionsList = array_values($this->definitions); + + // Mago infers array_splice()'s return as array, dropping the element type, + // so we restore it explicitly. See https://github.com/carthage-software/mago/issues/1982 + /** @var list> $moved */ + $moved = array_splice($definitionsList, $from, 1); + + $referenceIndex = 0; + + foreach ($definitionsList as $position => $definition) { + if ($definition->entry()->name() === $referenceName) { + $referenceIndex = $position; + + break; + } + } + + array_splice($definitionsList, $referenceIndex + $offset, 0, $moved); + + $this->setDefinitions(...$definitionsList); + + return $this; + } + /** * @param Definition ...$definitions */ diff --git a/src/core/etl/tests/Flow/ETL/Tests/Unit/Schema/SchemaTest.php b/src/core/etl/tests/Flow/ETL/Tests/Unit/Schema/SchemaTest.php index 4c8435e2b3..4276c0dce4 100644 --- a/src/core/etl/tests/Flow/ETL/Tests/Unit/Schema/SchemaTest.php +++ b/src/core/etl/tests/Flow/ETL/Tests/Unit/Schema/SchemaTest.php @@ -9,6 +9,7 @@ use Flow\ETL\Exception\SchemaDefinitionNotFoundException; use Flow\ETL\Exception\SchemaDefinitionNotUniqueException; use Flow\ETL\Row\EntryReference; +use Flow\ETL\Row\Reference; use Flow\ETL\Schema; use Flow\ETL\Schema\Metadata; use Flow\ETL\Tests\FlowTestCase; @@ -16,12 +17,14 @@ use JsonException; use PHPUnit\Framework\Attributes\DataProvider; +use function array_keys; use function Flow\ETL\DSL\bool_schema; use function Flow\ETL\DSL\int_schema; use function Flow\ETL\DSL\integer_schema; use function Flow\ETL\DSL\json_schema; use function Flow\ETL\DSL\list_schema; use function Flow\ETL\DSL\map_schema; +use function Flow\ETL\DSL\ref; use function Flow\ETL\DSL\refs; use function Flow\ETL\DSL\schema; use function Flow\ETL\DSL\schema_from_json; @@ -39,6 +42,18 @@ final class SchemaTest extends FlowTestCase { + public static function provide_add_after_reference_inputs(): Generator + { + yield 'string reference' => ['id']; + yield 'Reference reference' => [ref('id')]; + } + + public static function provide_add_before_reference_inputs(): Generator + { + yield 'string reference' => ['name']; + yield 'Reference reference' => [ref('name')]; + } + public static function provide_is_same_cases(): Generator { yield 'identical simple schemas' => [ @@ -192,6 +207,83 @@ public static function provide_is_same_cases(): Generator ]; } + public static function provide_move_after_reference_inputs(): Generator + { + yield 'string name, string reference' => ['active', 'id']; + yield 'string name, Reference reference' => ['active', ref('id')]; + yield 'Reference name, string reference' => [ref('active'), 'id']; + yield 'Reference name, Reference reference' => [ref('active'), ref('id')]; + } + + public static function provide_move_before_reference_inputs(): Generator + { + yield 'string name, string reference' => ['active', 'name']; + yield 'string name, Reference reference' => ['active', ref('name')]; + yield 'Reference name, string reference' => [ref('active'), 'name']; + yield 'Reference name, Reference reference' => [ref('active'), ref('name')]; + } + + public static function provide_move_to_reference_inputs(): Generator + { + yield 'string name' => ['active']; + yield 'Reference name' => [ref('active')]; + } + + public static function provide_reorder_reference_inputs(): Generator + { + yield 'string names' => [['id', 'name', 'email']]; + yield 'Reference names' => [[ref('id'), ref('name'), ref('email')]]; + yield 'mixed names' => [[ref('id'), 'name', ref('email')]]; + } + + #[DataProvider('provide_add_after_reference_inputs')] + public function test_add_after(string|Reference $reference): void + { + $schema = schema(int_schema('id'), str_schema('name'))->addAfter($reference, bool_schema('active')); + + static::assertSame(['id', 'active', 'name'], array_keys($schema->definitions())); + } + + public function test_add_after_duplicate_definition(): void + { + $this->expectException(SchemaDefinitionNotUniqueException::class); + + schema(int_schema('id'), str_schema('name'))->addAfter('name', int_schema('id')); + } + + public function test_add_after_non_existing_reference(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->addAfter('not-existing', bool_schema('active')); + } + + #[DataProvider('provide_add_before_reference_inputs')] + public function test_add_before(string|Reference $reference): void + { + $schema = schema(int_schema('id'), str_schema('name'))->addBefore( + $reference, + bool_schema('active'), + str_schema('email'), + ); + + static::assertSame(['id', 'active', 'email', 'name'], array_keys($schema->definitions())); + } + + public function test_add_before_duplicate_definition(): void + { + $this->expectException(SchemaDefinitionNotUniqueException::class); + + schema(int_schema('id'), str_schema('name'))->addBefore('name', int_schema('id')); + } + + public function test_add_before_non_existing_reference(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->addBefore('not-existing', bool_schema('active')); + } + public function test_add_metadata(): void { $schema = schema(int_schema('id'), str_schema('name')); @@ -274,6 +366,50 @@ public function test_graceful_remove_non_existing_definition(): void ); } + public function test_insert_at(): void + { + $schema = schema(int_schema('id'), str_schema('name'))->insertAt(1, bool_schema('active')); + + static::assertSame(['id', 'active', 'name'], array_keys($schema->definitions())); + } + + public function test_insert_at_appends_when_index_equals_count(): void + { + $schema = schema(int_schema('id'), str_schema('name'))->insertAt(2, bool_schema('active')); + + static::assertSame(['id', 'name', 'active'], array_keys($schema->definitions())); + } + + public function test_insert_at_duplicate_definition(): void + { + $this->expectException(SchemaDefinitionNotUniqueException::class); + + schema(int_schema('id'), str_schema('name'))->insertAt(1, int_schema('id')); + } + + public function test_insert_at_negative_index(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot insert definitions at index -1, schema has 2 definitions'); + + schema(int_schema('id'), str_schema('name'))->insertAt(-1, bool_schema('active')); + } + + public function test_insert_at_out_of_range(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot insert definitions at index 3, schema has 2 definitions'); + + schema(int_schema('id'), str_schema('name'))->insertAt(3, bool_schema('active')); + } + + public function test_insert_at_prepends_at_zero(): void + { + $schema = schema(int_schema('id'), str_schema('name'))->insertAt(0, bool_schema('active')); + + static::assertSame(['active', 'id', 'name'], array_keys($schema->definitions())); + } + #[DataProvider('provide_is_same_cases')] public function test_is_same(Schema $schema1, Schema $schema2, bool $expected): void { @@ -312,6 +448,116 @@ public function test_merge_returns_self_when_schemas_are_identical(): void static::assertSame($schema1, $merged); } + #[DataProvider('provide_move_after_reference_inputs')] + public function test_move_after(string|Reference $name, string|Reference $reference): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveAfter($name, $reference); + + static::assertSame(['id', 'active', 'name'], array_keys($schema->definitions())); + } + + public function test_move_after_forward(): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveAfter('id', 'name'); + + static::assertSame(['name', 'id', 'active'], array_keys($schema->definitions())); + } + + public function test_move_after_non_existing_entry(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->moveAfter('not-existing', 'name'); + } + + public function test_move_after_non_existing_reference(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->moveAfter('name', 'not-existing'); + } + + #[DataProvider('provide_move_before_reference_inputs')] + public function test_move_before(string|Reference $name, string|Reference $reference): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveBefore($name, $reference); + + static::assertSame(['id', 'active', 'name'], array_keys($schema->definitions())); + } + + public function test_move_before_forward(): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveBefore('id', 'active'); + + static::assertSame(['name', 'id', 'active'], array_keys($schema->definitions())); + } + + public function test_move_before_non_existing_entry(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->moveBefore('not-existing', 'name'); + } + + public function test_move_before_non_existing_reference(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->moveBefore('name', 'not-existing'); + } + + public function test_move_preserves_definition_metadata(): void + { + $schema = schema( + str_schema('name'), + int_schema('id', metadata: Metadata::fromArray(['primary_key' => true])), + )->moveTo('id', 0); + + static::assertSame(['id', 'name'], array_keys($schema->definitions())); + static::assertEquals( + int_schema('id', metadata: Metadata::fromArray(['primary_key' => true])), + $schema->get('id'), + ); + } + + public function test_move_relative_to_itself(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot move entry "id" relative to itself'); + + schema(int_schema('id'), str_schema('name'))->moveBefore('id', 'id'); + } + + #[DataProvider('provide_move_to_reference_inputs')] + public function test_move_to(string|Reference $name): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveTo($name, 0); + + static::assertSame(['active', 'id', 'name'], array_keys($schema->definitions())); + } + + public function test_move_to_forward(): void + { + $schema = schema(int_schema('id'), str_schema('name'), bool_schema('active'))->moveTo('id', 2); + + static::assertSame(['name', 'active', 'id'], array_keys($schema->definitions())); + } + + public function test_move_to_non_existing(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->moveTo('not-existing', 0); + } + + public function test_move_to_out_of_range(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot move entry "id" to index 2, schema has 2 definitions'); + + schema(int_schema('id'), str_schema('name'))->moveTo('id', 2); + } + public function test_normalizing_and_recreating_schema(): void { $schema = schema( @@ -330,6 +576,41 @@ public function test_normalizing_and_recreating_schema(): void static::assertEquals($schema, Schema::fromArray($schema->normalize())); } + public function test_positional_mutation_returns_same_instance(): void + { + $schema = schema(int_schema('id'), str_schema('name')); + + static::assertSame($schema, $schema->prepend(bool_schema('active'))); + } + + public function test_prepend(): void + { + $schema = schema( + str_schema('name'), + str_schema('email'), + )->prepend(int_schema('id', metadata: Metadata::fromArray(['primary_key' => true]))); + + static::assertSame(['id', 'name', 'email'], array_keys($schema->definitions())); + static::assertEquals( + int_schema('id', metadata: Metadata::fromArray(['primary_key' => true])), + $schema->get('id'), + ); + } + + public function test_prepend_duplicate_definition(): void + { + $this->expectException(SchemaDefinitionNotUniqueException::class); + + schema(int_schema('id'), str_schema('name'))->prepend(int_schema('id')); + } + + public function test_prepend_multiple(): void + { + $schema = schema(int_schema('id'))->prepend(str_schema('name'), bool_schema('active')); + + static::assertSame(['name', 'active', 'id'], array_keys($schema->definitions())); + } + public function test_remove_non_existing_definition(): void { $this->expectException(SchemaDefinitionNotFoundException::class); @@ -356,6 +637,41 @@ public function test_rename_non_existing(): void schema(int_schema('id'), str_schema('name'))->rename('not-existing', 'new_name'); } + /** + * @param list $names + */ + #[DataProvider('provide_reorder_reference_inputs')] + public function test_reorder(array $names): void + { + $schema = schema(str_schema('name'), str_schema('email'), int_schema('id'))->reorder(...$names); + + static::assertSame(['id', 'name', 'email'], array_keys($schema->definitions())); + } + + public function test_reorder_duplicate_name(): void + { + $this->expectException(InvalidArgumentException::class); + $this->expectExceptionMessage('Cannot reorder entry "id" more than once'); + + schema(int_schema('id'), str_schema('name'))->reorder('id', 'id'); + } + + public function test_reorder_keeps_unlisted_columns(): void + { + $schema = schema(str_schema('name'), str_schema('email'), int_schema('id'), bool_schema('active'))->reorder( + 'id', + ); + + static::assertSame(['id', 'name', 'email', 'active'], array_keys($schema->definitions())); + } + + public function test_reorder_non_existing(): void + { + $this->expectException(SchemaDefinitionNotFoundException::class); + + schema(int_schema('id'), str_schema('name'))->reorder('not-existing'); + } + public function test_replace_non_existing_reference(): void { $this->expectException(SchemaDefinitionNotFoundException::class);