From c39aff8a63ea513ab0921da23d28d300e2e73a9a Mon Sep 17 00:00:00 2001 From: Michael Weiss Date: Fri, 12 Jun 2026 13:15:18 +0200 Subject: [PATCH] =?UTF-8?q?=E2=9C=A8=20Make=20blocks=20and=20fields=20hash?= =?UTF-8?q?able?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- bibtexparser/library.py | 21 +++++++++++++++++++-- bibtexparser/model.py | 12 ++++++++++++ tests/test_library.py | 31 +++++++++++++++++++++++++++++++ tests/test_model.py | 31 +++++++++++++++++++++++++++++++ 4 files changed, 93 insertions(+), 2 deletions(-) diff --git a/bibtexparser/library.py b/bibtexparser/library.py index c19e144..fd50471 100644 --- a/bibtexparser/library.py +++ b/bibtexparser/library.py @@ -80,16 +80,30 @@ def _find_duplicate_keys(self, blocks: List[Block]) -> List[str]: seen_string_keys.add(block.key) return duplicate_keys + def _block_index(self, block: Block) -> int: + """Index of a block in the library, preferring identity over equality. + + :param block: Block to look up. + :raises ValueError: If block is not in library.""" + for i, b in enumerate(self._blocks): + if b is block: + return i + # No identity match; fall back to equality (raises ValueError if not found). + return self._blocks.index(block) + def remove(self, blocks: Union[List[Block], Block]): """Remove blocks from library. + If equal duplicate blocks exist in the library, the exact (identical) + instance is removed, if present; otherwise the first equal block. + :param blocks: Block or list of blocks to remove. :raises ValueError: If block is not in library.""" if isinstance(blocks, Block): blocks = [blocks] for block in blocks: - self._blocks.remove(block) + del self._blocks[self._block_index(block)] if isinstance(block, Entry): del self._entries_by_key[block.key] elif isinstance(block, String): @@ -98,6 +112,9 @@ def remove(self, blocks: Union[List[Block], Block]): def replace(self, old_block: Block, new_block: Block, fail_on_duplicate_key: bool = True): """Replace a block with another block, at the same position. + If equal duplicate blocks exist in the library, the exact (identical) + instance is replaced, if present; otherwise the first equal block. + :param old_block: Block to replace. :param new_block: Block to replace with. :param fail_on_duplicate_key: If False, adds a DuplicateKeyBlock if @@ -105,7 +122,7 @@ def replace(self, old_block: Block, new_block: Block, fail_on_duplicate_key: boo :raises ValueError: If old_block is not in library or if fail_on_duplicate_key is True and a block with new_block.key (other than old_block) already exists.""" try: - index = self._blocks.index(old_block) + index = self._block_index(old_block) self.remove(old_block) except ValueError: raise ValueError("Block to replace is not in library.") diff --git a/bibtexparser/model.py b/bibtexparser/model.py index 3e93f63..2434d50 100644 --- a/bibtexparser/model.py +++ b/bibtexparser/model.py @@ -86,6 +86,12 @@ def __eq__(self, other: object) -> bool: and self.__dict__ == other.__dict__ ) + def __hash__(self) -> int: + # Hash on a stable subset of the attributes compared in `__eq__` + # (equal blocks thus have equal hashes). Mutable or potentially + # unhashable attributes (e.g. fields, parser_metadata) are excluded. + return hash((type(self), self._start_line_in_file, self._raw)) + class String(Block): """Bibtex Blocks of the ``@string`` type, e.g. ``@string{me = "My Name"}``.""" @@ -289,6 +295,12 @@ def __eq__(self, other: object) -> bool: and self.__dict__ == other.__dict__ ) + def __hash__(self) -> int: + # Hash on a stable subset of the attributes compared in `__eq__` + # (equal fields thus have equal hashes). The value is excluded as it + # may be mutable or unhashable (e.g. a list after middleware was applied). + return hash((type(self), self._start_line, self._key)) + def __str__(self) -> str: return f"Field (line: {self.start_line}, key: `{self.key}`): `{self.value}`" diff --git a/tests/test_library.py b/tests/test_library.py index 9b515d0..e0668d0 100644 --- a/tests/test_library.py +++ b/tests/test_library.py @@ -3,6 +3,7 @@ from bibtexparser import Library from bibtexparser.model import Entry from bibtexparser.model import Field +from bibtexparser.model import ImplicitComment def get_dummy_entry(): @@ -94,3 +95,33 @@ def test_constructor_fails_on_duplicate_by_default(): library = Library(blocks=[get_dummy_entry(), get_dummy_entry()], fail_on_duplicate_key=False) assert len(library.blocks) == 2 assert len(library.failed_blocks) == 1 + + +def test_remove_prefers_identical_instance_over_equal_block(): + """With two equal blocks, remove() must remove the passed instance. See issue 537.""" + first_comment = ImplicitComment("#same") + second_comment = ImplicitComment("#same") + assert first_comment == second_comment + + library = Library() + library.add([first_comment, second_comment]) + + library.remove(second_comment) + assert len(library.blocks) == 1 + assert library.blocks[0] is first_comment + + +def test_replace_prefers_identical_instance_over_equal_block(): + """With two equal blocks, replace() must replace the passed instance. See issue 537.""" + first_comment = ImplicitComment("#same") + second_comment = ImplicitComment("#same") + assert first_comment == second_comment + + library = Library() + library.add([first_comment, second_comment]) + + new_comment = ImplicitComment("#new") + library.replace(second_comment, new_comment) + assert len(library.blocks) == 2 + assert library.blocks[0] is first_comment + assert library.blocks[1] is new_comment diff --git a/tests/test_model.py b/tests/test_model.py index 497fce4..ece3d51 100644 --- a/tests/test_model.py +++ b/tests/test_model.py @@ -368,6 +368,37 @@ def test_string_enclosing_validation(): String("myKey", "myValue", 1, "raw", enclosing="invalid") +def test_entry_hash(): + # Equal entries have equal hashes + entry_1 = Entry("article", "key", [Field("field", "value", 1)], 1, "raw") + entry_2 = Entry("article", "key", [Field("field", "value", 1)], 1, "raw") + assert hash(entry_1) == hash(entry_2) + # Entries are usable in sets and dicts + assert len({entry_1, deepcopy(entry_1)}) == 1 + assert {entry_1: "value"}[entry_2] == "value" + + +def test_string_hash(): + # Equal strings have equal hashes + string_1 = String("key", "value", 1, "raw") + string_2 = String("key", "value", 1, "raw") + assert hash(string_1) == hash(string_2) + # Strings are usable in sets and dicts + assert len({string_1, deepcopy(string_1)}) == 1 + assert {string_1: "value"}[string_2] == "value" + + +def test_field_hash(): + # Equal fields have equal hashes + field_1 = Field("field", "value", 1) + field_2 = Field("field", "value", 1) + assert hash(field_1) == hash(field_2) + # Fields are usable in sets and dicts, even with unhashable values + field_1.value = ["some", "unhashable", "value"] + assert len({field_1, deepcopy(field_1)}) == 1 + assert {field_1: "value"}[deepcopy(field_1)] == "value" + + def test_entry_fields_shorthand(): entry = Entry( entry_type="article",