From 8192e0925bf9562ac80fcceb95ad73a6e3baadd0 Mon Sep 17 00:00:00 2001 From: Dimitri Yatsenko Date: Fri, 27 Mar 2026 15:53:42 -0500 Subject: [PATCH] docs+test: document skip_duplicates behavior with secondary unique constraints MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolves #1049 — on PostgreSQL, skip_duplicates=True already enforces secondary unique constraints (ON CONFLICT (pk) DO NOTHING targets only the primary key). On MySQL, ON DUPLICATE KEY UPDATE catches all unique keys, silently skipping secondary violations too. Changes: - Update insert() docstring to document the backend difference. - Add integration tests covering: PK-only skip, secondary unique violation on PostgreSQL (raises), MySQL silent skip (documented asymmetry), composite unique indexes, batch inserts with mixed duplicates, and tables without secondary unique indexes. Co-Authored-By: Claude Opus 4.6 (1M context) --- src/datajoint/table.py | 8 +- tests/integration/test_skip_duplicates.py | 205 ++++++++++++++++++++++ 2 files changed, 212 insertions(+), 1 deletion(-) create mode 100644 tests/integration/test_skip_duplicates.py diff --git a/src/datajoint/table.py b/src/datajoint/table.py index 256fab6e9..028493fc0 100644 --- a/src/datajoint/table.py +++ b/src/datajoint/table.py @@ -745,7 +745,13 @@ def insert( replace : bool, optional If True, replaces the existing tuple. skip_duplicates : bool, optional - If True, silently skip duplicate inserts. + If True, silently skip rows with duplicate primary key values. + On **PostgreSQL**, secondary unique constraint violations still + raise an error even when ``skip_duplicates=True``, because the + generated ``ON CONFLICT (pk) DO NOTHING`` clause targets only + the primary key. On **MySQL**, ``ON DUPLICATE KEY UPDATE`` + catches all unique-key conflicts, so secondary unique violations + are also silently skipped. ignore_extra_fields : bool, optional If False (default), fields that are not in the heading raise error. allow_direct_insert : bool, optional diff --git a/tests/integration/test_skip_duplicates.py b/tests/integration/test_skip_duplicates.py new file mode 100644 index 000000000..132921cd5 --- /dev/null +++ b/tests/integration/test_skip_duplicates.py @@ -0,0 +1,205 @@ +""" +Tests for skip_duplicates behavior with secondary unique constraints. + +Verifies that skip_duplicates=True on PostgreSQL skips primary key +duplicates while still raising on secondary unique constraint violations. +Resolves #1049. +""" + +import time + +import pytest + +import datajoint as dj +from datajoint.errors import DuplicateError + + +@pytest.fixture(scope="function") +def schema_by_backend(connection_by_backend, db_creds_by_backend): + """Create a fresh schema per test, parameterized across backends.""" + backend = db_creds_by_backend["backend"] + test_id = str(int(time.time() * 1000))[-8:] + schema_name = f"djtest_skipdup_{backend}_{test_id}"[:64] + + if connection_by_backend.is_connected: + try: + connection_by_backend.query( + f"DROP DATABASE IF EXISTS {connection_by_backend.adapter.quote_identifier(schema_name)}" + ) + except Exception: + pass + + schema = dj.Schema(schema_name, connection=connection_by_backend) + yield schema + + if connection_by_backend.is_connected: + try: + connection_by_backend.query( + f"DROP DATABASE IF EXISTS {connection_by_backend.adapter.quote_identifier(schema_name)}" + ) + except Exception: + pass + + +def test_skip_duplicates_pk_match(schema_by_backend): + """skip_duplicates=True silently skips rows whose PK already exists.""" + + @schema_by_backend + class Item(dj.Manual): + definition = """ + item_id : int + --- + name : varchar(100) + email : varchar(100) + unique index (email) + """ + + Item.insert1(dict(item_id=1, name="Alice", email="alice@example.com")) + + # Same PK, different values — should be silently skipped + Item.insert1( + dict(item_id=1, name="Bob", email="bob@example.com"), + skip_duplicates=True, + ) + + # Original row unchanged + row = (Item & "item_id=1").fetch1() + assert row["name"] == "Alice" + assert row["email"] == "alice@example.com" + + +def test_skip_duplicates_unique_violation_raises_on_postgres(schema_by_backend, db_creds_by_backend): + """On PostgreSQL, skip_duplicates=True still raises on secondary unique violations. + + Regression test for #1049: a row with a *new* PK but a *conflicting* + secondary unique index value must raise DuplicateError on PostgreSQL. + """ + if db_creds_by_backend["backend"] != "postgresql": + pytest.skip("PostgreSQL-specific: ON CONFLICT (pk) DO NOTHING preserves unique constraints") + + @schema_by_backend + class Item(dj.Manual): + definition = """ + item_id : int + --- + name : varchar(100) + email : varchar(100) + unique index (email) + """ + + Item.insert1(dict(item_id=1, name="Alice", email="alice@example.com")) + + # New PK (2) but email conflicts with existing row (1) + with pytest.raises(DuplicateError): + Item.insert1( + dict(item_id=2, name="Bob", email="alice@example.com"), + skip_duplicates=True, + ) + + +def test_skip_duplicates_unique_on_mysql(schema_by_backend, db_creds_by_backend): + """On MySQL, skip_duplicates=True silently skips secondary unique conflicts. + + Documents the known MySQL asymmetry: ON DUPLICATE KEY UPDATE catches + all unique key conflicts, not just primary key. + """ + if db_creds_by_backend["backend"] != "mysql": + pytest.skip("MySQL-specific: ON DUPLICATE KEY UPDATE catches all unique keys") + + @schema_by_backend + class Item(dj.Manual): + definition = """ + item_id : int + --- + name : varchar(100) + email : varchar(100) + unique index (email) + """ + + Item.insert1(dict(item_id=1, name="Alice", email="alice@example.com")) + + # New PK (2) but email conflicts — MySQL silently skips + Item.insert1( + dict(item_id=2, name="Bob", email="alice@example.com"), + skip_duplicates=True, + ) + + # Only the original row exists + assert len(Item()) == 1 + assert (Item & "item_id=1").fetch1()["name"] == "Alice" + + +def test_skip_duplicates_no_unique_index(schema_by_backend): + """skip_duplicates=True works normally on tables without secondary unique indexes.""" + + @schema_by_backend + class Simple(dj.Manual): + definition = """ + item_id : int + --- + name : varchar(100) + """ + + Simple.insert1(dict(item_id=1, name="Alice")) + + # Same PK, different name — silently skipped + Simple.insert1(dict(item_id=1, name="Bob"), skip_duplicates=True) + assert (Simple & "item_id=1").fetch1()["name"] == "Alice" + + # New PK — inserted + Simple.insert1(dict(item_id=2, name="Bob"), skip_duplicates=True) + assert len(Simple()) == 2 + + +def test_skip_duplicates_composite_unique(schema_by_backend, db_creds_by_backend): + """skip_duplicates=True with a composite secondary unique index.""" + if db_creds_by_backend["backend"] != "postgresql": + pytest.skip("PostgreSQL-specific unique constraint enforcement") + + @schema_by_backend + class Record(dj.Manual): + definition = """ + record_id : int + --- + first_name : varchar(100) + last_name : varchar(100) + data : varchar(255) + unique index (first_name, last_name) + """ + + Record.insert1(dict(record_id=1, first_name="Alice", last_name="Smith", data="v1")) + + # New PK but composite unique (first_name, last_name) conflicts + with pytest.raises(DuplicateError): + Record.insert1( + dict(record_id=2, first_name="Alice", last_name="Smith", data="v2"), + skip_duplicates=True, + ) + + +def test_skip_duplicates_batch_mixed(schema_by_backend, db_creds_by_backend): + """Batch insert with skip_duplicates=True: PK duplicates skipped, unique conflicts raise.""" + if db_creds_by_backend["backend"] != "postgresql": + pytest.skip("PostgreSQL-specific unique constraint enforcement") + + @schema_by_backend + class Item(dj.Manual): + definition = """ + item_id : int + --- + email : varchar(100) + unique index (email) + """ + + Item.insert1(dict(item_id=1, email="alice@example.com")) + + # Batch: row 2 is new (OK), row 1 is PK dup (skip), row 3 conflicts on email + with pytest.raises(DuplicateError): + Item.insert( + [ + dict(item_id=2, email="bob@example.com"), + dict(item_id=1, email="duplicate-pk@example.com"), # PK dup — skipped + dict(item_id=3, email="alice@example.com"), # unique conflict — error + ], + skip_duplicates=True, + )