diff --git a/api/migrations/versions/2025_12_10_1126-8797680b6263_add_index_to_dataset_name.py b/api/migrations/versions/2025_12_10_1126-8797680b6263_add_index_to_dataset_name.py
new file mode 100644
index 0000000000..095ea85857
--- /dev/null
+++ b/api/migrations/versions/2025_12_10_1126-8797680b6263_add_index_to_dataset_name.py
@@ -0,0 +1,31 @@
+"""add_index_to_dataset_name
+
+Revision ID: 8797680b6263
+Revises: d57accd375ae
+Create Date: 2025-12-10 11:26:04.884783
+
+"""
+from alembic import op
+import models as models
+import sqlalchemy as sa
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = '8797680b6263'
+down_revision = 'd57accd375ae'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('datasets', schema=None) as batch_op:
+        batch_op.create_index('dataset_tenant_name_idx', ['tenant_id', 'name'], unique=False)
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('datasets', schema=None) as batch_op:
+        batch_op.drop_index('dataset_tenant_name_idx')
+    # ### end Alembic commands ###
diff --git a/api/models/dataset.py b/api/models/dataset.py
index 445ac6086f..aadee8abdf 100644
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -46,6 +46,7 @@ class Dataset(Base):
     __table_args__ = (
         sa.PrimaryKeyConstraint("id", name="dataset_pkey"),
         sa.Index("dataset_tenant_idx", "tenant_id"),
+        sa.Index("dataset_tenant_name_idx", "tenant_id", "name"),
         adjusted_json_index("retrieval_model_idx", "retrieval_model"),
     )
 
diff --git a/api/services/dataset_service.py b/api/services/dataset_service.py
index ac4b25c5dc..e281f3c44e 100644
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@@ -2374,7 +2374,15 @@ class DocumentService:
 
             cut_length = 18
             cut_name = documents[0].name[:cut_length]
-            dataset.name = cut_name + "..."
+            proposed_name = cut_name + "..."
+
+            # Check for duplicate names and auto-increment if needed
+            existing_dataset_names = db.session.scalars(
+                select(Dataset.name).where(Dataset.tenant_id == tenant_id, Dataset.name.like(f"{proposed_name}%"))
+            ).all()
+            if existing_dataset_names:
+                proposed_name = generate_incremental_name(existing_dataset_names, proposed_name)
+            dataset.name = proposed_name
             dataset.description = "useful for when you want to answer queries about the " + documents[0].name
             db.session.commit()
 