mirror of
https://github.com/langgenius/dify.git
synced 2026-04-28 20:17:29 +08:00
add qdrant to tidb
This commit is contained in:
parent
201de91665
commit
03e3208c71
128
api/commands.py
128
api/commands.py
@ -32,7 +32,7 @@ from libs.helper import email as email_validate
|
|||||||
from libs.password import hash_password, password_pattern, valid_password
|
from libs.password import hash_password, password_pattern, valid_password
|
||||||
from libs.rsa import generate_key_pair
|
from libs.rsa import generate_key_pair
|
||||||
from models import Tenant
|
from models import Tenant
|
||||||
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding, DocumentSegment
|
from models.dataset import Dataset, DatasetCollectionBinding, DatasetMetadata, DatasetMetadataBinding
|
||||||
from models.dataset import Document as DatasetDocument
|
from models.dataset import Document as DatasetDocument
|
||||||
from models.model import Account, App, AppAnnotationSetting, AppMode, Conversation, MessageAnnotation, UploadFile
|
from models.model import Account, App, AppAnnotationSetting, AppMode, Conversation, MessageAnnotation, UploadFile
|
||||||
from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
|
from models.oauth import DatasourceOauthParamConfig, DatasourceProvider
|
||||||
@ -175,54 +175,67 @@ def migrate_annotation_vector_database():
|
|||||||
create_count = 0
|
create_count = 0
|
||||||
skipped_count = 0
|
skipped_count = 0
|
||||||
total_count = 0
|
total_count = 0
|
||||||
page = 1
|
error_count = 0
|
||||||
|
per_page = 50
|
||||||
|
# Keyset pagination on AppAnnotationSetting (much smaller dataset than App table)
|
||||||
|
last_created_at = None
|
||||||
|
last_id = None
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
# get apps info
|
|
||||||
per_page = 50
|
|
||||||
with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
|
with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
|
||||||
apps = (
|
# Query AppAnnotationSetting directly instead of scanning all Apps
|
||||||
session.query(App)
|
query = (
|
||||||
|
session.query(AppAnnotationSetting, App, DatasetCollectionBinding)
|
||||||
|
.join(App, App.id == AppAnnotationSetting.app_id)
|
||||||
|
.join(
|
||||||
|
DatasetCollectionBinding,
|
||||||
|
DatasetCollectionBinding.id == AppAnnotationSetting.collection_binding_id,
|
||||||
|
)
|
||||||
.where(App.status == "normal")
|
.where(App.status == "normal")
|
||||||
.order_by(App.created_at.desc())
|
)
|
||||||
|
|
||||||
|
# Apply keyset pagination condition
|
||||||
|
if last_created_at is not None and last_id is not None:
|
||||||
|
query = query.where(
|
||||||
|
sa.or_(
|
||||||
|
AppAnnotationSetting.created_at < last_created_at,
|
||||||
|
sa.and_(
|
||||||
|
AppAnnotationSetting.created_at == last_created_at,
|
||||||
|
AppAnnotationSetting.id < last_id,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
results = (
|
||||||
|
query.order_by(AppAnnotationSetting.created_at.desc(), AppAnnotationSetting.id.desc())
|
||||||
.limit(per_page)
|
.limit(per_page)
|
||||||
.offset((page - 1) * per_page)
|
|
||||||
.all()
|
.all()
|
||||||
)
|
)
|
||||||
if not apps:
|
|
||||||
|
if not results:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Update cursor to the last record of current batch
|
||||||
|
last_created_at = results[-1][0].created_at
|
||||||
|
last_id = results[-1][0].id
|
||||||
except SQLAlchemyError:
|
except SQLAlchemyError:
|
||||||
raise
|
raise
|
||||||
|
|
||||||
page += 1
|
for app_annotation_setting, app, dataset_collection_binding in results:
|
||||||
for app in apps:
|
total_count += 1
|
||||||
total_count = total_count + 1
|
|
||||||
click.echo(
|
click.echo(
|
||||||
f"Processing the {total_count} app {app.id}. " + f"{create_count} created, {skipped_count} skipped."
|
f"Processing the {total_count} app {app.id}. {create_count} created, {skipped_count} skipped."
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
click.echo(f"Creating app annotation index: {app.id}")
|
click.echo(f"Creating app annotation index: {app.id}")
|
||||||
|
if not app_annotation_setting:
|
||||||
|
skipped_count += 1
|
||||||
|
continue
|
||||||
with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
|
with sessionmaker(db.engine, expire_on_commit=False).begin() as session:
|
||||||
app_annotation_setting = (
|
|
||||||
session.query(AppAnnotationSetting).where(AppAnnotationSetting.app_id == app.id).first()
|
|
||||||
)
|
|
||||||
|
|
||||||
if not app_annotation_setting:
|
|
||||||
skipped_count = skipped_count + 1
|
|
||||||
click.echo(f"App annotation setting disabled: {app.id}")
|
|
||||||
continue
|
|
||||||
# get dataset_collection_binding info
|
|
||||||
dataset_collection_binding = (
|
|
||||||
session.query(DatasetCollectionBinding)
|
|
||||||
.where(DatasetCollectionBinding.id == app_annotation_setting.collection_binding_id)
|
|
||||||
.first()
|
|
||||||
)
|
|
||||||
if not dataset_collection_binding:
|
|
||||||
click.echo(f"App annotation collection binding not found: {app.id}")
|
|
||||||
continue
|
|
||||||
annotations = session.scalars(
|
annotations = session.scalars(
|
||||||
select(MessageAnnotation).where(MessageAnnotation.app_id == app.id)
|
select(MessageAnnotation).where(MessageAnnotation.app_id == app.id)
|
||||||
).all()
|
).all()
|
||||||
|
|
||||||
dataset = Dataset(
|
dataset = Dataset(
|
||||||
id=app.id,
|
id=app.id,
|
||||||
tenant_id=app.tenant_id,
|
tenant_id=app.tenant_id,
|
||||||
@ -230,6 +243,12 @@ def migrate_annotation_vector_database():
|
|||||||
embedding_model_provider=dataset_collection_binding.provider_name,
|
embedding_model_provider=dataset_collection_binding.provider_name,
|
||||||
embedding_model=dataset_collection_binding.model_name,
|
embedding_model=dataset_collection_binding.model_name,
|
||||||
collection_binding_id=dataset_collection_binding.id,
|
collection_binding_id=dataset_collection_binding.id,
|
||||||
|
index_struct=json.dumps({
|
||||||
|
"type": VectorType.QDRANT,
|
||||||
|
"vector_store": {
|
||||||
|
"class_prefix": dataset_collection_binding.collection_name,
|
||||||
|
}
|
||||||
|
}),
|
||||||
)
|
)
|
||||||
documents = []
|
documents = []
|
||||||
if annotations:
|
if annotations:
|
||||||
@ -240,31 +259,34 @@ def migrate_annotation_vector_database():
|
|||||||
)
|
)
|
||||||
documents.append(document)
|
documents.append(document)
|
||||||
|
|
||||||
vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
|
original_vector = Vector(dataset, attributes=["doc_id", "annotation_id", "app_id"])
|
||||||
click.echo(f"Migrating annotations for app: {app.id}.")
|
click.echo(f"Migrating annotations for app: {app.id}.")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
vector.delete()
|
original_annotarions = original_vector.search_by_metadata_field("app_id", app.id)
|
||||||
click.echo(click.style(f"Deleted vector index for app {app.id}.", fg="green"))
|
if original_annotarions:
|
||||||
except Exception as e:
|
new_dataset = Dataset(
|
||||||
click.echo(click.style(f"Failed to delete vector index for app {app.id}.", fg="red"))
|
id=app.id,
|
||||||
raise e
|
tenant_id=app.tenant_id,
|
||||||
if documents:
|
indexing_technique="high_quality",
|
||||||
try:
|
embedding_model_provider=dataset_collection_binding.provider_name,
|
||||||
click.echo(
|
embedding_model=dataset_collection_binding.model_name,
|
||||||
click.style(
|
collection_binding_id=dataset_collection_binding.id,
|
||||||
f"Creating vector index with {len(documents)} annotations for app {app.id}.",
|
|
||||||
fg="green",
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
vector.create(documents)
|
new_vector = Vector(new_dataset)
|
||||||
|
new_vector.create_with_vectors(original_annotarions)
|
||||||
click.echo(click.style(f"Created vector index for app {app.id}.", fg="green"))
|
click.echo(click.style(f"Created vector index for app {app.id}.", fg="green"))
|
||||||
except Exception as e:
|
else:
|
||||||
click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red"))
|
click.echo(click.style(f"No original annotations found for app {app.id}.", fg="green"))
|
||||||
raise e
|
skipped_count += 1
|
||||||
|
continue
|
||||||
|
except Exception as e:
|
||||||
|
click.echo(click.style(f"Failed to created vector index for app {app.id}.", fg="red"))
|
||||||
|
raise e
|
||||||
click.echo(f"Successfully migrated app annotation {app.id}.")
|
click.echo(f"Successfully migrated app annotation {app.id}.")
|
||||||
create_count += 1
|
create_count += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
error_count += 1
|
||||||
click.echo(
|
click.echo(
|
||||||
click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red")
|
click.style(f"Error creating app annotation index: {e.__class__.__name__} {str(e)}", fg="red")
|
||||||
)
|
)
|
||||||
@ -272,7 +294,7 @@ def migrate_annotation_vector_database():
|
|||||||
|
|
||||||
click.echo(
|
click.echo(
|
||||||
click.style(
|
click.style(
|
||||||
f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps.",
|
f"Migration complete. Created {create_count} app annotation indexes. Skipped {skipped_count} apps. Errors {error_count} apps.",
|
||||||
fg="green",
|
fg="green",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
@ -299,7 +321,7 @@ def migrate_knowledge_vector_database():
|
|||||||
VectorType.OPENGAUSS,
|
VectorType.OPENGAUSS,
|
||||||
VectorType.TABLESTORE,
|
VectorType.TABLESTORE,
|
||||||
VectorType.MATRIXONE,
|
VectorType.MATRIXONE,
|
||||||
VectorType.TIDB_ON_QDRANT
|
VectorType.TIDB_ON_QDRANT,
|
||||||
}
|
}
|
||||||
lower_collection_vector_types = {
|
lower_collection_vector_types = {
|
||||||
VectorType.ANALYTICDB,
|
VectorType.ANALYTICDB,
|
||||||
@ -391,8 +413,9 @@ def migrate_knowledge_vector_database():
|
|||||||
segments_count = 0
|
segments_count = 0
|
||||||
original_index_vector = Vector(dataset)
|
original_index_vector = Vector(dataset)
|
||||||
for dataset_document in dataset_documents:
|
for dataset_document in dataset_documents:
|
||||||
|
single_documents = original_index_vector.search_by_metadata_field(
|
||||||
single_documents = original_index_vector.search_by_metadata_field("document_id", dataset_document.id)
|
"document_id", dataset_document.id
|
||||||
|
)
|
||||||
|
|
||||||
if single_documents:
|
if single_documents:
|
||||||
documents.extend(single_documents)
|
documents.extend(single_documents)
|
||||||
@ -431,7 +454,8 @@ def migrate_knowledge_vector_database():
|
|||||||
|
|
||||||
click.echo(
|
click.echo(
|
||||||
click.style(
|
click.style(
|
||||||
f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets. Errors {error_count} datasets.", fg="green"
|
f"Migration complete. Created {create_count} dataset indexes. Skipped {skipped_count} datasets. Errors {error_count} datasets.",
|
||||||
|
fg="green",
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user