mirror of
https://github.com/langgenius/dify.git
synced 2026-05-09 04:36:31 +08:00
Signed-off-by: EvanYao826 <2869018789@qq.com>
This commit is contained in:
parent
af754f497a
commit
4d80892d7b
@ -842,24 +842,24 @@ class WorkflowResponseConverter:
|
||||
return []
|
||||
|
||||
files: list[Mapping[str, Any]] = []
|
||||
if isinstance(value, FileSegment):
|
||||
files.append(value.value.to_dict())
|
||||
elif isinstance(value, ArrayFileSegment):
|
||||
files.extend([i.to_dict() for i in value.value])
|
||||
elif isinstance(value, File):
|
||||
files.append(value.to_dict())
|
||||
elif isinstance(value, list):
|
||||
for item in value:
|
||||
file = cls._get_file_var_from_value(item)
|
||||
match value:
|
||||
case FileSegment():
|
||||
files.append(value.value.to_dict())
|
||||
case ArrayFileSegment():
|
||||
files.extend([i.to_dict() for i in value.value])
|
||||
case File():
|
||||
files.append(value.to_dict())
|
||||
case list():
|
||||
for item in value:
|
||||
file = cls._get_file_var_from_value(item)
|
||||
if file:
|
||||
files.append(file)
|
||||
case dict():
|
||||
file = cls._get_file_var_from_value(value)
|
||||
if file:
|
||||
files.append(file)
|
||||
elif isinstance(
|
||||
value,
|
||||
dict,
|
||||
):
|
||||
file = cls._get_file_var_from_value(value)
|
||||
if file:
|
||||
files.append(file)
|
||||
case _:
|
||||
pass
|
||||
|
||||
return files
|
||||
|
||||
|
||||
@ -53,24 +53,27 @@ class PromptMessageUtil:
|
||||
files = []
|
||||
if isinstance(prompt_message.content, list):
|
||||
for content in prompt_message.content:
|
||||
if isinstance(content, TextPromptMessageContent):
|
||||
text += content.data
|
||||
elif isinstance(content, ImagePromptMessageContent):
|
||||
files.append(
|
||||
{
|
||||
"type": "image",
|
||||
"data": content.data[:10] + "...[TRUNCATED]..." + content.data[-10:],
|
||||
"detail": content.detail.value,
|
||||
}
|
||||
)
|
||||
elif isinstance(content, AudioPromptMessageContent):
|
||||
files.append(
|
||||
{
|
||||
"type": "audio",
|
||||
"data": content.data[:10] + "...[TRUNCATED]..." + content.data[-10:],
|
||||
"format": content.format,
|
||||
}
|
||||
)
|
||||
match content:
|
||||
case TextPromptMessageContent():
|
||||
text += content.data
|
||||
case ImagePromptMessageContent():
|
||||
files.append(
|
||||
{
|
||||
"type": "image",
|
||||
"data": content.data[:10] + "...[TRUNCATED]..." + content.data[-10:],
|
||||
"detail": content.detail.value,
|
||||
}
|
||||
)
|
||||
case AudioPromptMessageContent():
|
||||
files.append(
|
||||
{
|
||||
"type": "audio",
|
||||
"data": content.data[:10] + "...[TRUNCATED]..." + content.data[-10:],
|
||||
"format": content.format,
|
||||
}
|
||||
)
|
||||
case _:
|
||||
continue
|
||||
else:
|
||||
text = cast(str, prompt_message.content)
|
||||
|
||||
|
||||
@ -23,36 +23,37 @@ _TOOL_FILE_URL_PATTERN = re.compile(r"(?:^|/+)files/tools/(?P<tool_file_id>[^/?#
|
||||
|
||||
|
||||
def safe_json_value(v):
|
||||
if isinstance(v, datetime):
|
||||
tz_name = "UTC"
|
||||
if isinstance(current_user, Account) and current_user.timezone is not None:
|
||||
tz_name = current_user.timezone
|
||||
return v.astimezone(pytz.timezone(tz_name)).isoformat()
|
||||
elif isinstance(v, date):
|
||||
return v.isoformat()
|
||||
elif isinstance(v, UUID):
|
||||
return str(v)
|
||||
elif isinstance(v, Decimal):
|
||||
return float(v)
|
||||
elif isinstance(v, bytes):
|
||||
try:
|
||||
return v.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
return v.hex()
|
||||
elif isinstance(v, memoryview):
|
||||
return v.tobytes().hex()
|
||||
elif isinstance(v, np.integer):
|
||||
return int(v)
|
||||
elif isinstance(v, np.floating):
|
||||
return float(v)
|
||||
elif isinstance(v, np.ndarray):
|
||||
return v.tolist()
|
||||
elif isinstance(v, dict):
|
||||
return safe_json_dict(v)
|
||||
elif isinstance(v, list | tuple | set):
|
||||
return [safe_json_value(i) for i in v]
|
||||
else:
|
||||
return v
|
||||
match v:
|
||||
case datetime():
|
||||
tz_name = "UTC"
|
||||
if isinstance(current_user, Account) and current_user.timezone is not None:
|
||||
tz_name = current_user.timezone
|
||||
return v.astimezone(pytz.timezone(tz_name)).isoformat()
|
||||
case date():
|
||||
return v.isoformat()
|
||||
case UUID():
|
||||
return str(v)
|
||||
case Decimal():
|
||||
return float(v)
|
||||
case bytes():
|
||||
try:
|
||||
return v.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
return v.hex()
|
||||
case memoryview():
|
||||
return v.tobytes().hex()
|
||||
case np.integer():
|
||||
return int(v)
|
||||
case np.floating():
|
||||
return float(v)
|
||||
case np.ndarray():
|
||||
return v.tolist()
|
||||
case dict():
|
||||
return safe_json_dict(v)
|
||||
case list() | tuple() | set():
|
||||
return [safe_json_value(i) for i in v]
|
||||
case _:
|
||||
return v
|
||||
|
||||
|
||||
def safe_json_dict(d: dict[str, Any]):
|
||||
|
||||
@ -194,14 +194,15 @@ class VariableTruncator(BaseTruncator):
|
||||
|
||||
result: _PartResult[Any]
|
||||
# Apply type-specific truncation with target size
|
||||
if isinstance(segment, ArraySegment):
|
||||
result = self._truncate_array(segment.value, target_size)
|
||||
elif isinstance(segment, StringSegment):
|
||||
result = self._truncate_string(segment.value, target_size)
|
||||
elif isinstance(segment, ObjectSegment):
|
||||
result = self._truncate_object(segment.value, target_size)
|
||||
else:
|
||||
raise AssertionError("this should be unreachable.")
|
||||
match segment:
|
||||
case ArraySegment():
|
||||
result = self._truncate_array(segment.value, target_size)
|
||||
case StringSegment():
|
||||
result = self._truncate_string(segment.value, target_size)
|
||||
case ObjectSegment():
|
||||
result = self._truncate_object(segment.value, target_size)
|
||||
case _:
|
||||
raise AssertionError("this should be unreachable.")
|
||||
|
||||
return _PartResult(
|
||||
value=segment.model_copy(update={"value": result.value}),
|
||||
@ -219,40 +220,41 @@ class VariableTruncator(BaseTruncator):
|
||||
return VariableTruncator.calculate_json_size(value.model_dump(), depth=depth + 1)
|
||||
if depth > _MAX_DEPTH:
|
||||
raise MaxDepthExceededError()
|
||||
if isinstance(value, str):
|
||||
# Ideally, the size of strings should be calculated based on their utf-8 encoded length.
|
||||
# However, this adds complexity as we would need to compute encoded sizes consistently
|
||||
# throughout the code. Therefore, we approximate the size using the string's length.
|
||||
# Rough estimate: number of characters, plus 2 for quotes
|
||||
return len(value) + 2
|
||||
elif isinstance(value, (int, float)):
|
||||
return len(str(value))
|
||||
elif isinstance(value, bool):
|
||||
return 4 if value else 5 # "true" or "false"
|
||||
elif value is None:
|
||||
return 4 # "null"
|
||||
elif isinstance(value, list):
|
||||
# Size = sum of elements + separators + brackets
|
||||
total = 2 # "[]"
|
||||
for i, item in enumerate(value):
|
||||
if i > 0:
|
||||
total += 1 # ","
|
||||
total += VariableTruncator.calculate_json_size(item, depth=depth + 1)
|
||||
return total
|
||||
elif isinstance(value, dict):
|
||||
# Size = sum of keys + values + separators + brackets
|
||||
total = 2 # "{}"
|
||||
for index, key in enumerate(value.keys()):
|
||||
if index > 0:
|
||||
total += 1 # ","
|
||||
total += VariableTruncator.calculate_json_size(str(key), depth=depth + 1) # Key as string
|
||||
total += 1 # ":"
|
||||
total += VariableTruncator.calculate_json_size(value[key], depth=depth + 1)
|
||||
return total
|
||||
elif isinstance(value, File):
|
||||
return VariableTruncator.calculate_json_size(value.model_dump(), depth=depth + 1)
|
||||
else:
|
||||
raise UnknownTypeError(f"got unknown type {type(value)}")
|
||||
match value:
|
||||
case str():
|
||||
# Ideally, the size of strings should be calculated based on their utf-8 encoded length.
|
||||
# However, this adds complexity as we would need to compute encoded sizes consistently
|
||||
# throughout the code. Therefore, we approximate the size using the string's length.
|
||||
# Rough estimate: number of characters, plus 2 for quotes
|
||||
return len(value) + 2
|
||||
case bool():
|
||||
return 4 if value else 5 # "true" or "false"
|
||||
case int() | float():
|
||||
return len(str(value))
|
||||
case None:
|
||||
return 4 # "null"
|
||||
case list():
|
||||
# Size = sum of elements + separators + brackets
|
||||
total = 2 # "[]"
|
||||
for i, item in enumerate(value):
|
||||
if i > 0:
|
||||
total += 1 # ","
|
||||
total += VariableTruncator.calculate_json_size(item, depth=depth + 1)
|
||||
return total
|
||||
case dict():
|
||||
# Size = sum of keys + values + separators + brackets
|
||||
total = 2 # "{}"
|
||||
for index, key in enumerate(value.keys()):
|
||||
if index > 0:
|
||||
total += 1 # ","
|
||||
total += VariableTruncator.calculate_json_size(str(key), depth=depth + 1) # Key as string
|
||||
total += 1 # ":"
|
||||
total += VariableTruncator.calculate_json_size(value[key], depth=depth + 1)
|
||||
return total
|
||||
case File():
|
||||
return VariableTruncator.calculate_json_size(value.model_dump(), depth=depth + 1)
|
||||
case _:
|
||||
raise UnknownTypeError(f"got unknown type {type(value)}")
|
||||
|
||||
def _truncate_string(self, value: str, target_size: int) -> _PartResult[str]:
|
||||
if (size := self.calculate_json_size(value)) < target_size:
|
||||
@ -419,22 +421,23 @@ class VariableTruncator(BaseTruncator):
|
||||
target_size: int,
|
||||
) -> _PartResult[Any]:
|
||||
"""Truncate a value within an object to fit within budget."""
|
||||
if isinstance(val, UpdatedVariable):
|
||||
# TODO(Workflow): push UpdatedVariable normalization closer to its producer.
|
||||
return self._truncate_object(val.model_dump(), target_size)
|
||||
elif isinstance(val, str):
|
||||
return self._truncate_string(val, target_size)
|
||||
elif isinstance(val, list):
|
||||
return self._truncate_array(val, target_size)
|
||||
elif isinstance(val, dict):
|
||||
return self._truncate_object(val, target_size)
|
||||
elif isinstance(val, File):
|
||||
# File objects should not be truncated, return as-is
|
||||
return _PartResult(val, self.calculate_json_size(val), False)
|
||||
elif val is None or isinstance(val, (bool, int, float)):
|
||||
return _PartResult(val, self.calculate_json_size(val), False)
|
||||
else:
|
||||
raise AssertionError("this statement should be unreachable.")
|
||||
match val:
|
||||
case UpdatedVariable():
|
||||
# TODO(Workflow): push UpdatedVariable normalization closer to its producer.
|
||||
return self._truncate_object(val.model_dump(), target_size)
|
||||
case str():
|
||||
return self._truncate_string(val, target_size)
|
||||
case list():
|
||||
return self._truncate_array(val, target_size)
|
||||
case dict():
|
||||
return self._truncate_object(val, target_size)
|
||||
case File():
|
||||
# File objects should not be truncated, return as-is
|
||||
return _PartResult(val, self.calculate_json_size(val), False)
|
||||
case None | bool() | int() | float():
|
||||
return _PartResult(val, self.calculate_json_size(val), False)
|
||||
case _:
|
||||
raise AssertionError("this statement should be unreachable.")
|
||||
|
||||
|
||||
class DummyVariableTruncator(BaseTruncator):
|
||||
|
||||
Loading…
Reference in New Issue
Block a user