diff --git a/api/constants/pipeline_templates.json b/api/constants/pipeline_templates.json index 32b42769e3..ac63ac39d2 100644 --- a/api/constants/pipeline_templates.json +++ b/api/constants/pipeline_templates.json @@ -50,6 +50,22 @@ "chunk_structure": "qa_model", "language": "en-US" }, + { + "id": "103825d3-7018-43ae-bcf0-f3c001f3eb69", + "name": "Contextual Enrichment Using LLM", + "description": "This knowledge pipeline uses LLMs to extract content from images and tables in documents and automatically generate descriptive annotations for contextual enrichment.", + "icon": { + "icon_type": "image", + "icon": "e642577f-da15-4c03-81b9-c9dec9189a3c", + "icon_background": null, + "icon_url": "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ//gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss/XQ+FFPtRK1UmreriMJkz/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L/DbbY/uozqmjwOUSvvVtuN8+tKLa4/73GI1KDEAYek8x7vta/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7//u2m8e9VyweGIdQAPenLpD/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL/YOcjg/X1IrKyvd3mo313JQKAXQLgSEgBGO3v/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB/G8FZXLwh8k761gt0PCJ8/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8a
VLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA/EHwDoO9rY/0cJ7iIC+JEgSQUwHpB4/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa03y7jNg7s/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm//EWkDqiiw1qR6W1TC7r11JlIurX/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9/aBROfCkQLT/Iugiwfp/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw/0wCy9WO595tiBVmLoviZBTBq/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE/v1MAjjI+rHcYgVZifz7mfo5pACsE/XRDycjlYUVhPvT1QV1dTmT/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP/n2k/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e/tRtuYtuPnd3he/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE/CGqZOfa5kAkOViENFy++A/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD/baSh8bDvA9zb1ZAe5N67J/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb/rQ2MzBxABG4ePMJAFhtC0o1o/VLo4/EYCD4GM5bEMYtYJi/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwK
Th4wQFQlZCfChgesH/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF//9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O/LoZClX/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT/l2N6O94WMl03iLx6QtwR/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pBwzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3/S/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN/vrq09CsfVAyB6JrRE/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0/D18PHAwHETdfX1x5SI/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq/Y8fTrFGENESMBQ/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c
/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd/E0A2Hh31YSYwnYlgHx/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf/6po5x6m7bEJa1q2JnURg/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https://dify.ai", + "position": 4, + "chunk_structure": "hierarchical_model", + "language": "en-US" + }, { "id": "982d1788-837a-40c8-b7de-d37b09a9b2bc", "name": "Convert to Markdown", @@ -81,6 +97,22 @@ "position": 6, "chunk_structure": "qa_model", "language": "en-US" + }, + { + "id": "629cb5b8-490a-48bc-808b-ffc13085cb4f", + "name": "Complex PDF with Images & Tables", + "description": "This Knowledge Pipeline extracts images and tables from complex PDF documents for downstream processing.", + "icon": { + "icon_type": "image", + "icon": "87426868-91d6-4774-a535-5fd4595a77b3", + "icon_background": null, + "icon_url": 
"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7/aIw93xPvBBHPDezBHYBbC7+O2Pb9++/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp/uehbXzPWuizmNoFaC4CQdFxCE3V9/bcd4vk8txpLwW/f6FPZ9RT8c/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU/u3//Uk/aPIB+a1rm5Y+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y/HaZJH1oAgnyflHZAPfrrSieOJkS/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn/HUDChQgkHIqAvcg3ijM5/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq/myUJUxCV+5/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx/bjpDSDEp7EgYLQgjWR8GEywTcBHmz/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKL
EkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+//JlMVdOrOfzrKY8p3/C9/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s//8U+x9/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd/xN/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO/CBMxwDWP2TN5JyATMMAFRNJBw98t/Z7yU4xePCTg+dqk9Wf/6a/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN/kJt3GZA3Yt2r5QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8/Yb7ALxxH5+lmBn+nY7H3/g04/qFnRJDtvvSWO/faTcbIoxDOFaYLnLl/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa/+9P/tH9Oj9kGKAaCTI85gSCQTN/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F/8iSWIvq1Zzod1oCxw
NlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy/MBU66HwmbXboI9qyZd160CiYBaLCww/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv/FYX+/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnURAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8z
o3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII=" + }, + "copyright": "Copyright 2023 Dify", + "privacy_policy": "https://dify.ai", + "position": 7, + "chunk_structure": "hierarchical_model", + "language": "en-US" } ] }, @@ -5153,7 +5185,7 @@ "language": "zh-Hans", "position": 5 }, - { + "103825d3-7018-43ae-bcf0-f3c001f3eb69": { "chunk_structure": "hierarchical_model", "description": "This knowledge pipeline uses LLMs to extract content from images and tables in documents and automatically generate descriptive annotations for contextual enrichment.", "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/anthropic:0.2.0@a776815b091c81662b2b54295ef4b8a54b5533c2ec1c66c7c8f2feea724f3248\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: e642577f-da15-4c03-81b9-c9dec9189a3c\n icon_background: null\n icon_type: image\n icon_url: 
data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAAP9UlEQVR4Ae2dTXPbxhnHdwFRr5ZN2b1kJraouk57i\/IJrJx6jDPT9Fpnkrvj3DOOv0DsXDvJxLk2nUnSW09hPkGc6aWdOBEtpZNLE9Gy3iiSQJ\/\/gg8DQnyFFiAAPjtDLbAA9uWPn5595VKrjLjtn\/YqrZaq+L6quL5X9pQqO1qtI3u+0mXy8MFJxfihP1qrss\/XQ+FFPtRK1UmreriMJkz\/GqaVX8N1z1dPHdyvnZpP1+fmVG3jhTVzDden6SjP6brt7b1y21VbWnk3CawKAbWp9Fmo0s3VbKamffWYgKz5vv+t1s5jt62qGxtrPVAnrUwqAH63u7dF\/4E3qaBbVCB8zjjHcZRDJs91XaXJpOGDMDgSx5zj2HWDMByz4\/v5fBZ80lLhE3Y498jcsfO8Nt1DlYbvmXs9L\/DbbY\/uozqmjwOUSvvVtuN8+tKLa4\/73GI1KDEAYek8x7vta\/0a5XiLcw1Y5uZcAxpgK5VKXeD4HvHTUaDdbivA2Go1yW+rZrPVkzDBUSOk7\/\/u2m8e9VyweGIdQAPenLpD\/3LvcLsM0C0szBNs8wY+nIvLpgKA8PS0YWBkKwkQyUo8un517b7tXFsl4cnO\/25p33lA7YoKMloqzanFxSXj2864xJe8Ao3GaRdGpAYQbVtEKwCS1au0Xf8TyuMWMirgQYXiOFjFw8PDcLvxC7ek79roSZ8bwO3dvTue77+P6hZV69LSElm9heKoLyXpKgCLeHx8zCBSb9m7e972YWwATVvPVfeoL\/YOcjg\/X1IrKyvd3mo313JQKAXQLgSEgBGO3v\/DG9eu3I1byFgAosr1HP9zauttitWLK32+nzs5aRgQMfSDoRtnXr8ep0qeGMAOfF+ho4FxuosXV7vjdfmWVHI\/qQKwhvv7z02VTCDVnJJ+dVIIJwIwDB\/G8FZXLwh8k761gt0PCJ8\/PzDjiHEgHBvAKHywfDKeVzCaYhYH1TAsIQazJ4VwLAAFvphvZoYeiwvh2YnVPqJ1OhwVVLti+foIJEGmNgQbYISG5Creqf85Ga7yKGlGAvj9zh5mNjbR4UCbT6rdUZLO7nWwwf0CMNNyvXuj1BhaBdPU2m2lnE8Q8aVLF6XDMUpNuW4UQMfk2bN9swKHqua7N9avPBwkzUAATbvP9b\/BDMfy8rLMbgxSUML7KoBxwqOjI1yr07TdK4OGZwZWwTS3+wDwYRWLTK311VgChygAZjA7Rq7cbpp1An3v7gtgUPWqW2j3YW5XnCgQR4HQ1OzWk529W\/3i6AsgLakyjUfAx6uS+z0sYaLAMAXQd2ADRt9PedCvV3wGwO939+7xNBuqX3GiwHkUQFWM5XnUnKu0HM8sXAnHdwZA+grVbdwA8ylOFLChABYlw5FFvBO1gj0Aou0H6wdi8REnCthQIMRTmazg7XCcPQBy229+XhaUhkWS4\/MrELKC+JJa13UB3P5xb1Pafl1d5MCyArCC6JSQ28LXdDn6LoD09bzbCJSql6UR37YC3U6t521x3F0AtaNvIlCqX5ZGfNsK4Gu5cGQJDWs4NgCiZ0JLujYRIBYQKohLQgFsSMDVMPeGDYBtt72FBAW+JGSXOFkBwAcI4bA\/EHwDoO9rY\/0cJ7iIC+JEgSQUwHpB4\/ygHWgAJDJfRiD2aREnCiSpAANodkajhDoAqgoS7bfzFMLFiQK2FGAjR7WxMXqdKjjogDCdthKTeESBqAKdTgiCK\/jjUG8kOOjsxYdAcaJAUgoAQF5hhV1xndacVL9JiS3x9leArSC2ZHa0
3y7jNg7s\/4iEigL2FOChGGIPAOoKosY2uOJEgTQUYGNHw39lB7vRI1HszyxOFEhDAQaQ0io7fqc3EgpMIw+SxgwrwJ0QRzvr3XpXAJxhIqZYdKp59TrSl2m4Kb6FGUuajR3trLvWtYAzpoEUd4oKcIeXhgQvCYBTfBGStFJzm\/\/EWkDqiiw1qR6W1TC7r11JlIurX\/6caPy5iJx+uUkd7SOrFYfgM8MwNBKYi7xLJoulgFTBxXqfuSuNAJi7V1asDM99+8fLpvYtly91VykUq4jDSzPtNpntNme0PLbjH67meFexf2C9Hmx8QMOAwVQcj82MF4XcJQrEVyDEmpmKk9Uw8bWUJ2Mo0ANgjOflEVHAmgLSCbEmpUQURwEBMI5q8ow1BQRAa1JKRHEUyAWAPx7Rj+I1afpGXOEUyAWAn+2cqI9\/aBROfCkQLT\/Iugiwfp\/tNtRH3x+LFcz6y4qRv8wDCOu3a6pgX6xgjBec9UcyDSBbPxZRrCArURw\/0wCy9WO595tiBVmLoviZBTBq\/VhwsYKsRDH8zAIYtX4st1hBVqIYfiYBHGT9WHKxgqxE\/v1MAjjI+rHcYgVZifz7mfo5pACsE\/XRDycjlYUVhPvT1QV1dTmT\/0cjyyA30LfisiBCFzwz2Ezf0BvD4ZkP\/n2k\/kbjhH++tiggjqFZFm+ZKoBxwIuKiPaigBhVJT\/n+snOL8bkXL68llqubYA3KLMvUnU8iUVM+zsU0fQGlaPw4Yd1U8RULWCS4PELE4vISuTDT7X1DgCxC8OlUvLJ\/pqWfOE+yyimagFRPb77h2VTRaLz8PfdU1po0Laqz8WSVm\/9dlG9fX1J4VhcthVIFUCWIgkQ8wqe7e\/tRtuYtuPnd3he\/5dfglpwKgBy5m2AmFfwWINZ96cKIIsfBfFjGohGG26YE\/CGqZOfa5kAkOViENFy++A\/wUwHX4v6b1Eb793fL0WD5TxnCiTfHY0hCOAa1oF4cdlVb9AUnLj8K3AuAD\/baSh8bDvA9zb1ZAe5N67J\/O8gbfIWHrsKBnjvfnPQLS+gsOlgBbEoIdoWFOtnU+XpxxXLAkbhA4i2LeEgKyjWb\/rQ2MzBxABG4ePMJAFhtC0o1o\/VLo4\/EYCD4GM5bEMYtYJi\/Vjp4vhjAzgKPpbENoRsBcX6scLF8sfqhIwLH0sDCOFsdEzYCvq0lausfGaFi+OPBHBS+FgamxDCCj4bMTPC6YqfLwWGAhgXPpbAFoSwgviIK54CA9uA54WPpbLdJuR4xS+GAn0BtAUfSyQQshLiRxU4A6Bt+DhBgZCVED+sQA+AScHHCQqErIT4rEAXwKTh4wQFQlZCfChgesH\/+G9DvfdDenswA0I4G+OEJiL5k1sFHAPfvw5TL4BYwtQlz2SCzntTgI+VEAhZidn1u23AaUkgEE5L+WykO3UAIYNAmA0YppGLTAAoEE7j1WcjzcwAKBBmA4i0c5EpAAXCtF\/\/9NPLHIAC4fShSDMHmQRQIEwTgemmlVkABcLpgpFW6pkGUCBMC4PppZN5AAXC6cGRRsq5AFAgTAOF6aSRGwAFwukAknSquQJQIEwah\/Tjzx2AAmH6kCSZYi4BFAiTRCLduHMLoECYLihJpUYA6uAna+j3O\/LoZClX\/t4afium4+oEoJ9rAFEQgZDfZz78MIB65a9PtinbFbV0USkn1zWyFfWT\/l2N6O94WMl03iLx6QtwR\/vIdU2Iy9vLK1h+BcCCvdC8FUcAzNsbK0J+u50QXcfvBX9FZdpaXV1VpdLQ3dqKUHQpQwYUaDZb6vnz58hJVSxgBl7ILGcBAJphmFDXeJb1kLKnrIDj+f4zpOmjayxOFEhBAc8LfiNaKy3DMCnoLUlEFOj2QSjcoZ2Xa7jueWIBoYO45BXg2tbzvaeY+zBtQM\/rzs8lnwNJYaYVCPU36k5bd+aClQA401SkWHiubbV2ao7Wbg1pt1pB
wzDFfEhSM6oAW0Bfq7oz1wragBw4o5pIsVNUoN0O+htzc7QYYWNjrYa0YRYFwhTfwgwnxVXwxgtrnWEYX6zgDPOQatG5qad99RgJB1NxOjhpNpupZkYSmz0FeBCaKuGnKH0AoO+bE6Zz9mSREqelQKvV6iTlhy2gX0Uo09m5QzxRwLoC7XZnGk47vwLott0qUoIFlI6Idc0lwpACWIoF57ZVFb6pgqknjNmQKuCTahiyiEtCAYYPHZAOc502IKVG8H2NRE9PT5NIW+IUBYithlHBVwFrOAk6IebIqcITAKGCuCQUYAvoec4jjr8L4I2ra1UKNNUw38g3iS8KnFeBRqNhJjuw+uqljTXTAUGcXQBxon3\/S\/gnJ8fwxIkC1hTgmtVX+n440h4AHTKNRGgdFlCsYFgmOT6PAswTrN\/vrq09CsfVAyB6JrRE\/0PcIFYwLJMcn0eBw8Pg11iJrU+j8RCUvW57e6\/sOf43tFSmsry8pBYXF3tvkDNRYAIF0PY7PDxSsH7Xr13eiD7aYwFxEVbQ1\/oujo+PT2RgGkKIi6UAll2BIbho248jPAMgLlA9\/QV5pkd8cJD+j1lz5sTPtwJoxnWWXn0RbftxyfoCiItuW79JZpM6JE1qDwYU80PiiwKjFDg5aahG4xRVb90tBTVqv2cGAkhVcU35QZcZZpRXsfaLRMJEgbACQdUbDOVR1XsXC0\/D18PHAwHETdfX1x5SI\/BDzBFjLw+BMCydHPdTAIyAFbOohdgZVPXys2Qhh7tOr\/gr6hVvuq6rLl5cVVqPfGx4pHK1kAoAuv19GKo2TWqox9fXL78yqqBDLSAeRq\/Y8fTrFGENESMBQ\/eomOX6TCnQAx8NuTjz+vVxBBjblJElrND4ICxhRSzhONLOzj1n4CvpV4e1+8LKjA0gHopCeOHCBeW6I41oOD05LpgCaPMdHBwE1S4s3wTwQYqJAMQDYQgd2tgDG1sKhFBm9hx3ODDWRyBNDB8UmxhAPNSB8HN0TNAhWVpalCk7CDNDDuN8x8fHpj+ADgfafONWu2GZYgHIETx5+vND6hLfwfnCwjxBuCTWkMUpqI\/2HhYXnJ52vsJLQy2u57yPzmqcIp8LQCT4ZGfvtlb+A9raqIwqGdZwYWEhTl7kmYwr0GP1aIaDVrfcv7F+5eF5sn1uAJE4quS2qx7QlPMtnAPElZUV2fQcYhTAYT0f5nVDa0SrNL32ZpwqNyqHFQA5UmMNff8ehmoQhl335+fnxSKyQDnzo+ARLDVMrXUWq1gpjVUAOUffPf35fUfpvzCIsIgBjAtiFVmkDPpo3+Fruc3mqVlIgHM4gsQsVJ7znIdx23qDipsIgJxY1CJyOGDEYPYc7c\/lOPBdviR+SgoALnyw2gkzXPj02Zigqn39peOpR7bB42ImCiAnsv3j3iaNGVFnRd\/E0A2Hh31YSYwnYlgHx\/D5A0jZBdd7s8338T2z4DNA0bJibA4O+zCzBeOt93DOkPEWadHn6bxK931NL6Ha+aZkn1vsBfW+SXvxDoyJOixl6rBskUAYQ3yZxpAqg6AcGIlcsKMAtuXDzmjYnEo7VWyXkZSlG5Th1AEclJHtn\/YqtHFShYAsA0pPeWXawn8d91PDt0KecbiOIR8+h0\/G8kxY+HoRj+nF1cmg1c+UTQd7PVJ4nYbHzHXaf\/6po5x6m7bEJa1q2JnURg\/2TNoxAv4PoGedQHqhulIAAAAASUVORK5CYII=\n name: Contextual Enrichment Using LLM\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 
1751336942081-source-1750400198569-target\n selected: false\n source: '1751336942081'\n sourceHandle: source\n target: '1750400198569'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: llm\n targetType: tool\n id: 1758002850987-source-1751336942081-target\n source: '1758002850987'\n sourceHandle: source\n target: '1751336942081'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInIteration: false\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1756915693835-source-1758027159239-target\n source: '1756915693835'\n sourceHandle: source\n target: '1758027159239'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: llm\n id: 1758027159239-source-1758002850987-target\n source: '1758027159239'\n sourceHandle: source\n target: '1758002850987'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751336942081'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: false\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 474.7618603027596\n y: 282\n positionAbsolute:\n x: 474.7618603027596\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 458\n selected: false\n showAuthor: true\n text: 
'{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 5 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Text Input, Online Drive, Online Doc, and Web Crawler. Different\n types of Data Sources have different input and output types. The output\n of File Upload and Online Drive are files, while the output of Online Doc\n and WebCrawler are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface. 
However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 458\n id: '1751264451381'\n position:\n x: -893.2836123260277\n y: 378.2537898330178\n positionAbsolute:\n x: -893.2836123260277\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. 
The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents 
step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\"},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\".\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -704.0614991386192\n y: -73.30453110517956\n positionAbsolute:\n x: -704.0614991386192\n y: -73.30453110517956\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 304\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed 
specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 304\n id: '1751266402561'\n position:\n x: -555.2228329530462\n y: 592.0458661166498\n positionAbsolute:\n x: -555.2228329530462\n y: 592.0458661166498\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. 
\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. 
These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 153.2996965006646\n y: 378.2537898330178\n positionAbsolute:\n x: 153.2996965006646\n y: 378.2537898330178\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. 
Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 482.3389174180554\n y: 437.9839361130071\n positionAbsolute:\n x: 482.3389174180554\n y: 437.9839361130071\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: 
object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n 
value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. 
'\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove 
consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1758002850987.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n 
type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751336942081'\n position:\n x: 144.55897745117755\n y: 282\n positionAbsolute:\n x: 144.55897745117755\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 446\n selected: true\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"In\n this step, the LLM is responsible for enriching and reorganizing content,\n along with images and tables. The goal is to maintain the integrity of image\n URLs and tables while providing contextual descriptions and summaries to\n enhance understanding. The content should be structured into well-organized\n paragraphs, using double newlines to separate them. The LLM should enrich\n the document by adding relevant descriptions for images and extracting key\n insights from tables, ensuring the content remains easy to retrieve within\n a Retrieval-Augmented Generation (RAG) system. 
The final output should preserve\n the original structure, making it more accessible for knowledge retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 446\n id: '1753967810859'\n position:\n x: -176.67459682201036\n y: 405.2790698865377\n positionAbsolute:\n x: -176.67459682201036\n y: 405.2790698865377\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - pdf\n - doc\n - docx\n - pptx\n - ppt\n - jpg\n - png\n - jpeg\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File\n type: datasource\n height: 52\n id: '1756915693835'\n position:\n x: -893.2836123260277\n y: 282\n positionAbsolute:\n x: -893.2836123260277\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n context:\n enabled: false\n variable_selector: []\n model:\n completion_params:\n temperature: 0.7\n mode: chat\n name: claude-3-5-sonnet-20240620\n provider: langgenius\/anthropic\/anthropic\n prompt_template:\n - id: beb97761-d30d-4549-9b67-de1b8292e43d\n role: system\n text: \"You are an AI document assistant. \\nYour tasks are:\\nEnrich the content\\\n \\ contextually:\\nAdd meaningful descriptions for each image.\\nSummarize\\\n \\ key information from each table.\\nOutput the enriched content\u00a0with clear\\\n \\ annotations showing the\u00a0corresponding image and table positions, so\\\n \\ the text can later be aligned back into the original document. 
Preserve\\\n \\ any ![image] URLs from the input text.\\nYou will receive two inputs:\\n\\\n The file and text\u00a0(may contain images url and tables).\\nThe final output\\\n \\ should be a\u00a0single, enriched version of the original document with ![image]\\\n \\ url preserved.\\nGenerate output directly without saying words like:\\\n \\ Here's the enriched version of the original text with the image description\\\n \\ inserted.\"\n - id: f92ef0cd-03a7-48a7-80e8-bcdc965fb399\n role: user\n text: The file is {{#1756915693835.file#}} and the text are\u00a0{{#1758027159239.text#}}.\n selected: false\n title: LLM\n type: llm\n vision:\n configs:\n detail: high\n variable_selector:\n - '1756915693835'\n - file\n enabled: true\n height: 88\n id: '1758002850987'\n position:\n x: -176.67459682201036\n y: 282\n positionAbsolute:\n x: -176.67459682201036\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: The file to be parsed(support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: The file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. 
Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v1\u3068v2\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment v1 and v2) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v1\u548cv2\u7248\u672c\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: (For local deployment v1 and v2) Parsing method, can be\n auto, ocr, or txt. Default is auto. 
If results are not satisfactory, try\n ocr\n max: null\n min: null\n name: parse_method\n options:\n - icon: ''\n label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - icon: ''\n label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - icon: ''\n label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable formula\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable formula\n recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API and local deployment v2) Whether to enable table\n recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API and local deployment v2) Whether to enable table\n recognition\n 
zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API and local deployment v2) Whether to enable\n table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API and local deployment v2) Specify document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\u3068\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u3067\u306f\u8a00\u8a9e\u3092\u6307\u5b9a\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3059\uff09\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n pt_BR: '(For official API and local deployment v2) Specify 
document language,\n default ch, can be set to auto(local deployment need to specify the\n language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n zh_Hans: \uff08\u4ec5\u9650\u5b98\u65b9api\u548c\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4 ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff08\u672c\u5730\u90e8\u7f72\u9700\u8981\u6307\u5b9a\u660e\u786e\u7684\u8bed\u8a00\uff0c\u9ed8\u8ba4ch\uff09\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API and local deployment v2) Specify document\n language, default ch, can be set to auto(local deployment need to specify\n the language, default ch), other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/version3.x\/pipeline_usage\/OCR.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n 
llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: 
null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: pipeline\n form: form\n human_description:\n en_US: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528\uff09\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306fpipeline\n pt_BR: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c\uff09\u793a\u4f8b\uff1apipeline\u3001vlm-transformers\u3001vlm-sglang-engine\u3001vlm-sglang-client\uff0c\u9ed8\u8ba4\u503c\u4e3apipeline\n label:\n en_US: Backend type\n ja_JP: \u30d0\u30c3\u30af\u30a8\u30f3\u30c9\u30bf\u30a4\u30d7\n pt_BR: Backend type\n zh_Hans: \u89e3\u6790\u540e\u7aef\n llm_description: '(For local deployment v2) Example: pipeline, vlm-transformers,\n vlm-sglang-engine, vlm-sglang-client, default is pipeline'\n max: null\n min: null\n name: backend\n options:\n - icon: ''\n label:\n en_US: pipeline\n ja_JP: pipeline\n pt_BR: pipeline\n zh_Hans: pipeline\n value: pipeline\n - icon: ''\n label:\n en_US: vlm-transformers\n ja_JP: vlm-transformers\n pt_BR: vlm-transformers\n zh_Hans: vlm-transformers\n value: vlm-transformers\n - icon: ''\n label:\n en_US: vlm-sglang-engine\n ja_JP: vlm-sglang-engine\n pt_BR: vlm-sglang-engine\n zh_Hans: vlm-sglang-engine\n value: vlm-sglang-engine\n - icon: ''\n label:\n en_US: vlm-sglang-client\n ja_JP: vlm-sglang-client\n pt_BR: vlm-sglang-client\n zh_Hans: vlm-sglang-client\n value: vlm-sglang-client\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: ''\n form: form\n human_description:\n en_US: '(For local deployment v2 
when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8v2\u7528 \u89e3\u6790\u5f8c\u7aef\u304cvlm-sglang-client\u306e\u5834\u5408\uff09\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u306f\u7a7a\n pt_BR: '(For local deployment v2 when backend is vlm-sglang-client) Example:\n http:\/\/127.0.0.1:8000, default is empty'\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72v2\u7248\u672c \u89e3\u6790\u540e\u7aef\u4e3avlm-sglang-client\u65f6\uff09\u793a\u4f8b\uff1ahttp:\/\/127.0.0.1:8000\uff0c\u9ed8\u8ba4\u503c\u4e3a\u7a7a\n label:\n en_US: sglang-server url\n ja_JP: sglang-server\u30a2\u30c9\u30ec\u30b9\n pt_BR: sglang-server url\n zh_Hans: sglang-server\u5730\u5740\n llm_description: '(For local deployment v2 when backend is vlm-sglang-client)\n Example: http:\/\/127.0.0.1:8000, default is empty'\n max: null\n min: null\n name: sglang_server_url\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n backend: ''\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n parse_method: ''\n sglang_server_url: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: Parse File\n tool_configurations:\n backend:\n type: constant\n value: pipeline\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: true\n enable_table:\n type: constant\n value: 1\n extra_formats:\n type: mixed\n value: '[]'\n language:\n type: mixed\n value: auto\n parse_method:\n type: constant\n value: auto\n sglang_server_url:\n type: mixed\n value: ''\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. 
supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1756915693835'\n - file\n type: tool\n height: 270\n id: '1758027159239'\n position:\n x: -544.9739996945534\n y: 282\n positionAbsolute:\n x: -544.9739996945534\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 679.9701291615181\n y: -191.49392257836791\n zoom: 0.8239704766223018\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. 
You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: ''\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: ''\n type: checkbox\n unit: null\n variable: clean_2\n", @@ -6310,7 +6342,7 @@ "id": "103825d3-7018-43ae-bcf0-f3c001f3eb69", "name": "Contextual 
Enrichment Using LLM" }, -{ + "629cb5b8-490a-48bc-808b-ffc13085cb4f": { "chunk_structure": "hierarchical_model", "description": "This Knowledge Pipeline extracts images and tables from complex PDF documents for downstream processing.", "export_data": "dependencies:\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/jina:0.0.8@d3a6766fbb80890d73fea7ea04803f3e1702c6e6bd621aafb492b86222a193dd\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/parentchild_chunker:0.0.7@ee9c253e7942436b4de0318200af97d98d094262f3c1a56edbe29dcb01fbc158\n- current_identifier: null\n type: marketplace\n value:\n marketplace_plugin_unique_identifier: langgenius\/mineru:0.5.0@ca04f2dceb4107e3adf24839756954b7c5bcb7045d035dbab5821595541c093d\nkind: rag_pipeline\nrag_pipeline:\n description: ''\n icon: 87426868-91d6-4774-a535-5fd4595a77b3\n icon_background: null\n icon_type: image\n icon_url: data:image\/png;base64,iVBORw0KGgoAAAANSUhEUgAAAKAAAACgCAYAAACLz2ctAAAAAXNSR0IArs4c6QAAAERlWElmTU0AKgAAAAgAAYdpAAQAAAABAAAAGgAAAAAAA6ABAAMAAAABAAEAAKACAAQAAAABAAAAoKADAAQAAAABAAAAoAAAAACn7BmJAAARwElEQVR4Ae1dvXPcxhVfLMAP0RR1pL7MGVu8G7sXXdszotNYne1x6kgpktZSiiRNIrtMilgqnNZSb4\/lzm4i5i8w1TvDE+UZyZIlnihKOvIAbN5v7\/aIw93xPvBBHPDezBHYBbC7+O2Pb9++\/YAlMiIPHjwoO65btpQqK6VKVKySsqwV9fQpSliy6IcTubhYxrFTrJJqXe+Mz2+I8KgJoeh3IIRBTW1vt+MoXLWWlgRheo\/uqlmWVSVMa67jVJeXl6sHTx7dGb1HurK9uVnybHtNKXFBWAKEW1XCKvcrhb+tCdi+LBeX2ud80o3AaHipDUGkFErdJXJu2J63vliptAncnXr8MakQ8PH9+2tU9Av0omtCCZx3iZSSsLCE49j6iHPE+U+fCEnnCEOmTp\/uehbXzPWuizmNoFaC4CQdFxCE3V9\/bcd4vk8txpLwW\/f6FPZ9RT8c\/fZ9nSdESmGtK1veOvPGG3SerCRGQGg6V8rLxIwPg6QDUWzb1kTDcXrKaROu16v6T550RMuTJzvCHOhEYBS8PM8TIGmj4QrX9ejndiRG5Kj6lvj8zLlzNzsuxBiInYCaeI7zqeWrK8YuA+lmZqbF9PSUcIh0o2irUQCNEZeJTSoqXg0i4d7evial0ZIgopLWzdNvvvl53MDESsBfNrc+sqX6wth0juOIublZMUXHcSUqoOPmO6nPxYkXiFinn9GMIGLcGjEWApLWK7u2\/ZVpauMgniFAnICaNPN8TAIvaMXd3ZcHdqMlbjve1NXFSvSetIxaGU\/u3\/\/Uk\/aPIB+a1rm5Y
+LEwnwkrRe1TPx8vAigBVssLYj51+Z0x5Dq+iNXNn58tLV1OWpOYxMQtt7jra0vqFd1HbYe7DsU8tjsTNQy8fMZRQB2PJQLjiQlS4mvwIEoxR2rCdZNrpTfUnd9FVrv2LHZxIiXRJMSBbCsP5sWXvX6nnj1qq5dPOQQ33D86Y\/HaZJH1oAgnyflHZAPfrrSieOJkS\/rlV3k8s1SS3eC6h4cABc82bizvfmgPComIxHQkA+9XPjwoI6bBRg1W74\/Dwig7sEBuNbIDCPFNDoJhyYgky8PlIn\/HUDChQgkHIqAvcg3ijM5\/tfmFLOEALgwLgmHIiANqX0bbHaZfFmq\/myUJUxCV+5\/S4qrNKh0AwnY7GY3OxwLx18baRhtUOZ8PV8IgITHiSOmY0KDE9cGveGhBHy0SY5GJa4gYe5wDIKSrwMB0zHBDCZw5+G9e1cOQ6YvAWH3kX2pnYzw8zVZfVhSfI0RaCIAroAzEJp6cu0w90xfApL6pEkFogSvN49uNIHlv8MjAD8hRsdISq7d+Krfkz0J2Gp6PwKT51pM7pcAxzMC\/RDQY8fNpnjtV5op1eu+ngSUUmnjEeTjprcXbBw3DALoO5imWJA516tX3EVAmt1yDS4XEK816DxMXnwPI9ATATTFmJ5H5lx5X8quDkkXAZXvX0ZK8\/NzPRPkSEZgVAQwKRlCq34+DWvBDgLC9oP2w\/yvKLOYdW78hxFoIQAuQQuSNNcJBZDpIKCx\/bjpDSDEp7EgYLQgjWR8GEywTcBHmz\/r9bls+wXh4fO4EIAWbDmn1x5v3l8z6bYJKKV3GZFTtEyShRFIAoHp5kxq4Ut\/zaTfJqAS8gIiufk10PAxbgRajmloQs01pK+n5KNn4kp7GxEnlwZOYMBtqUl4inlqGeckoywt5MfODbXajp7G7\/jeIrYB0RoQe7UAb+755oR1GX0NOKYlzZ6GGM5pAhIzVxFp074sLIxAkghg7x8I7VezhmPTBrSs8wiwBgQKLEkigLVEEIyM4Njs8iqLAtQNsdt9ElzLhGTJhskEIBNeCGxG9YLegaZpaaXXYlyzCcbqJhZGIEkEYAdCjAaUD2jiKSJ41gtQYEkaAd0RoYkuEOyKK2mMroyA3YrEOQsjkCQCRgs6dbcsaYtc7fizZFM1Jpkxp80IAAHTE7ZsVZbkgikjkptgoMCSBgJGAxL3SmiMmxqwZRymUQDOo9gIGAKCe9L0RgKRxUaH3z5xBExrS5xbaTv+9FSZxLPmDBiBTgSId9YKorLohO4sKofygoBRdp5Si20NmJeX4\/fIPgLG40JEPMEEzH595bqEtF7Ool4wLUWa0F7wr+\/\/JlMVdOrOfzrKY8p3\/C9\/FjMXL3ZcK2rADHrQHtPkiBa+dsOYdrmooCT93s\/\/8U+x9\/33SWczcelzE5xilYGEjY2NFHPMflZMwJTraOdvfxfuTz+lnGt2s3O8bb0URPheA+NxsZeU5\/N1Qqp2d8Wzq38SJ774l3DefrvzYgZDSazJ0V\/r3Hmu3xZTEHgoLuWKNyT0Hj5MOedsZBfo8OqhOCbgEdQLSLhDmrCIJOwg4BFgz1m2EAD5ikpCQwIHX9SGyJjWAydhM5jC5vFoSLhANqH9+uuZf8W4bHppNZd\/xN\/ryDyE2SugIWERm2MmYEb4aEgI27BIwgTMUG2DhDXqmBSJhEzADBEQRfHISV0kEjIBM0ZAQ0KMmBRBmIAZrWWMGWPsOO\/CBMxwDWP2TN5JyATMMAFRNJBw98t\/Z7yU4xePCTg+dqk9Wf\/6a\/Hy1q3U8kszIyZgmmhHyOvlzVu5JCETMAIp0n40jyRkAqbNooj55Y2ETMCIhDiKx0HCV19\/cxRZx54nEzB2SNNJ8MWXX+ZikRMTMB2+JJJLHnyE\/FmkRKhxkGh4nfDBFT4DAqwBmQdHigAT8Ejh58yZgMyBI0WAbcCY4Td7wcScbN\/kJt3GZA3Yt2r5
QhoIMAHTQJnz6IsAE7AvNHwhDQSYgGmgzHn0RYAJ2BcavpAGAkzANFDmPPoiwATsCw1fSAOBifcDTrofLI1KznIerAGzXDsFKBsTsACVnOVXZAJmuXYKUDYmYAEqOcuvyATMcu0UoGxMwAJUcpZfkQmY5dopQNkmzg846nw7m77Fge9xzH7wgZhaPT+wSodN35qf1+kibef8eTHz3rsD0+51w7D59Xq2V9yk+UUnjoC9QD8sDhs+4odNfqZWV8U8fTQwjs3AsYsptlDTn96ivVt2iZDT770n5i79Lpb0D3unPF0rVBMMstT+8MdEPpUFQoLkSD8vi8bTIHqhCAhAQRR8KiupHemRPhaN53lLtTiJOfFN8CCbp7FxV9RJM+398EMbN5Bkl3YfxffaBkm\/9P2Hv2gSI2337t0uQmNLNeSD7wSPIv3yGyWNSbp34gk4CGx0PPCD3RfcY8\/Yb7ALxxH5+lmBn+nY7H3\/g04\/qFnRJDtvvSWO\/faTcbIoxDOFaYLnLl\/SnZBgrYI0ccnMxQ9Er68doTnmz7P2R7kwBAQE6KEGpUFNZ5wCLdubhPndYjcqfoUiYPj7vMHmMiqQ5nmQEK6eoKC5hz3I0o1AoQgI53EaArsybFvWY2zu03iHtPIoFAHRIw5KWCMGr0U9n363c2QEznCWbgQKRcB6wBUDKOTZs92IxBRjescmubjtTZPupB9z74YxFQQXDNwiQZm9eDEYjPU8PNznD2kDjjo2POl+w1wTEIa\/+9P\/tH9Oj9kGKAaCTI85gSCQTN\/TsL3JnZDeUE08AUfVGIAB5IC7hOXoESiUDQi4QT4MwYWbyLirIqzxwhox7vwmNb2J14CjAB\/ndKxB+aLpD8qwhJ90my74zsOc556Akmy9GXKJYK5euGc6DEDj3hMefkuyxz1uGbPw3MQTMKsao\/5N54dkZugfgKUbgcLZgN0QxB+DSQ7hYT5niOUA8Zck+yk6\/vZTXUpfedkv7QSUEMQLTvtCkWdoPcqwNmDWX9F\/8iSWIvq1Zzod1oCxwNlMBOTb6THbGlPBWHoj4FhC1JQQJaWUsCwKsYyFwCuy+fARwbD7Ze7Spdxov7GA6fEQuNaSmkOnNQowAQ0kQx4xJb9BEwwwHR\/T8sPEQzJoeln7dQPaQUB7cVGQ7hOytCCk5BY5DNc4Iy2GfMf\/+pdwchMXlidPxl9m3xfSniLWCTHxbpj40YmWIkY80OzyOpDhcGQCDofTwLtAvGOffKKJx8NuA+Fq38AEbEMx2glIBtfKFG3LgVEW5+239DjzaKkU826\/1QlRQtWsx1tbd8gIXFtYmBdTDvOxmJRI960brit2dmiNjCXWudeRLvacWwgBEBBuGKH8tm8mdAsHGYHkEJDkk9FjIgHfTHK5ccqMACHgeb7GgdwwVW6CmRLpI3AwEiIkWIgSeOQcZGEE0kCg3QtW6t6BDRhgZRqF4DyKi0DA3KtJy7eanRAmYHEZkfKb+8YGtKyqVI5VRf6uy\/MBU66HwmbXboI9qyZd160CiYBaLCww\/OLpIOC3+hvurFOVy5VKFdkikn2B6VRA0XMxBFxeXm66YSyhqgCFxuaKjg2\/f8IIuJ4x9dQGstKDv8qyaAM7UW40XDEzM51wEUZLPq41CKPlmp+7E5nPFwEe0wEhp989JKMd0Rb5YxA4YCdCLIxA\/AhgIgKEiKc1YHMkxLLWEelxTxgwsCSIgPG20PqjAwLanreOPKEBuSOSIPqcNLn7mhrQcE7bgIuVSo3mBa6TK2bN9T0xJbM7LzBrNk3WOJVlm9k0v9Td3QDngF2zCcaZUv\/FYX+\/gQMLIxA7Anv1fZ0m+Vo01xA4IKAv1xGxt9e8CecsjECcCLQ1oO\/fNOm2CXi68uY6pkhjRKR9o7mLj4xARASg2PRgB82+OlOp6A4IkmwTUKev1Hc4vnpZ10H+wwjEhUDdtKyW+DyYZgcBnaZqrEEDshYMwsTnU
RAAl9D7JduveubcuZvBtDoI2OyZqBu4gbVgECY+j4LA7u5L\/Ti5+G6F0+kgIC6SFrxOY8JVsLZe3wvfz2FGYCQEgrbf2crKZ+GHuwgILSh96ypufPmqzo7pMGIcHhoBLPMAh7SEbD+TSBcBceFU5dxt0yPefdFUn+YBPjICwyIAM05PvbLE7bDtZ9LoSUBcpGG539Ohtt9ocFNs0OLj0AjAfNvb1z7lmutN6Ra118N9CagnqvpKd5mhRnnVXC\/4OK4XAsGmV1ni6nJludrrPsT1JSAunq6sXKfJqjfgnMZeHkxCoMJyGALgCLgCzlCv90a\/ptekcSgBcZPt+59h8Bht+fPnL7hTYpDjYxcCIB040hzxUBtnKitXum4KRQwkIHrFru9\/DNeMR9O1nj0ndvM+MiEYOQjyPUMriSl95HD2\/OmPh0FlIAGRCOxBUq3vMwmHgbR493STb+r9w+y+IEJDERAP9CIh24RBKIt5Dg50ar7hyQfEhiYgbg6TkDsmQKW4YjocB83uaOQDciMREA8YEpqOybNnz9lPCGAKJvDzoe5Nh8PzRycfIBuZgHgIJDy9svKOcdG8ePlKYMCZm2Sgk28xPV3UOc7hanlB\/YNhbb4wOmMR0CRyamXlivKFHjGB1xtNMs+oNujk7witt13bERgdI6kJX12Fq6XSWt8xzhtHIiAyPFM5d5MWMr1DY8e3oY4xdoxC8nzCcaojm8+gLqFcjNbDPAHXn3oHAxVRS2xFTSD4\/KPNrctCqmuWsMqIx6772Gkhym4L4VVevCoOyPaXOPEC8TChwCgT+Peoxbt6FpNVYpJYCWjK9Hjz3mdKikuGiPgEmCbj7PTIn4KIE1BTvjwfo+AFmw5rw7EyEqYUwi1Bc3tjV\/jXozS3JrHgMRECmgzCGtHEg4y2Y2sySlsKx7bNpa5jFEC7EitAxLB46Q4EEWyf9gOCGwW7YuiNCQ5Ip7\/jQSz8bpeWasRNPFMViRLQZPJo8+dV2vjjsiXFBXorOu8WaEmbfvhkLEipj3SOD2oj3oh96hRtbN1ZbNyLX5HEECj8zo3Hj3UUrmMjSLl0sukqoXPEYWsMfY3s9Z5C9p3wsEZcruuVkj1vii8y9Vrb3NwsHRf2mpJqlVhzntAo9yMlXtN80d28slxcMqd87IHAKHhhWz7sjKY8bBZurT8X3npSmq5HUXVU6gTsV5AHmw\/KjnDLBEqJyFmm+0oEzop6+pQ6XQJhLdbiYonCJRPGkT43i3BHXPB6Ts9rhFUt\/G7+9nYVcWS94VrNWloSrd3PatgPnLCqusKpjuu3Q9pxyv8BVb3XBNS3Vn0AAAAASUVORK5CYII=\n name: Complex PDF with Images & Tables\nversion: 0.1.0\nworkflow:\n conversation_variables: []\n environment_variables: []\n features: {}\n graph:\n edges:\n - data:\n isInLoop: false\n sourceType: datasource\n targetType: tool\n id: 1750400203722-source-1751281136356-target\n selected: false\n source: '1750400203722'\n sourceHandle: source\n target: '1751281136356'\n targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: knowledge-index\n id: 1751338398711-source-1750400198569-target\n selected: false\n source: '1751338398711'\n sourceHandle: source\n target: '1750400198569'\n 
targetHandle: target\n type: custom\n zIndex: 0\n - data:\n isInLoop: false\n sourceType: tool\n targetType: tool\n id: 1751281136356-source-1751338398711-target\n selected: false\n source: '1751281136356'\n sourceHandle: source\n target: '1751338398711'\n targetHandle: target\n type: custom\n zIndex: 0\n nodes:\n - data:\n chunk_structure: hierarchical_model\n embedding_model: jina-embeddings-v2-base-en\n embedding_model_provider: langgenius\/jina\/jina\n index_chunk_variable_selector:\n - '1751338398711'\n - result\n indexing_technique: high_quality\n keyword_number: 10\n retrieval_model:\n reranking_enable: true\n reranking_mode: reranking_model\n reranking_model:\n reranking_model_name: jina-reranker-v1-base-en\n reranking_provider_name: langgenius\/jina\/jina\n score_threshold: 0\n score_threshold_enabled: false\n search_method: hybrid_search\n top_k: 3\n weights: null\n selected: true\n title: Knowledge Base\n type: knowledge-index\n height: 114\n id: '1750400198569'\n position:\n x: 355.92518399555183\n y: 282\n positionAbsolute:\n x: 355.92518399555183\n y: 282\n selected: true\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n datasource_configurations: {}\n datasource_label: File\n datasource_name: upload-file\n datasource_parameters: {}\n fileExtensions:\n - txt\n - markdown\n - mdx\n - pdf\n - html\n - xlsx\n - xls\n - vtt\n - properties\n - doc\n - docx\n - csv\n - eml\n - msg\n - pptx\n - xml\n - epub\n - ppt\n - md\n plugin_id: langgenius\/file\n provider_name: file\n provider_type: local_file\n selected: false\n title: File Upload\n type: datasource\n height: 52\n id: '1750400203722'\n position:\n x: -579\n y: 282\n positionAbsolute:\n x: -579\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n author: TenTen\n desc: ''\n height: 337\n selected: false\n showAuthor: true\n text: 
'{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Currently\n we support 4 types of \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Data\n Sources\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\":\n File Upload, Online Drive, Online Doc, and Web Crawler. Different types\n of Data Sources have different input and output types. The output of File\n Upload and Online Drive are files, while the output of Online Doc and Web Crawler\n are pages. You can find more Data Sources on our Marketplace.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n Knowledge Pipeline can have multiple data sources. Each data source can\n be selected more than once with different settings. Each added data source\n is a tab on the add file interface.
However, each time the user can only\n select one data source to import the file and trigger its subsequent processing.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 358\n height: 337\n id: '1751264451381'\n position:\n x: -990.8091030156684\n y: 282\n positionAbsolute:\n x: -990.8091030156684\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 358\n - data:\n author: TenTen\n desc: ''\n height: 260\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n \",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Knowledge\n Pipeline\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n starts with Data Source as the starting node and ends with the knowledge\n base node. 
The general steps are: import documents from the data source\n \u2192 use extractor to extract document content \u2192 split and clean content into\n structured chunks \u2192 store in the knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n user input variables required by the Knowledge Pipeline node must be predefined\n and managed via the Input Field section located in the top-right corner\n of the orchestration canvas. It determines what input fields the end users\n will see and need to fill in when importing files to the knowledge base\n through this pipeline.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Unique\n Inputs: Input fields defined here are only available to the selected data\n source and its downstream nodes.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Global\n Inputs: These input fields are shared across all subsequent nodes after\n the data source and are typically set during the Process Documents 
step.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"For\n more information, see \",\"type\":\"text\",\"version\":1},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"link\",\"version\":1,\"rel\":\"noreferrer\",\"target\":null,\"title\":null,\"url\":\"https:\/\/docs.dify.ai\/en\/guides\/knowledge-base\/knowledge-pipeline\/knowledge-pipeline-orchestration\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 1182\n height: 260\n id: '1751266376760'\n position:\n x: -579\n y: -22.64803881585007\n positionAbsolute:\n x: -579\n y: -22.64803881585007\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 1182\n - data:\n author: TenTen\n desc: ''\n height: 541\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"A\n document extractor for large language models (LLMs) like MinerU is a tool\n that preprocesses and converts diverse document types into structured, clean,\n and machine-readable data. 
This structured data can then be used to train\n or augment LLMs and retrieval-augmented generation (RAG) systems by providing\n them with accurate, well-organized content from varied sources. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"MinerU\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n is an advanced open-source document extractor designed specifically to convert\n complex, unstructured documents\u2014such as PDFs, Word files, and PPTs\u2014into\n high-quality, machine-readable formats like Markdown and JSON. MinerU addresses\n challenges in document parsing such as layout detection, formula recognition,\n and multi-language support, which are critical for generating high-quality\n training corpora for LLMs.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 541\n id: '1751266402561'\n position:\n x: -263.7680017647218\n y: 558.328085421591\n positionAbsolute:\n x: -263.7680017647218\n y: 558.328085421591\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 554\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Parent-Child\n 
Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\n addresses the dilemma of context and precision by leveraging a two-tier\n hierarchical approach that effectively balances the trade-off between accurate\n matching and comprehensive contextual information in RAG systems. \",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Here\n is the essential mechanism of this structured, two-level information access:\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Query Matching with Child Chunks: Small, focused pieces of information,\n often as concise as a single sentence within a paragraph, are used to match\n the user''s query. These child chunks enable precise and relevant initial\n retrieval.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"-\n Contextual Enrichment with Parent Chunks: Larger, encompassing sections\u2014such\n as a paragraph, a section, or even an entire document\u2014that include the matched\n child chunks are then retrieved. 
These parent chunks provide comprehensive\n context for the Language Model (LLM).\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 554\n id: '1751266447821'\n position:\n x: 42.95253988413964\n y: 366.1915342509804\n positionAbsolute:\n x: 42.95253988413964\n y: 366.1915342509804\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n author: TenTen\n desc: ''\n height: 411\n selected: false\n showAuthor: true\n text: '{\"root\":{\"children\":[{\"children\":[{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"The\n knowledge base provides two indexing methods:\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Economical\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\",\n each with different retrieval strategies. High-Quality mode uses embeddings\n for vectorization and supports vector, full-text, and hybrid retrieval,\n offering more accurate results but higher resource usage. 
Economical mode\n uses keyword-based inverted indexing with no token consumption but lower\n accuracy; upgrading to High-Quality is possible, but downgrading requires\n creating a new knowledge base.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[],\"direction\":null,\"format\":\"\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":0,\"textStyle\":\"\"},{\"children\":[{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"*\n Parent-Child Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0and\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"Q&A\n Mode\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0only\n support the\u00a0\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":1,\"mode\":\"normal\",\"style\":\"\",\"text\":\"High-Quality\",\"type\":\"text\",\"version\":1},{\"detail\":0,\"format\":0,\"mode\":\"normal\",\"style\":\"\",\"text\":\"\u00a0indexing\n method.\",\"type\":\"text\",\"version\":1}],\"direction\":\"ltr\",\"format\":\"start\",\"indent\":0,\"type\":\"paragraph\",\"version\":1,\"textFormat\":1,\"textStyle\":\"\"}],\"direction\":\"ltr\",\"format\":\"\",\"indent\":0,\"type\":\"root\",\"version\":1,\"textFormat\":1}}'\n theme: blue\n title: ''\n type: ''\n width: 240\n height: 411\n id: '1751266580099'\n position:\n x: 355.92518399555183\n y: 434.6494699299023\n positionAbsolute:\n x: 355.92518399555183\n y: 434.6494699299023\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom-note\n width: 240\n - data:\n credential_id: fd1cbc33-1481-47ee-9af2-954b53d350e0\n is_team_authorization: false\n output_schema:\n properties:\n full_zip_url:\n description: The zip URL of 
the complete parsed result\n type: string\n images:\n description: The images extracted from the file\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n ja_JP: \u89e3\u6790\u3059\u308b\u30d5\u30a1\u30a4\u30eb(pdf\u3001ppt\u3001pptx\u3001doc\u3001docx\u3001png\u3001jpg\u3001jpeg\u3092\u30b5\u30dd\u30fc\u30c8)\n pt_BR: the file to be parsed (support pdf, ppt, pptx, doc, docx, png, jpg,\n jpeg)\n zh_Hans: \u7528\u4e8e\u89e3\u6790\u7684\u6587\u4ef6(\u652f\u6301 pdf, ppt, pptx, doc, docx, png, jpg, jpeg)\n label:\n en_US: file\n ja_JP: file\n pt_BR: file\n zh_Hans: file\n llm_description: the file to be parsed (support pdf, ppt, pptx, doc, docx,\n png, jpg, jpeg)\n max: null\n min: null\n name: file\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: file\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: (For local deployment service) Parsing method, can be auto, ocr,\n or txt. Default is auto. If results are not satisfactory, try ocr\n ja_JP: \uff08\u30ed\u30fc\u30ab\u30eb\u30c7\u30d7\u30ed\u30a4\u30e1\u30f3\u30c8\u30b5\u30fc\u30d3\u30b9\u7528\uff09\u89e3\u6790\u65b9\u6cd5\u306f\u3001auto\u3001ocr\u3001\u307e\u305f\u306ftxt\u306e\u3044\u305a\u308c\u304b\u3067\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fauto\u3067\u3059\u3002\u7d50\u679c\u304c\u6e80\u8db3\u3067\u304d\u306a\u3044\u5834\u5408\u306f\u3001ocr\u3092\u8a66\u3057\u3066\u304f\u3060\u3055\u3044\n pt_BR: (For local deployment service) Parsing method, can be auto, ocr,\n or txt. Default is auto.
If results are not satisfactory, try ocr\n zh_Hans: \uff08\u7528\u4e8e\u672c\u5730\u90e8\u7f72\u670d\u52a1\uff09\u89e3\u6790\u65b9\u6cd5\uff0c\u53ef\u4ee5\u662fauto, ocr, \u6216 txt\u3002\u9ed8\u8ba4\u662fauto\u3002\u5982\u679c\u7ed3\u679c\u4e0d\u7406\u60f3\uff0c\u8bf7\u5c1d\u8bd5ocr\n label:\n en_US: parse method\n ja_JP: \u89e3\u6790\u65b9\u6cd5\n pt_BR: parse method\n zh_Hans: \u89e3\u6790\u65b9\u6cd5\n llm_description: Parsing method, can be auto, ocr, or txt. Default is auto.\n If results are not satisfactory, try ocr\n max: null\n min: null\n name: parse_method\n options:\n - label:\n en_US: auto\n ja_JP: auto\n pt_BR: auto\n zh_Hans: auto\n value: auto\n - label:\n en_US: ocr\n ja_JP: ocr\n pt_BR: ocr\n zh_Hans: ocr\n value: ocr\n - label:\n en_US: txt\n ja_JP: txt\n pt_BR: txt\n zh_Hans: txt\n value: txt\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable formula recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable formula recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n label:\n en_US: Enable formula recognition\n ja_JP: \u6570\u5f0f\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable formula recognition\n zh_Hans: \u5f00\u542f\u516c\u5f0f\u8bc6\u522b\n llm_description: (For official API) Whether to enable formula recognition\n max: null\n min: null\n name: enable_formula\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 1\n form: form\n human_description:\n en_US: (For official API) Whether to enable table recognition\n ja_JP: 
\uff08\u516c\u5f0fAPI\u7528\uff09\u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable table recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542f\u8868\u683c\u8bc6\u522b\n label:\n en_US: Enable table recognition\n ja_JP: \u8868\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable table recognition\n zh_Hans: \u5f00\u542f\u8868\u683c\u8bc6\u522b\n llm_description: (For official API) Whether to enable table recognition\n max: null\n min: null\n name: enable_table\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: doclayout_yolo\n form: form\n human_description:\n en_US: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\uff1adoclayout_yolo\u3001layoutlmv3\u3001\u30c7\u30d5\u30a9\u30eb\u30c8\u5024\u306f doclayout_yolo\u3002doclayout_yolo\n \u306f\u81ea\u5df1\u958b\u767a\u30e2\u30c7\u30eb\u3067\u3001\u52b9\u679c\u304c\u3088\u308a\u826f\u3044\n pt_BR: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. doclayout_yolo is a self-developed\n model with better effect'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u53ef\u9009\u503c\uff1adoclayout_yolo\u3001layoutlmv3\uff0c\u9ed8\u8ba4\u503c\u4e3a doclayout_yolo\u3002doclayout_yolo\n \u4e3a\u81ea\u7814\u6a21\u578b\uff0c\u6548\u679c\u66f4\u597d\n label:\n en_US: Layout model\n ja_JP: \u30ec\u30a4\u30a2\u30a6\u30c8\u691c\u51fa\u30e2\u30c7\u30eb\n pt_BR: Layout model\n zh_Hans: \u5e03\u5c40\u68c0\u6d4b\u6a21\u578b\n llm_description: '(For official API) Optional values: doclayout_yolo, layoutlmv3,\n default value is doclayout_yolo. 
doclayout_yolo is a self-developed model\n with better effect'\n max: null\n min: null\n name: layout_model\n options:\n - label:\n en_US: doclayout_yolo\n ja_JP: doclayout_yolo\n pt_BR: doclayout_yolo\n zh_Hans: doclayout_yolo\n value: doclayout_yolo\n - label:\n en_US: layoutlmv3\n ja_JP: layoutlmv3\n pt_BR: layoutlmv3\n zh_Hans: layoutlmv3\n value: layoutlmv3\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: auto\n form: form\n human_description:\n en_US: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u6307\u5b9a\u3057\u307e\u3059\u3002\u30c7\u30d5\u30a9\u30eb\u30c8\u306fch\u3067\u3001auto\u306b\u8a2d\u5b9a\u3067\u304d\u307e\u3059\u3002auto\u306e\u5834\u5408\u3001\u30e2\u30c7\u30eb\u306f\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\u3092\u81ea\u52d5\u7684\u306b\u8b58\u5225\u3057\u307e\u3059\u3002\u4ed6\u306e\u30aa\u30d7\u30b7\u30e7\u30f3\u5024\u30ea\u30b9\u30c8\u306b\u3064\u3044\u3066\u306f\u3001\u6b21\u3092\u53c2\u7167\u3057\u3066\u304f\u3060\u3055\u3044\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n pt_BR: '(For official API) Specify document language, default ch, can\n be set to auto, when auto, the model will automatically identify document\n language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u6307\u5b9a\u6587\u6863\u8bed\u8a00\uff0c\u9ed8\u8ba4
ch\uff0c\u53ef\u4ee5\u8bbe\u7f6e\u4e3aauto\uff0c\u5f53\u4e3aauto\u65f6\u6a21\u578b\u4f1a\u81ea\u52a8\u8bc6\u522b\u6587\u6863\u8bed\u8a00\uff0c\u5176\u4ed6\u53ef\u9009\u503c\u5217\u8868\u8be6\u89c1\uff1ahttps:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5\n label:\n en_US: Document language\n ja_JP: \u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u8a00\u8a9e\n pt_BR: Document language\n zh_Hans: \u6587\u6863\u8bed\u8a00\n llm_description: '(For official API) Specify document language, default\n ch, can be set to auto, when auto, the model will automatically identify\n document language, other optional value list see: https:\/\/paddlepaddle.github.io\/PaddleOCR\/latest\/ppocr\/blog\/multi_languages.html#5'\n max: null\n min: null\n name: language\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 0\n form: form\n human_description:\n en_US: (For official API) Whether to enable OCR recognition\n ja_JP: \uff08\u516c\u5f0fAPI\u7528\uff09OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\u304b\u3069\u3046\u304b\n pt_BR: (For official API) Whether to enable OCR recognition\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u662f\u5426\u5f00\u542fOCR\u8bc6\u522b\n label:\n en_US: Enable OCR recognition\n ja_JP: OCR\u8a8d\u8b58\u3092\u6709\u52b9\u306b\u3059\u308b\n pt_BR: Enable OCR recognition\n zh_Hans: \u5f00\u542fOCR\u8bc6\u522b\n llm_description: (For official API) Whether to enable OCR recognition\n max: null\n min: null\n name: enable_ocr\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: '[]'\n form: form\n human_description:\n en_US: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n ja_JP: 
\uff08\u516c\u5f0fAPI\u7528\uff09\u4f8b\uff1a[\"docx\",\"html\"]\u3001markdown\u3001json\u306f\u30c7\u30d5\u30a9\u30eb\u30c8\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\u3067\u3042\u308a\u3001\u8a2d\u5b9a\u3059\u308b\u5fc5\u8981\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u3053\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u306f\u3001docx\u3001html\u3001latex\u306e3\u3064\u306e\u5f62\u5f0f\u306e\u3044\u305a\u308c\u304b\u307e\u305f\u306f\u8907\u6570\u306e\u307f\u3092\u30b5\u30dd\u30fc\u30c8\u3057\u307e\u3059\n pt_BR: '(For official API) Example: [\"docx\",\"html\"], markdown, json are\n the default export formats, no need to set, this parameter only supports\n one or more of docx, html, latex'\n zh_Hans: \uff08\u7528\u4e8e\u5b98\u65b9API\uff09\u793a\u4f8b\uff1a[\"docx\",\"html\"],markdown\u3001json\u4e3a\u9ed8\u8ba4\u5bfc\u51fa\u683c\u5f0f\uff0c\u65e0\u987b\u8bbe\u7f6e\uff0c\u8be5\u53c2\u6570\u4ec5\u652f\u6301docx\u3001html\u3001latex\u4e09\u79cd\u683c\u5f0f\u4e2d\u7684\u4e00\u4e2a\u6216\u591a\u4e2a\n label:\n en_US: Extra export formats\n ja_JP: \u8ffd\u52a0\u306e\u30a8\u30af\u30b9\u30dd\u30fc\u30c8\u5f62\u5f0f\n pt_BR: Extra export formats\n zh_Hans: \u989d\u5916\u5bfc\u51fa\u683c\u5f0f\n llm_description: '(For official API) Example: [\"docx\",\"html\"], markdown,\n json are the default export formats, no need to set, this parameter only\n supports one or more of docx, html, latex'\n max: null\n min: null\n name: extra_formats\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n params:\n enable_formula: ''\n enable_ocr: ''\n enable_table: ''\n extra_formats: ''\n file: ''\n language: ''\n layout_model: ''\n parse_method: ''\n provider_id: langgenius\/mineru\/mineru\n provider_name: langgenius\/mineru\/mineru\n provider_type: builtin\n selected: false\n title: MinerU\n tool_configurations:\n enable_formula:\n type: constant\n value: 1\n enable_ocr:\n type: constant\n value: 0\n enable_table:\n type: 
constant\n value: 1\n extra_formats:\n type: constant\n value: '[]'\n language:\n type: constant\n value: auto\n layout_model:\n type: constant\n value: doclayout_yolo\n parse_method:\n type: constant\n value: auto\n tool_description: a tool for parsing text, tables, and images, supporting\n multiple formats such as pdf, pptx, docx, etc. supporting multiple languages\n such as English, Chinese, etc.\n tool_label: Parse File\n tool_name: parse-file\n tool_node_version: '2'\n tool_parameters:\n file:\n type: variable\n value:\n - '1750400203722'\n - file\n type: tool\n height: 244\n id: '1751281136356'\n position:\n x: -263.7680017647218\n y: 282\n positionAbsolute:\n x: -263.7680017647218\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n - data:\n is_team_authorization: true\n output_schema:\n properties:\n result:\n description: Parent child chunks result\n items:\n type: object\n type: array\n type: object\n paramSchemas:\n - auto_generate: null\n default: null\n form: llm\n human_description:\n en_US: ''\n ja_JP: ''\n pt_BR: ''\n zh_Hans: ''\n label:\n en_US: Input Content\n ja_JP: Input Content\n pt_BR: Conte\u00fado de Entrada\n zh_Hans: \u8f93\u5165\u6587\u672c\n llm_description: The text you want to chunk.\n max: null\n min: null\n name: input_text\n options: []\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: paragraph\n form: llm\n human_description:\n en_US: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n ja_JP: Split text into paragraphs based on separator and maximum chunk\n length, using split text as parent block or entire document as parent\n block and directly retrieve.\n pt_BR: Dividir texto em par\u00e1grafos com base no separador e no comprimento\n m\u00e1ximo do bloco, usando o texto 
dividido como bloco pai ou documento\n completo como bloco pai e diretamente recuper\u00e1-lo.\n zh_Hans: \u6839\u636e\u5206\u9694\u7b26\u548c\u6700\u5927\u5757\u957f\u5ea6\u5c06\u6587\u672c\u62c6\u5206\u4e3a\u6bb5\u843d\uff0c\u4f7f\u7528\u62c6\u5206\u6587\u672c\u4f5c\u4e3a\u68c0\u7d22\u7684\u7236\u5757\u6216\u6574\u4e2a\u6587\u6863\u7528\u4f5c\u7236\u5757\u5e76\u76f4\u63a5\u68c0\u7d22\u3002\n label:\n en_US: Parent Mode\n ja_JP: Parent Mode\n pt_BR: Modo Pai\n zh_Hans: \u7236\u5757\u6a21\u5f0f\n llm_description: Split text into paragraphs based on separator and maximum\n chunk length, using split text as parent block or entire document as parent\n block and directly retrieve.\n max: null\n min: null\n name: parent_mode\n options:\n - label:\n en_US: Paragraph\n ja_JP: Paragraph\n pt_BR: Par\u00e1grafo\n zh_Hans: \u6bb5\u843d\n value: paragraph\n - label:\n en_US: Full Document\n ja_JP: Full Document\n pt_BR: Documento Completo\n zh_Hans: \u5168\u6587\n value: full_doc\n placeholder: null\n precision: null\n required: true\n scope: null\n template: null\n type: select\n - auto_generate: null\n default: '\n\n\n '\n form: llm\n human_description:\n en_US: Separator used for chunking\n ja_JP: Separator used for chunking\n pt_BR: Separador usado para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Parent Delimiter\n ja_JP: Parent Delimiter\n pt_BR: Separador de Pai\n zh_Hans: \u7236\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split chunks\n max: null\n min: null\n name: separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 1024\n form: llm\n human_description:\n en_US: Maximum length for chunking\n ja_JP: Maximum length for chunking\n pt_BR: Comprimento m\u00e1ximo para divis\u00e3o\n zh_Hans: \u7528\u4e8e\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Parent Chunk Length\n ja_JP: 
Maximum Parent Chunk Length\n pt_BR: Comprimento M\u00e1ximo do Bloco Pai\n zh_Hans: \u6700\u5927\u7236\u5757\u957f\u5ea6\n llm_description: Maximum length allowed per chunk\n max: null\n min: null\n name: max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: '. '\n form: llm\n human_description:\n en_US: Separator used for subchunking\n ja_JP: Separator used for subchunking\n pt_BR: Separador usado para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u5206\u9694\u7b26\n label:\n en_US: Child Delimiter\n ja_JP: Child Delimiter\n pt_BR: Separador de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u5206\u9694\u7b26\n llm_description: The separator used to split subchunks\n max: null\n min: null\n name: subchunk_separator\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: string\n - auto_generate: null\n default: 512\n form: llm\n human_description:\n en_US: Maximum length for subchunking\n ja_JP: Maximum length for subchunking\n pt_BR: Comprimento m\u00e1ximo para subdivis\u00e3o\n zh_Hans: \u7528\u4e8e\u5b50\u5206\u5757\u7684\u6700\u5927\u957f\u5ea6\n label:\n en_US: Maximum Child Chunk Length\n ja_JP: Maximum Child Chunk Length\n pt_BR: Comprimento M\u00e1ximo de Subdivis\u00e3o\n zh_Hans: \u5b50\u5206\u5757\u6700\u5927\u957f\u5ea6\n llm_description: Maximum length allowed per subchunk\n max: null\n min: null\n name: subchunk_max_length\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: number\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove consecutive spaces, newlines and tabs\n ja_JP: Whether to remove consecutive spaces, newlines and tabs\n pt_BR: Se deve remover espa\u00e7os extras no texto\n zh_Hans: 
\u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n label:\n en_US: Replace consecutive spaces, newlines and tabs\n ja_JP: Replace consecutive spaces, newlines and tabs\n pt_BR: Substituir espa\u00e7os consecutivos, novas linhas e guias\n zh_Hans: \u66ff\u6362\u8fde\u7eed\u7a7a\u683c\u3001\u6362\u884c\u7b26\u548c\u5236\u8868\u7b26\n llm_description: Whether to remove consecutive spaces, newlines and tabs\n max: null\n min: null\n name: remove_extra_spaces\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n - auto_generate: null\n default: 0\n form: llm\n human_description:\n en_US: Whether to remove URLs and emails in the text\n ja_JP: Whether to remove URLs and emails in the text\n pt_BR: Se deve remover URLs e e-mails no texto\n zh_Hans: \u662f\u5426\u79fb\u9664\u6587\u672c\u4e2d\u7684URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n label:\n en_US: Delete all URLs and email addresses\n ja_JP: Delete all URLs and email addresses\n pt_BR: Remover todas as URLs e e-mails\n zh_Hans: \u5220\u9664\u6240\u6709URL\u548c\u7535\u5b50\u90ae\u4ef6\u5730\u5740\n llm_description: Whether to remove URLs and emails in the text\n max: null\n min: null\n name: remove_urls_emails\n options: []\n placeholder: null\n precision: null\n required: false\n scope: null\n template: null\n type: boolean\n params:\n input_text: ''\n max_length: ''\n parent_mode: ''\n remove_extra_spaces: ''\n remove_urls_emails: ''\n separator: ''\n subchunk_max_length: ''\n subchunk_separator: ''\n provider_id: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_name: langgenius\/parentchild_chunker\/parentchild_chunker\n provider_type: builtin\n selected: false\n title: Parent-child Chunker\n tool_configurations: {}\n tool_description: Process documents into parent-child chunk structures\n tool_label: Parent-child Chunker\n tool_name: parentchild_chunker\n 
tool_node_version: '2'\n tool_parameters:\n input_text:\n type: mixed\n value: '{{#1751281136356.text#}}'\n max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Parent_Length\n parent_mode:\n type: variable\n value:\n - rag\n - shared\n - Parent_Mode\n remove_extra_spaces:\n type: variable\n value:\n - rag\n - shared\n - clean_1\n remove_urls_emails:\n type: variable\n value:\n - rag\n - shared\n - clean_2\n separator:\n type: mixed\n value: '{{#rag.shared.Parent_Delimiter#}}'\n subchunk_max_length:\n type: variable\n value:\n - rag\n - shared\n - Maximum_Child_Length\n subchunk_separator:\n type: mixed\n value: '{{#rag.shared.Child_Delimiter#}}'\n type: tool\n height: 52\n id: '1751338398711'\n position:\n x: 42.95253988413964\n y: 282\n positionAbsolute:\n x: 42.95253988413964\n y: 282\n selected: false\n sourcePosition: right\n targetPosition: left\n type: custom\n width: 242\n viewport:\n x: 628.3302331655243\n y: 120.08894361588159\n zoom: 0.7027501395646496\n rag_pipeline_variables:\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: paragraph\n label: Parent Mode\n max_length: 48\n options:\n - paragraph\n - full_doc\n placeholder: null\n required: true\n tooltips: 'Parent Mode provides two options: paragraph mode splits text into paragraphs\n as parent chunks for retrieval, while full_doc mode uses the entire document\n as a single parent chunk (text beyond 10,000 tokens will be truncated).'\n type: select\n unit: null\n variable: Parent_Mode\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\\n\n label: Parent Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: A delimiter is the character used to separate text. \\n\\n is recommended\n for splitting the original document into large parent chunks. 
You can also use\n special delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Parent_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 1024\n label: Maximum Parent Length\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Parent_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: \\n\n label: Child Delimiter\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: A delimiter is the character used to separate text. \\n is recommended\n for splitting parent chunks into small child chunks. You can also use special\n delimiters defined by yourself.\n type: text-input\n unit: null\n variable: Child_Delimiter\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: 256\n label: Maximum Child Length\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: number\n unit: tokens\n variable: Maximum_Child_Length\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: true\n label: Replace consecutive spaces, newlines and tabs.\n max_length: 48\n options: []\n placeholder: null\n required: true\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_1\n - allow_file_extension: null\n allow_file_upload_methods: null\n allowed_file_types: null\n belong_to_node_id: shared\n default_value: null\n label: Delete all URLs and email addresses.\n max_length: 48\n options: []\n placeholder: null\n required: false\n tooltips: null\n type: checkbox\n unit: null\n variable: clean_2\n", @@ -7340,4 +7372,4 @@ "name": "Complex PDF with Images & Tables" } } -} \ No newline 
at end of file +}