| """ | """ | ||||
| from unstructured.partition.api import partition_via_api | from unstructured.partition.api import partition_via_api | ||||
| if not (dify_config.UNSTRUCTURED_API_URL and dify_config.UNSTRUCTURED_API_KEY): | |||||
| raise TextExtractionError("UNSTRUCTURED_API_URL and UNSTRUCTURED_API_KEY must be set") | |||||
| if not dify_config.UNSTRUCTURED_API_URL: | |||||
| raise TextExtractionError("UNSTRUCTURED_API_URL must be set") | |||||
| try: | try: | ||||
| with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: | with tempfile.NamedTemporaryFile(suffix=".doc", delete=False) as temp_file: | ||||
| file=file, | file=file, | ||||
| metadata_filename=temp_file.name, | metadata_filename=temp_file.name, | ||||
| api_url=dify_config.UNSTRUCTURED_API_URL, | api_url=dify_config.UNSTRUCTURED_API_URL, | ||||
| api_key=dify_config.UNSTRUCTURED_API_KEY, | |||||
| api_key=dify_config.UNSTRUCTURED_API_KEY, # type: ignore | |||||
| ) | ) | ||||
| os.unlink(temp_file.name) | os.unlink(temp_file.name) | ||||
| return "\n".join([getattr(element, "text", "") for element in elements]) | return "\n".join([getattr(element, "text", "") for element in elements]) |