| @@ -6,6 +6,12 @@ from urllib.parse import urljoin | |||
| import requests | |||
| from requests import Response | |||
| from core.rag.extractor.watercrawl.exceptions import ( | |||
| WaterCrawlAuthenticationError, | |||
| WaterCrawlBadRequestError, | |||
| WaterCrawlPermissionError, | |||
| ) | |||
| class BaseAPIClient: | |||
| def __init__(self, api_key, base_url): | |||
| @@ -53,6 +59,15 @@ class WaterCrawlAPIClient(BaseAPIClient): | |||
| yield data | |||
| def process_response(self, response: Response) -> dict | bytes | list | None | Generator: | |||
| if response.status_code == 401: | |||
| raise WaterCrawlAuthenticationError(response) | |||
| if response.status_code == 403: | |||
| raise WaterCrawlPermissionError(response) | |||
| if 400 <= response.status_code < 500: | |||
| raise WaterCrawlBadRequestError(response) | |||
| response.raise_for_status() | |||
| if response.status_code == 204: | |||
| return None | |||
| @@ -0,0 +1,32 @@ | |||
| import json | |||
class WaterCrawlError(Exception):
    """Root of the WaterCrawl exception hierarchy.

    Adds no behaviour of its own; it exists so that callers can catch
    every WaterCrawl-specific failure with one ``except`` clause.
    """
class WaterCrawlBadRequestError(WaterCrawlError):
    """Error built from an HTTP error response returned by WaterCrawl.

    Attributes set by the constructor:
        status_code: ``response.status_code``.
        response: the response object itself, kept for callers that need it.
        message: the ``"message"`` entry of the JSON body, or
            ``"Unknown error occurred"`` when it is absent.
        errors: the ``"errors"`` entry of the JSON body, or ``{}`` when it is
            absent.
    """

    def __init__(self, response):
        """Extract status, message and error details from *response*.

        Args:
            response: an object exposing ``status_code`` and ``json()``
                (a ``requests.Response`` in the client that raises this).
        """
        self.status_code = response.status_code
        self.response = response

        # Error responses are not guaranteed to carry a JSON body (empty
        # body, HTML error page, plain text). Decoding failures surface as
        # ValueError subclasses; letting one escape from here would replace
        # the real HTTP error with an unrelated decoding error.
        try:
            data = response.json()
        except ValueError:
            data = None

        # A JSON body that is not an object (list, string, null, ...) has no
        # "message"/"errors" keys to read, so treat it like a missing body.
        if not isinstance(data, dict):
            data = {}

        self.message = data.get("message", "Unknown error occurred")
        self.errors = data.get("errors", {})
        super().__init__(self.message)

    @property
    def flat_errors(self):
        """Return ``self.errors`` serialised as a JSON string."""
        return json.dumps(self.errors)

    def __str__(self):
        """Return the message followed by the serialised error details."""
        return f"WaterCrawlBadRequestError: {self.message} \n {self.flat_errors}"
class WaterCrawlPermissionError(WaterCrawlBadRequestError):
    """Bad-request error variant whose text reports exceeded API limits."""

    def __str__(self):
        """Return the limit-exceeded notice followed by the stored message."""
        detail = self.message
        return f"You are exceeding your WaterCrawl API limits. {detail}"
class WaterCrawlAuthenticationError(WaterCrawlBadRequestError):
    """Bad-request error variant whose text reports an invalid or expired API key."""

    def __str__(self):
        """Return a fixed notice; the stored message is not included."""
        notice = "WaterCrawl API key is invalid or expired. Please check your API key and try again."
        return notice