website.py

from flask_restful import Resource, reqparse  # type: ignore

from controllers.console import api
from controllers.console.datasets.error import WebsiteCrawlError
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.website_service import WebsiteService


class WebsiteCrawlApi(Resource):
    # Starts a crawl of the given URL with the selected provider.
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument(
            "provider",
            type=str,
            choices=["firecrawl", "watercrawl", "jinareader"],
            required=True,
            nullable=True,
            location="json",
        )
        parser.add_argument("url", type=str, required=True, nullable=True, location="json")
        parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
        args = parser.parse_args()
        WebsiteService.document_create_args_validate(args)
        # crawl url
        try:
            result = WebsiteService.crawl_url(args)
        except Exception as e:
            raise WebsiteCrawlError(str(e))
        return result, 200


class WebsiteCrawlStatusApi(Resource):
    # Returns the status of a previously started crawl job.
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, job_id: str):
        parser = reqparse.RequestParser()
        parser.add_argument(
            "provider", type=str, choices=["firecrawl", "watercrawl", "jinareader"], required=True, location="args"
        )
        args = parser.parse_args()
        # get crawl status
        try:
            result = WebsiteService.get_crawl_status(job_id, args["provider"])
        except Exception as e:
            raise WebsiteCrawlError(str(e))
        return result, 200


api.add_resource(WebsiteCrawlApi, "/website/crawl")
api.add_resource(WebsiteCrawlStatusApi, "/website/crawl/status/<string:job_id>")
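
For illustration only, a minimal client-side sketch of how these two endpoints might be exercised over HTTP. The base URL, the /console/api prefix, the bearer token, and the job_id key in the response are assumptions not shown in this file; the request bodies mirror the parameters declared by the reqparse parsers above (provider, url, options for the POST, and provider as a query argument for the status check).

# Hypothetical client sketch; base URL, API prefix, token, and response shape are assumptions.
import requests

BASE = "http://localhost:5001/console/api"              # assumed deployment URL and prefix
HEADERS = {"Authorization": "Bearer <console-token>"}    # placeholder credential

# Start a crawl: provider, url, and options match the JSON arguments parsed by WebsiteCrawlApi.post.
resp = requests.post(
    f"{BASE}/website/crawl",
    json={"provider": "firecrawl", "url": "https://example.com", "options": {"limit": 1}},
    headers=HEADERS,
)
job = resp.json()

# Poll the crawl status; "job_id" as the key in the crawl response is an assumption.
status = requests.get(
    f"{BASE}/website/crawl/status/{job.get('job_id')}",
    params={"provider": "firecrawl"},
    headers=HEADERS,
)
print(status.json())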