
website.py

from flask_restx import Resource, fields, reqparse

from controllers.console import api, console_ns
from controllers.console.datasets.error import WebsiteCrawlError
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.website_service import WebsiteCrawlApiRequest, WebsiteCrawlStatusApiRequest, WebsiteService


@console_ns.route("/website/crawl")
class WebsiteCrawlApi(Resource):
    @api.doc("crawl_website")
    @api.doc(description="Crawl website content")
    @api.expect(
        api.model(
            "WebsiteCrawlRequest",
            {
                "provider": fields.String(
                    required=True,
                    description="Crawl provider (firecrawl/watercrawl/jinareader)",
                    enum=["firecrawl", "watercrawl", "jinareader"],
                ),
                "url": fields.String(required=True, description="URL to crawl"),
                "options": fields.Raw(required=True, description="Crawl options"),
            },
        )
    )
    @api.response(200, "Website crawl initiated successfully")
    @api.response(400, "Invalid crawl parameters")
    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        parser = reqparse.RequestParser()
        parser.add_argument(
            "provider",
            type=str,
            choices=["firecrawl", "watercrawl", "jinareader"],
            required=True,
            nullable=True,
            location="json",
        )
        parser.add_argument("url", type=str, required=True, nullable=True, location="json")
        parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
        args = parser.parse_args()

        # Create typed request and validate
        try:
            api_request = WebsiteCrawlApiRequest.from_args(args)
        except ValueError as e:
            raise WebsiteCrawlError(str(e))

        # Crawl URL using typed request
        try:
            result = WebsiteService.crawl_url(api_request)
        except Exception as e:
            raise WebsiteCrawlError(str(e))

        return result, 200


@console_ns.route("/website/crawl/status/<string:job_id>")
class WebsiteCrawlStatusApi(Resource):
    @api.doc("get_crawl_status")
    @api.doc(description="Get website crawl status")
    @api.doc(params={"job_id": "Crawl job ID", "provider": "Crawl provider (firecrawl/watercrawl/jinareader)"})
    @api.response(200, "Crawl status retrieved successfully")
    @api.response(404, "Crawl job not found")
    @api.response(400, "Invalid provider")
    @setup_required
    @login_required
    @account_initialization_required
    def get(self, job_id: str):
        parser = reqparse.RequestParser()
        parser.add_argument(
            "provider", type=str, choices=["firecrawl", "watercrawl", "jinareader"], required=True, location="args"
        )
        args = parser.parse_args()

        # Create typed request and validate
        try:
            api_request = WebsiteCrawlStatusApiRequest.from_args(args, job_id)
        except ValueError as e:
            raise WebsiteCrawlError(str(e))

        # Get crawl status using typed request
        try:
            result = WebsiteService.get_crawl_status_typed(api_request)
        except Exception as e:
            raise WebsiteCrawlError(str(e))

        return result, 200
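For context, here is a minimal sketch of how a client might exercise these two endpoints end to end: start a crawl job, then poll its status. The base URL (http://localhost:5001/console/api), the bearer-token auth header, the job_id response field, and the shape of the options payload are all assumptions for illustration; the file above only defines the routes and their parameters, not the response schema.

import requests

BASE_URL = "http://localhost:5001/console/api"  # assumed mount point; adjust to your deployment
HEADERS = {"Authorization": "Bearer <console-access-token>"}  # assumed auth scheme

# Kick off a crawl job (POST /website/crawl). All three fields are required by the parser.
crawl_resp = requests.post(
    f"{BASE_URL}/website/crawl",
    headers=HEADERS,
    json={
        "provider": "firecrawl",
        "url": "https://example.com",
        "options": {"limit": 5},  # provider-specific; the exact option keys depend on the crawler
    },
)
crawl_resp.raise_for_status()
job_id = crawl_resp.json().get("job_id")  # assumed field name; WebsiteService defines the payload

# Poll the job status (GET /website/crawl/status/<job_id>?provider=...).
# The provider query parameter is required and must match the one used to start the job.
status_resp = requests.get(
    f"{BASE_URL}/website/crawl/status/{job_id}",
    headers=HEADERS,
    params={"provider": "firecrawl"},
)
status_resp.raise_for_status()
print(status_resp.json())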