Nelze vybrat více než 25 témat Téma musí začínat písmenem nebo číslem, může obsahovat pomlčky („-“) a může být dlouhé až 35 znaků.

sandbox_security_tests_full.py 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436
  1. #
  2. # Copyright 2025 The InfiniFlow Authors. All Rights Reserved.
  3. #
  4. # Licensed under the Apache License, Version 2.0 (the "License");
  5. # you may not use this file except in compliance with the License.
  6. # You may obtain a copy of the License at
  7. #
  8. # http://www.apache.org/licenses/LICENSE-2.0
  9. #
  10. # Unless required by applicable law or agreed to in writing, software
  11. # distributed under the License is distributed on an "AS IS" BASIS,
  12. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. # See the License for the specific language governing permissions and
  14. # limitations under the License.
  15. #
  16. import base64
  17. import os
  18. import textwrap
  19. import time
  20. from concurrent.futures import ThreadPoolExecutor, as_completed
  21. from enum import Enum
  22. from typing import Dict, Optional
  23. import requests
  24. from pydantic import BaseModel
  25. API_URL = os.getenv("SANDBOX_API_URL", "http://localhost:9385/run")
  26. TIMEOUT = 15
  27. MAX_WORKERS = 5
  28. class ResultStatus(str, Enum):
  29. SUCCESS = "success"
  30. PROGRAM_ERROR = "program_error"
  31. RESOURCE_LIMIT_EXCEEDED = "resource_limit_exceeded"
  32. UNAUTHORIZED_ACCESS = "unauthorized_access"
  33. RUNTIME_ERROR = "runtime_error"
  34. PROGRAM_RUNNER_ERROR = "program_runner_error"
  35. class ResourceLimitType(str, Enum):
  36. TIME = "time"
  37. MEMORY = "memory"
  38. OUTPUT = "output"
  39. class UnauthorizedAccessType(str, Enum):
  40. DISALLOWED_SYSCALL = "disallowed_syscall"
  41. FILE_ACCESS = "file_access"
  42. NETWORK_ACCESS = "network_access"
  43. class RuntimeErrorType(str, Enum):
  44. SIGNALLED = "signalled"
  45. NONZERO_EXIT = "nonzero_exit"
  46. class ExecutionResult(BaseModel):
  47. status: ResultStatus
  48. stdout: str
  49. stderr: str
  50. exit_code: int
  51. detail: Optional[str] = None
  52. resource_limit_type: Optional[ResourceLimitType] = None
  53. unauthorized_access_type: Optional[UnauthorizedAccessType] = None
  54. runtime_error_type: Optional[RuntimeErrorType] = None
  55. class TestResult(BaseModel):
  56. name: str
  57. passed: bool
  58. duration: float
  59. expected_failure: bool = False
  60. result: Optional[ExecutionResult] = None
  61. error: Optional[str] = None
  62. validation_error: Optional[str] = None
  63. def encode_code(code: str) -> str:
  64. return base64.b64encode(code.encode("utf-8")).decode("utf-8")
  65. def execute_single_test(name: str, code: str, language: str, arguments: dict, expect_fail: bool = False) -> TestResult:
  66. """Execute a single test case"""
  67. payload = {
  68. "code_b64": encode_code(textwrap.dedent(code)),
  69. "language": language,
  70. "arguments": arguments,
  71. }
  72. test_result = TestResult(name=name, passed=False, duration=0, expected_failure=expect_fail)
  73. really_processed = False
  74. try:
  75. while not really_processed:
  76. start_time = time.perf_counter()
  77. resp = requests.post(API_URL, json=payload, timeout=TIMEOUT)
  78. resp.raise_for_status()
  79. response_data = resp.json()
  80. if response_data["exit_code"] == -429: # too many request
  81. print(f"[{name}] Reached request limit, retring...")
  82. time.sleep(0.5)
  83. continue
  84. really_processed = True
  85. print("-------------------")
  86. print(f"{name}:\n{response_data}")
  87. print("-------------------")
  88. test_result.duration = time.perf_counter() - start_time
  89. test_result.result = ExecutionResult(**response_data)
  90. # Validate test result expectations
  91. validate_test_result(name, expect_fail, test_result)
  92. except requests.exceptions.RequestException as e:
  93. test_result.duration = time.perf_counter() - start_time
  94. test_result.error = f"Request failed: {str(e)}"
  95. test_result.result = ExecutionResult(
  96. status=ResultStatus.PROGRAM_RUNNER_ERROR,
  97. stdout="",
  98. stderr=str(e),
  99. exit_code=-999,
  100. detail="request_failed",
  101. )
  102. return test_result
  103. def validate_test_result(name: str, expect_fail: bool, test_result: TestResult):
  104. """Validate if the test result meets expectations"""
  105. if not test_result.result:
  106. test_result.passed = False
  107. test_result.validation_error = "No result returned"
  108. return
  109. test_result.passed = test_result.result.status == ResultStatus.SUCCESS
  110. # General validation logic
  111. if expect_fail:
  112. # Tests expected to fail should return a non-success status
  113. if test_result.passed:
  114. test_result.validation_error = "Expected failure but actually succeeded"
  115. else:
  116. # Tests expected to succeed should return a success status
  117. if not test_result.passed:
  118. test_result.validation_error = f"Unexpected failure (status={test_result.result.status})"
  119. def get_test_cases() -> Dict[str, dict]:
  120. """Return test cases (code, whether expected to fail)"""
  121. return {
  122. "1 Infinite loop: Should be forcibly terminated": {
  123. "code": """
  124. def main():
  125. while True:
  126. pass
  127. """,
  128. "should_fail": True,
  129. "arguments": {},
  130. "language": "python",
  131. },
  132. "2 Infinite loop: Should be forcibly terminated": {
  133. "code": """
  134. def main():
  135. while True:
  136. pass
  137. """,
  138. "should_fail": True,
  139. "arguments": {},
  140. "language": "python",
  141. },
  142. "3 Infinite loop: Should be forcibly terminated": {
  143. "code": """
  144. def main():
  145. while True:
  146. pass
  147. """,
  148. "should_fail": True,
  149. "arguments": {},
  150. "language": "python",
  151. },
  152. "4 Infinite loop: Should be forcibly terminated": {
  153. "code": """
  154. def main():
  155. while True:
  156. pass
  157. """,
  158. "should_fail": True,
  159. "arguments": {},
  160. "language": "python",
  161. },
  162. "5 Infinite loop: Should be forcibly terminated": {
  163. "code": """
  164. def main():
  165. while True:
  166. pass
  167. """,
  168. "should_fail": True,
  169. "arguments": {},
  170. "language": "python",
  171. },
  172. "6 Infinite loop: Should be forcibly terminated": {
  173. "code": """
  174. def main():
  175. while True:
  176. pass
  177. """,
  178. "should_fail": True,
  179. "arguments": {},
  180. "language": "python",
  181. },
  182. "7 Normal test: Python without dependencies": {
  183. "code": """
  184. def main():
  185. return {"data": "hello, world"}
  186. """,
  187. "should_fail": False,
  188. "arguments": {},
  189. "language": "python",
  190. },
  191. "8 Normal test: Python with pandas, should pass without any error": {
  192. "code": """
  193. import pandas as pd
  194. def main():
  195. data = {'Name': ['Alice', 'Bob', 'Charlie'],
  196. 'Age': [25, 30, 35]}
  197. df = pd.DataFrame(data)
  198. """,
  199. "should_fail": False,
  200. "arguments": {},
  201. "language": "python",
  202. },
  203. "9 Normal test: Nodejs without dependencies, should pass without any error": {
  204. "code": """
  205. const https = require('https');
  206. async function main(args) {
  207. return new Promise((resolve, reject) => {
  208. const req = https.get('https://example.com/', (res) => {
  209. let data = '';
  210. res.on('data', (chunk) => {
  211. data += chunk;
  212. });
  213. res.on('end', () => {
  214. clearTimeout(timeout);
  215. console.log('Body:', data);
  216. resolve(data);
  217. });
  218. });
  219. const timeout = setTimeout(() => {
  220. req.destroy(new Error('Request timeout after 10s'));
  221. }, 10000);
  222. req.on('error', (err) => {
  223. clearTimeout(timeout);
  224. console.error('Error:', err.message);
  225. reject(err);
  226. });
  227. });
  228. }
  229. module.exports = { main };
  230. """,
  231. "should_fail": False,
  232. "arguments": {},
  233. "language": "nodejs",
  234. },
  235. "10 Normal test: Nodejs with axios, should pass without any error": {
  236. "code": """
  237. const axios = require('axios');
  238. async function main(args) {
  239. try {
  240. const response = await axios.get('https://example.com/', {
  241. timeout: 10000
  242. });
  243. console.log('Body:', response.data);
  244. } catch (error) {
  245. console.error('Error:', error.message);
  246. }
  247. }
  248. module.exports = { main };
  249. """,
  250. "should_fail": False,
  251. "arguments": {},
  252. "language": "nodejs",
  253. },
  254. "11 Dangerous import: Should fail due to os module import": {
  255. "code": """
  256. import os
  257. def main():
  258. pass
  259. """,
  260. "should_fail": True,
  261. "arguments": {},
  262. "language": "python",
  263. },
  264. "12 Dangerous import from subprocess: Should fail due to subprocess import": {
  265. "code": """
  266. from subprocess import Popen
  267. def main():
  268. pass
  269. """,
  270. "should_fail": True,
  271. "arguments": {},
  272. "language": "python",
  273. },
  274. "13 Dangerous call: Should fail due to eval function call": {
  275. "code": """
  276. def main():
  277. eval('os.system("echo hello")')
  278. """,
  279. "should_fail": True,
  280. "arguments": {},
  281. "language": "python",
  282. },
  283. "14 Dangerous attribute access: Should fail due to shutil.rmtree": {
  284. "code": """
  285. import shutil
  286. def main():
  287. shutil.rmtree('/some/path')
  288. """,
  289. "should_fail": True,
  290. "arguments": {},
  291. "language": "python",
  292. },
  293. "15 Dangerous binary operation: Should fail due to unsafe concatenation leading to eval": {
  294. "code": """
  295. def main():
  296. dangerous_string = "os." + "system"
  297. eval(dangerous_string + '("echo hello")')
  298. """,
  299. "should_fail": True,
  300. "arguments": {},
  301. "language": "python",
  302. },
  303. "16 Dangerous function definition: Should fail due to user-defined eval function": {
  304. "code": """
  305. def eval_function():
  306. eval('os.system("echo hello")')
  307. def main():
  308. eval_function()
  309. """,
  310. "should_fail": True,
  311. "arguments": {},
  312. "language": "python",
  313. },
  314. "17 Memory exhaustion(256m): Should fail due to exceeding memory limit(try to allocate 300m)": {
  315. "code": """
  316. def main():
  317. x = ['a' * 1024 * 1024] * 300 # 300MB
  318. """,
  319. "should_fail": True,
  320. "arguments": {},
  321. "language": "python",
  322. },
  323. }
  324. def print_test_report(results: Dict[str, TestResult]):
  325. print("\n=== 🔍 Test Report ===")
  326. max_name_len = max(len(name) for name in results)
  327. for name, result in results.items():
  328. status = "✅" if result.passed else "❌"
  329. if result.expected_failure:
  330. status = "⚠️" if result.passed else "✓" # Expected failure case
  331. print(f"{status} {name.ljust(max_name_len)} {result.duration:.2f}s")
  332. if result.error:
  333. print(f" REQUEST ERROR: {result.error}")
  334. if result.validation_error:
  335. print(f" VALIDATION ERROR: {result.validation_error}")
  336. if result.result and not result.passed:
  337. print(f" STATUS: {result.result.status}")
  338. if result.result.stderr:
  339. print(f" STDERR: {result.result.stderr[:200]}...")
  340. if result.result.detail:
  341. print(f" DETAIL: {result.result.detail}")
  342. passed = sum(1 for r in results.values() if ((not r.expected_failure and r.passed) or (r.expected_failure and not r.passed)))
  343. failed = len(results) - passed
  344. print("\n=== 📊 Statistics ===")
  345. print(f"✅ Passed: {passed}")
  346. print(f"❌ Failed: {failed}")
  347. print(f"📌 Total: {len(results)}")
  348. def main():
  349. print(f"🔐 Starting sandbox security tests (API: {API_URL})")
  350. print(f"🚀 Concurrent threads: {MAX_WORKERS}")
  351. test_cases = get_test_cases()
  352. results = {}
  353. with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
  354. futures = {}
  355. for name, detail in test_cases.items():
  356. # ✅ Log when a task is submitted
  357. print(f"✅ Task submitted: {name}")
  358. time.sleep(0.4)
  359. future = executor.submit(execute_single_test, name, detail["code"], detail["language"], detail["arguments"], detail["should_fail"])
  360. futures[future] = name
  361. print("\n=== 🚦 Test Progress ===")
  362. for i, future in enumerate(as_completed(futures)):
  363. name = futures[future]
  364. print(f" {i + 1}/{len(test_cases)} completed: {name}")
  365. try:
  366. results[name] = future.result()
  367. except Exception as e:
  368. print(f"⚠️ Test {name} execution exception: {str(e)}")
  369. results[name] = TestResult(name=name, passed=False, duration=0, error=f"Execution exception: {str(e)}")
  370. print_test_report(results)
  371. if any(not r.passed and not r.expected_failure for r in results.values()):
  372. exit(1)
  373. if __name__ == "__main__":
  374. main()