### What problem does this PR solve?

_Briefly describe what this PR aims to solve. Include background context that will help reviewers understand the purpose of the PR._

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

tags/v0.6.0
| @@ -156,7 +156,7 @@ def filename_type(filename): | |||
| return FileType.PDF.value | |||
| if re.match( | |||
| r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md)$", filename): | |||
| r".*\.(doc|docx|ppt|pptx|yml|xml|htm|json|csv|txt|ini|xls|xlsx|wps|rtf|hlp|pages|numbers|key|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename): | |||
| return FileType.DOC.value | |||
| if re.match( | |||
| @@ -136,7 +136,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, | |||
| excel_parser = ExcelParser() | |||
| sections = [(excel_parser.html(binary), "")] | |||
| elif re.search(r"\.(txt|md)$", filename, re.IGNORECASE): | |||
| elif re.search(r"\.(txt|md|py|js|java|c|cpp|h|php|go|ts|sh|cs|kt)$", filename, re.IGNORECASE): | |||
| callback(0.1, "Start to parse.") | |||
| txt = "" | |||
| if binary: | |||