|
1 | 1 | import logging |
2 | 2 | import re |
| 3 | +from pathlib import Path |
3 | 4 | from typing import List, Tuple |
4 | 5 |
|
5 | | -from docx.document import Document as _Document |
| 6 | +from common import LINE_RE |
6 | 7 | from docx import Document |
| 8 | +from docx.document import Document as _Document |
| 9 | +from docx.oxml import CT_SectPr |
7 | 10 | from docx.oxml.table import CT_Tbl |
8 | 11 | from docx.oxml.text.paragraph import CT_P |
9 | | -from docx.oxml import CT_SectPr |
10 | 12 | from docx.table import Table, _Cell, _Row |
11 | 13 | from docx.text.paragraph import Paragraph |
12 | 14 | from parsers.base import Parser |
13 | | -from common import LINE_RE |
14 | 15 |
|
15 | 16 | logger = logging.getLogger(__name__) |
16 | 17 |
|
@@ -49,12 +50,14 @@ def iter_block_items(self, parent): |
49 | 50 | yield Table(child, parent) |
50 | 51 |
|
def parse(self, result, detail) -> Tuple[str, str, List[str]]:
    """Fetch the Word document for one entry and delegate to ``parse_document``.

    ``result`` provides the ``"level"`` and ``"title"`` strings, each stripped
    of surrounding whitespace before use.  ``detail`` provides the ``"path"``
    handed to ``self.request.get_word`` together with ``Path(level) / title``.

    Returns whatever ``self.parse_document(document, title)`` returns.  When
    ``get_word`` yields a falsy value a warning is logged and ``None`` is
    returned instead of a tuple.
    """
    entry_level = result["level"].strip()
    entry_title = result["title"].strip()

    word_doc = self.request.get_word(
        detail["path"],
        Path(entry_level) / entry_title,
    )
    if word_doc:
        return self.parse_document(word_doc, entry_title)

    # Nothing usable came back for this path: report it and give up.
    logger.warning(f"document {detail['path']} not exists")
    return None
59 | 62 |
|
60 | 63 | def parse_document(self, document, title): |
|
0 commit comments