diff --git a/txt2mobi/txt2html.py b/txt2mobi/txt2html.py index d548e0a..196899a 100644 --- a/txt2mobi/txt2html.py +++ b/txt2mobi/txt2html.py @@ -151,29 +151,34 @@ def __is_chapter_title(self, line): if re.match(self.title_filter, strip_line): return True else: - if line.strip().startswith(u'第'): - if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line: - return True - if line.strip().startswith(u'第'): - if 3 < len(line.strip()) < 30 and u"第" in line and u"张" in line: - return True - if line.strip().startswith(u'正文 第'): - if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line: - return True - line = line.replace(u".", u".").replace(u":", u".") - if line.split('.')[0].isdigit(): - if 3 < len(line.strip()) < 20: - return True - if len(line) < 20 and (line.strip()[:3].isdigit() or line.strip()[:4].isdigit()): - return True - if len(line) < 40 and u"第" in line and u"卷" in line: - if line[line.index(u"第") + 1: line.index(u"卷")] in [u"一", u"二", u"三", u"四", u"五", u"六", u"七", u"八", u"九", u"十"]: - return True - if line.strip().startswith(u'[第'): - if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line: - return True + flag = "content" + for ch in line: + if flag == "content" and ch == u"第": + flag = "starttitle" + continue + + if flag == "starttitle": + if (ch == " " or ch == u" "): + flag = "starttitle" + continue + elif re.match(u"([0-9一二三四五六七八九十]+)", ch): + flag = "number" + continue + else: + flag = "content" - return False + if flag == "number" : + if re.match(u"([0-9一二三四五六七八九十]+)", ch): + flag = "number" + continue + elif (ch == " " or ch == u" "): + flag = "number" + continue + if (ch == u"张" or ch == u"章" or ch == u"卷"): + return True + else: + flag = "content" + return False def process_lines(self, lines): """