From 50d03b1815bc57b989fc6c7baf56503392acc658 Mon Sep 17 00:00:00 2001
From: jlugjb <xiaogong@sohu.com>
Date: Fri, 28 Apr 2017 16:51:28 +0800
Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E4=B8=AD=E9=97=B4=E6=9C=89?=
 =?UTF-8?q?=E5=A4=9A=E4=B8=AA=E7=A9=BA=E6=A0=BC=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

测试用例（“正文”与“第”之间有多个空格）：
print __is_chapter_title(u"正文  第647章 战战和和")
改用状态机实现，代码更容易阅读和调试。
---
 txt2mobi/txt2html.py | 49 ++++++++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/txt2mobi/txt2html.py b/txt2mobi/txt2html.py
index d548e0a..196899a 100644
--- a/txt2mobi/txt2html.py
+++ b/txt2mobi/txt2html.py
@@ -151,29 +151,34 @@ def __is_chapter_title(self, line):
                 if re.match(self.title_filter, strip_line):
                     return True
         else:
-            if line.strip().startswith(u'第'):
-                if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line:
-                    return True
-            if line.strip().startswith(u'第'):
-                if 3 < len(line.strip()) < 30 and u"第" in line and u"张" in line:
-                    return True
-            if line.strip().startswith(u'正文 第'):
-                if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line:
-                    return True
-            line = line.replace(u"．", u".").replace(u":", u".")
-            if line.split('.')[0].isdigit():
-                if 3 < len(line.strip()) < 20:
-                    return True
-            if len(line) < 20 and (line.strip()[:3].isdigit() or line.strip()[:4].isdigit()):
-                return True
-            if len(line) < 40 and u"第" in line and u"卷" in line:
-                if line[line.index(u"第") + 1: line.index(u"卷")] in [u"一", u"二", u"三", u"四", u"五", u"六", u"七", u"八", u"九", u"十"]:
-                    return True
-            if line.strip().startswith(u'[第'):
-                if 3 < len(line.strip()) < 30 and u"第" in line and u"章" in line:
-                    return True
+            flag = "content"
+            for ch in line:
+                if flag == "content" and ch == u"第":
+                    flag = "starttitle"
+                    continue
+
+                if flag == "starttitle":
+                    if (ch == " " or ch == u" "):
+                        flag = "starttitle"
+                        continue
+                    elif  re.match(u"([0-9一二三四五六七八九十]+)", ch):
+                        flag = "number"
+                        continue
+                    else:
+                        flag = "content"
 
-        return False
+                if flag == "number" :
+                    if re.match(u"([0-9一二三四五六七八九十]+)", ch):
+                        flag = "number"
+                        continue
+                    elif  (ch == " " or ch == u" "):
+                        flag = "number"
+                        continue
+                    if  (ch == u"张" or ch == u"章" or ch == u"卷"):
+                        return True
+                    else:
+                        flag = "content"
+        return  False
 
     def process_lines(self, lines):
         """