Эх сурвалжийг харах

Improve chapter recognition (#8050)

stevenyomi 2 жил өмнө
parent
commit
4b4a138eee

+ 22 - 43
app/src/main/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognition.kt

@@ -4,35 +4,31 @@ package eu.kanade.tachiyomi.util.chapter
  * -R> = regex conversion.
  */
 object ChapterRecognition {
+
+    private const val NUMBER_PATTERN = """([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?"""
+
     /**
      * All cases with Ch.xx
      * Mokushiroku Alice Vol.1 Ch. 4: Misrepresentation -R> 4
      */
-    private val basic = Regex("""(?<=ch\.) *([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
+    private val basic = Regex("""(?<=ch\.) *$NUMBER_PATTERN""")
 
     /**
-     * Regex used when only one number occurrence
      * Example: Bleach 567: Down With Snowwhite -R> 567
      */
-    private val occurrence = Regex("""([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
-
-    /**
-     * Regex used when manga title removed
-     * Example: Solanin 028 Vol. 2 -> 028 Vol.2 -> 028Vol.2 -R> 028
-     */
-    private val withoutManga = Regex("""^([0-9]+)(\.[0-9]+)?(\.?[a-z]+)?""")
+    private val number = Regex(NUMBER_PATTERN)
 
     /**
      * Regex used to remove unwanted tags
      * Example Prison School 12 v.1 vol004 version1243 volume64 -R> Prison School 12
      */
-    private val unwanted = Regex("""(?<![a-z])(v|ver|vol|version|volume|season|s).?[0-9]+""")
+    private val unwanted = Regex("""\b(?:v|ver|vol|version|volume|season|s)[^a-z]?[0-9]+""")
 
     /**
      * Regex used to remove unwanted whitespace
      * Example One Piece 12 special -R> One Piece 12special
      */
-    private val unwantedWhiteSpace = Regex("""(\s)(extra|special|omake)""")
+    private val unwantedWhiteSpace = Regex("""\s(?=extra|special|omake)""")
 
     fun parseChapterNumber(mangaTitle: String, chapterName: String, chapterNumber: Float? = null): Float {
         // If chapter number is known return.
@@ -43,40 +39,23 @@ object ChapterRecognition {
         // Get chapter title with lower case
         var name = chapterName.lowercase()
 
+        // Remove manga title from chapter title.
+        name = name.replace(mangaTitle.lowercase(), "").trim()
+
         // Replace commas and hyphens with periods.
         name = name.replace(',', '.').replace('-', '.')
 
         // Remove unwanted white spaces.
-        unwantedWhiteSpace.findAll(name).let {
-            it.forEach { occurrence -> name = name.replace(occurrence.value, occurrence.value.trim()) }
-        }
+        name = unwantedWhiteSpace.replace(name, "")
 
         // Remove unwanted tags.
-        unwanted.findAll(name).let {
-            it.forEach { occurrence -> name = name.replace(occurrence.value, "") }
-        }
+        name = unwanted.replace(name, "")
 
         // Check base case ch.xx
-        getChapterNumberFromMatch(basic.find(name))?.let { return it }
-
-        // Check one number occurrence.
-        val occurrences: MutableList<MatchResult> = arrayListOf()
-        occurrence.findAll(name).let {
-            it.forEach { occurrence -> occurrences.add(occurrence) }
-        }
-
-        if (occurrences.size == 1) {
-            getChapterNumberFromMatch(occurrences[0])?.let { return it }
-        }
-
-        // Remove manga title from chapter title.
-        val nameWithoutManga = name.replace(mangaTitle.lowercase(), "").trim()
-
-        // Check if first value is number after title remove.
-        getChapterNumberFromMatch(withoutManga.find(nameWithoutManga))?.let { return it }
+        basic.find(name)?.let { return getChapterNumberFromMatch(it) }
 
         // Take the first number encountered.
-        getChapterNumberFromMatch(occurrence.find(nameWithoutManga))?.let { return it }
+        number.find(name)?.let { return getChapterNumberFromMatch(it) }
 
         return chapterNumber ?: -1f
     }
@@ -86,8 +65,8 @@ object ChapterRecognition {
      * @param match result of regex
      * @return chapter number if found else null
      */
-    private fun getChapterNumberFromMatch(match: MatchResult?): Float? {
-        return match?.let {
+    private fun getChapterNumberFromMatch(match: MatchResult): Float {
+        return match.let {
             val initial = it.groups[1]?.value?.toFloat()!!
             val subChapterDecimal = it.groups[2]?.value
             val subChapterAlpha = it.groups[3]?.value
@@ -120,11 +99,9 @@ object ChapterRecognition {
                 return .97f
             }
 
-            return if (alpha[0] == '.') {
-                // Take value after (.)
-                parseAlphaPostFix(alpha[1])
-            } else {
-                parseAlphaPostFix(alpha[0])
+            val trimmedAlpha = alpha.trimStart('.')
+            if (trimmedAlpha.length == 1) {
+                return parseAlphaPostFix(trimmedAlpha[0])
             }
         }
 
@@ -135,6 +112,8 @@ object ChapterRecognition {
      * x.a -> x.1, x.b -> x.2, etc
      */
     private fun parseAlphaPostFix(alpha: Char): Float {
-        return ("0." + (alpha.code - 96).toString()).toFloat()
+        val number = alpha.code - ('a'.code - 1)
+        if (number >= 10) return 0f
+        return number / 10f
     }
 }

+ 10 - 3
app/src/test/java/eu/kanade/tachiyomi/util/chapter/ChapterRecognitionTest.kt

@@ -198,7 +198,6 @@ class ChapterRecognitionTest {
 
         assertChapter(mangaTitle, "Fairy Tail 404.extravol002", 404.99f)
         assertChapter(mangaTitle, "Fairy Tail 404 extravol002", 404.99f)
-        assertChapter(mangaTitle, "Fairy Tail 404.evol002", 404.5f)
     }
 
     @Test
@@ -207,7 +206,6 @@ class ChapterRecognitionTest {
 
         assertChapter(mangaTitle, "Fairy Tail 404.omakevol002", 404.98f)
         assertChapter(mangaTitle, "Fairy Tail 404 omakevol002", 404.98f)
-        assertChapter(mangaTitle, "Fairy Tail 404.ovol002", 404.15f)
     }
 
     @Test
@@ -216,7 +214,6 @@ class ChapterRecognitionTest {
 
         assertChapter(mangaTitle, "Fairy Tail 404.specialvol002", 404.97f)
         assertChapter(mangaTitle, "Fairy Tail 404 specialvol002", 404.97f)
-        assertChapter(mangaTitle, "Fairy Tail 404.svol002", 404.19f)
     }
 
     @Test
@@ -253,6 +250,16 @@ class ChapterRecognitionTest {
         assertChapter("One Outs", "One Outs 001", 1f)
     }
 
+    @Test
+    fun `Chapters containing ordinals`() {
+        val mangaTitle = "The Sister of the Woods with a Thousand Young"
+
+        assertChapter(mangaTitle, "The 1st Night", 1f)
+        assertChapter(mangaTitle, "The 2nd Night", 2f)
+        assertChapter(mangaTitle, "The 3rd Night", 3f)
+        assertChapter(mangaTitle, "The 4th Night", 4f)
+    }
+
     private fun assertChapter(mangaTitle: String, name: String, expected: Float) {
         val chapterNumber = ChapterRecognition.parseChapterNumber(mangaTitle, name)
         assertEquals(chapterNumber, expected)