Эх сурвалжийг харах

Pull basic manga/chapter metadata from EPUB files. (#2962)

* Fill manga and chapter metadata with EPUB metadata where appropriate.

* Use fuzzy-match to remove manga titles from chapter names.

This allows removing manga titles from the metadata title of local EPUB chapters with characters that can't be in the manga directory's name due to filesystem limitations.

* Use a more standard way of parsing EPUB dates.

* Use a date format compatible with older Android versions for EPUBs.
Steven Smith 5 жил өмнө
parent
commit
305ee3c12e

+ 58 - 8
app/src/main/java/eu/kanade/tachiyomi/source/LocalSource.kt

@@ -112,12 +112,20 @@ class LocalSource(private val context: Context) : CatalogueSource {
                     }
                     }
                 }
                 }
 
 
-                // Copy the cover from the first chapter found.
-                if (thumbnail_url == null) {
-                    val chapters = fetchChapterList(this).toBlocking().first()
-                    if (chapters.isNotEmpty()) {
+                val chapters = fetchChapterList(this).toBlocking().first()
+                if (chapters.isNotEmpty()) {
+                    val chapter = chapters.last()
+                    val format = getFormat(chapter)
+                    if (format is Format.Epub) {
+                        EpubFile(format.file).use { epub ->
+                            epub.fillMangaMetadata(this)
+                        }
+                    }
+
+                    // Copy the cover from the first chapter found.
+                    if (thumbnail_url == null) {
                         try {
                         try {
-                            val dest = updateCover(chapters.last(), this)
+                            val dest = updateCover(chapter, this)
                             thumbnail_url = dest?.absolutePath
                             thumbnail_url = dest?.absolutePath
                         } catch (e: Exception) {
                         } catch (e: Exception) {
                             Timber.e(e)
                             Timber.e(e)
@@ -158,14 +166,22 @@ class LocalSource(private val context: Context) : CatalogueSource {
             .map { chapterFile ->
             .map { chapterFile ->
                 SChapter.create().apply {
                 SChapter.create().apply {
                     url = "${manga.url}/${chapterFile.name}"
                     url = "${manga.url}/${chapterFile.name}"
-                    val chapName = if (chapterFile.isDirectory) {
+                    name = if (chapterFile.isDirectory) {
                         chapterFile.name
                         chapterFile.name
                     } else {
                     } else {
                         chapterFile.nameWithoutExtension
                         chapterFile.nameWithoutExtension
                     }
                     }
-                    val chapNameCut = chapName.replace(manga.title, "", true).trim(' ', '-', '_')
-                    name = if (chapNameCut.isEmpty()) chapName else chapNameCut
                     date_upload = chapterFile.lastModified()
                     date_upload = chapterFile.lastModified()
+
+                    val format = getFormat(this)
+                    if (format is Format.Epub) {
+                        EpubFile(format.file).use { epub ->
+                            epub.fillChapterMetadata(this)
+                        }
+                    }
+
+                    val chapNameCut = stripMangaTitle(name, manga.title)
+                    if (chapNameCut.isNotEmpty()) name = chapNameCut
                     ChapterRecognition.parseChapterNumber(this, manga)
                     ChapterRecognition.parseChapterNumber(this, manga)
                 }
                 }
             }
             }
@@ -180,6 +196,40 @@ class LocalSource(private val context: Context) : CatalogueSource {
         return Observable.just(chapters)
         return Observable.just(chapters)
     }
     }
 
 
+    /**
+     * Removes a leading occurrence of [mangaTitle] from [chapterName].
+     *
+     * Both strings are walked in parallel and compared case-insensitively. A character that is
+     * neither a letter/digit nor whitespace is skipped on whichever side it occurs, so punctuation
+     * that appears in only one of the two strings does not break the match.
+     *
+     * @param chapterName the chapter name that may start with the manga title.
+     * @param mangaTitle the title to remove from the start of [chapterName].
+     * @return [chapterName] unchanged as soon as a letter, digit or whitespace character differs;
+     * otherwise the part of [chapterName] after the position reached when either string ran out,
+     * with leading ' ', '-', '_', ',' and ':' removed. The result may be empty.
+     */
+    private fun stripMangaTitle(chapterName: String, mangaTitle: String): String {
+        fun isSkippable(c: Char) = !(c.isLetterOrDigit() || c.isWhitespace())
+
+        var namePos = 0
+        var titlePos = 0
+        while (namePos < chapterName.length && titlePos < mangaTitle.length) {
+            val nameChar = chapterName[namePos]
+            val titleChar = mangaTitle[titlePos]
+
+            if (nameChar.equals(titleChar, ignoreCase = true)) {
+                namePos++
+                titlePos++
+                continue
+            }
+
+            val skipNameChar = isSkippable(nameChar)
+            val skipTitleChar = isSkippable(titleChar)
+            if (!skipNameChar && !skipTitleChar) {
+                // A meaningful character differs: the name does not start with the title.
+                return chapterName
+            }
+
+            // Step over punctuation on whichever side has it (possibly both).
+            if (skipNameChar) namePos++
+            if (skipTitleChar) titlePos++
+        }
+
+        return chapterName.drop(namePos).trimStart(' ', '-', '_', ',', ':')
+    }
+
     override fun fetchPageList(chapter: SChapter): Observable<List<Page>> {
     override fun fetchPageList(chapter: SChapter): Observable<List<Page>> {
         return Observable.error(Exception("Unused"))
         return Observable.error(Exception("Unused"))
     }
     }

+ 57 - 0
app/src/main/java/eu/kanade/tachiyomi/util/storage/EpubFile.kt

@@ -1,8 +1,13 @@
 package eu.kanade.tachiyomi.util.storage
 package eu.kanade.tachiyomi.util.storage
 
 
+import eu.kanade.tachiyomi.source.model.SChapter
+import eu.kanade.tachiyomi.source.model.SManga
 import java.io.Closeable
 import java.io.Closeable
 import java.io.File
 import java.io.File
 import java.io.InputStream
 import java.io.InputStream
+import java.text.ParseException
+import java.text.SimpleDateFormat
+import java.util.Locale
 import java.util.zip.ZipEntry
 import java.util.zip.ZipEntry
 import java.util.zip.ZipFile
 import java.util.zip.ZipFile
 import org.jsoup.Jsoup
 import org.jsoup.Jsoup
@@ -44,6 +49,58 @@ class EpubFile(file: File) : Closeable {
         return zip.getEntry(name)
         return zip.getEntry(name)
     }
     }
 
 
+    /**
+     * Copies the author and description from this epub's package document into [manga].
+     *
+     * The first `dc:creator` element supplies the author and the first `dc:description` element
+     * supplies the description. When an element is absent, the corresponding field is set to null.
+     *
+     * @param manga the manga whose `author` and `description` are overwritten.
+     */
+    fun fillMangaMetadata(manga: SManga) {
+        val packageDoc = getPackageDocument(getPackageHref())
+
+        val creatorElement = packageDoc.getElementsByTag("dc:creator").first()
+        val descriptionElement = packageDoc.getElementsByTag("dc:description").first()
+
+        manga.author = creatorElement?.text()
+        manga.description = descriptionElement?.text()
+    }
+
+    /**
+     * Fills chapter metadata using this epub file's metadata.
+     *
+     * - The first `dc:title` replaces the chapter name when present.
+     * - The first `dc:publisher`, or the first `dc:creator` when there is no publisher, becomes
+     *   the scanlator.
+     * - The first `dc:date`, or the `dcterms:modified` meta element when there is no `dc:date`,
+     *   becomes the upload date when its text can be parsed.
+     *
+     * A field whose source element is missing, or whose date cannot be parsed, is left untouched.
+     *
+     * @param chapter the chapter to update in place.
+     */
+    fun fillChapterMetadata(chapter: SChapter) {
+        val ref = getPackageHref()
+        val doc = getPackageDocument(ref)
+
+        val title = doc.getElementsByTag("dc:title").first()
+        val publisher = doc.getElementsByTag("dc:publisher").first()
+        val creator = doc.getElementsByTag("dc:creator").first()
+        val date = doc.getElementsByTag("dc:date").first()
+            ?: doc.select("meta[property=dcterms:modified]").first()
+
+        if (title != null) {
+            chapter.name = title.text()
+        }
+
+        // The publisher takes precedence over the creator as the scanlator.
+        (publisher ?: creator)?.let { chapter.scanlator = it.text() }
+
+        if (date != null) {
+            parseEpubDate(date.text())?.let { chapter.date_upload = it }
+        }
+    }
+
+    /**
+     * Parses an EPUB date string into epoch milliseconds.
+     *
+     * Accepts a full timestamp (`2020-05-01T12:00:00` followed by `Z`, `+01:00` or `+0100`) and
+     * falls back to the leading `yyyy-MM-dd` part, read in the default time zone, for date-only
+     * or otherwise unrecognised values.
+     *
+     * @param text the raw text of the date element.
+     * @return the parsed time in milliseconds, or null when no supported format matches.
+     */
+    private fun parseEpubDate(text: String): Long? {
+        // SimpleDateFormat's `Z` letter only understands RFC 822 offsets such as "+0100", so the
+        // ISO 8601 designators "Z" and "+01:00" are rewritten into that form first.
+        val normalized = text.trim()
+            .replace(Regex("[Zz]$"), "+0000")
+            .replace(Regex("([+-]\\d{2}):(\\d{2})$"), "\$1\$2")
+
+        // Locale.US: this is a fixed machine-readable format and must not depend on the
+        // device locale.
+        return sequenceOf("yyyy-MM-dd'T'HH:mm:ssZ", "yyyy-MM-dd")
+            .mapNotNull { pattern ->
+                try {
+                    SimpleDateFormat(pattern, Locale.US).parse(normalized)?.time
+                } catch (e: ParseException) {
+                    // Not in this format; try the next pattern.
+                    null
+                }
+            }
+            .firstOrNull()
+    }
+
     /**
     /**
      * Returns the path of all the images found in the epub file.
      * Returns the path of all the images found in the epub file.
      */
      */