Utils.kt

/*
 * This file is part of the pl.wrzasq.commons.
 *
 * @license http://mit-license.org/ The MIT license
 * @copyright 2015 - 2016, 2018 - 2019, 2021 © by Rafał Wrzeszcz - Wrzasq.pl.
 */

package pl.wrzasq.commons.text.html

import pl.wrzasq.commons.text.TextProcessingException
import java.net.URLEncoder
import pl.wrzasq.commons.text.Formatter
import java.nio.charset.StandardCharsets
import java.util.regex.Pattern

/** Suffix appended to truncated text when the caller does not supply one. */
private const val SUFFIX_DEFAULT: String = "…"

/**
 * Locates a word boundary near the end of the input: a non-whitespace character, followed by a run of
 * whitespace, followed by an optional run of non-whitespace characters reaching the end of the input.
 *
 * NOTE(review): `UNICODE_CASE` only influences matching together with `CASE_INSENSITIVE`, which is not set
 * here - presumably `UNICODE_CHARACTER_CLASS` was intended; confirm before changing.
 */
private val WORDBOUND_PATTERN: Pattern = Pattern.compile(
    "\\S\\s+\\S*?$",
    Pattern.UNICODE_CASE
)

/**
 * Captures the content of the first `<p>` element (opening tag may carry attributes); `DOTALL` lets the
 * content span multiple lines.
 */
private val REGEX_FIRSTPARAGRAPH: Pattern = Pattern.compile("<p(?: [^>]*)?>(.*?)</p>", Pattern.DOTALL)

/**
 * Various HTML text processing utilities.
 *
 * Text formatting is delegated to a replaceable [Formatter] instance held by this singleton.
 */
object Utils {
    // formatter used by `format()`; can be swapped at runtime through `setFormatter()`
    private var formatter = Formatter()

    /**
     * Fetches first paragraph of text.
     *
     * Only the first `<p>` element is considered; its opening tag may carry attributes.
     *
     * @param text HTML snippet.
     * @return Content of the first paragraph (without the surrounding tags) or empty string if there is none.
     */
    fun firstParagraph(text: String): String {
        val match = REGEX_FIRSTPARAGRAPH.matcher(text)
        return if (match.find()) match.group(1) else ""
    }

    /**
     * Truncates text.
     *
     * Text that already fits in `length` characters is returned untouched (no suffix is added). Otherwise the
     * text is cut to at most `length` characters, trimmed and the suffix is appended - so the returned string
     * may be longer than `length` by the size of the suffix.
     *
     * When `bound` is set, the cut is moved back to the end of the last whole word that fits; if the leading
     * part contains no whitespace at all, the text is cut at exactly `length` characters.
     *
     * @param text Text to be truncated.
     * @param length Maximum text length.
     * @param suffix Suffix to be used at the end of truncated text.
     * @param bound Whether to look for word end or not.
     * @return Truncated text.
     * @throws IndexOutOfBoundsException When `length` is negative.
     */
    fun truncate(text: String, length: Int, suffix: String, bound: Boolean): String {
        // nothing to do here
        if (text.length <= length) {
            return text
        }

        // by default cut exactly at the limit
        var position = length
        if (bound) {
            // one character of lookahead is enough to notice a word that ends exactly at the limit; the text
            // is longer than `length` at this point, so `length + 1` never exceeds its size and the computed
            // cut position never exceeds `length`
            val part = text.substring(0, length + 1)
            val matcher = WORDBOUND_PATTERN.matcher(part)
            if (matcher.find()) {
                // we add 1 as second parameter is exclusive
                position = matcher.start() + 1
            }
        }
        return text.substring(0, position).trim() + suffix
    }

    /**
     * Truncates text at a word boundary.
     *
     * @param text Text to be truncated.
     * @param length Maximum text length.
     * @param suffix Suffix to be used at the end of truncated text.
     * @return Truncated text.
     */
    fun truncate(text: String, length: Int, suffix: String): String = truncate(text, length, suffix, true)

    /**
     * Truncates text using the default suffix.
     *
     * @param text Text to be truncated.
     * @param length Maximum text length.
     * @param bound Whether to look for word end or not.
     * @return Truncated text.
     */
    fun truncate(text: String, length: Int, bound: Boolean): String = truncate(text, length, SUFFIX_DEFAULT, bound)

    /**
     * Truncates text at a word boundary using the default suffix.
     *
     * @param text Text to be truncated.
     * @param length Maximum text length.
     * @return Truncated text.
     */
    fun truncate(text: String, length: Int): String = truncate(text, length, SUFFIX_DEFAULT, true)

    /**
     * Registers new text formatting handler.
     *
     * The handler replaces the one previously used by [format].
     *
     * @param formatter Formatter.
     */
    @JvmStatic
    fun setFormatter(formatter: Formatter) {
        Utils.formatter = formatter
    }

    /**
     * Formats the text with the currently registered formatter.
     *
     * @param format Format name.
     * @param text Source text.
     * @return Formatted text.
     * @throws TextProcessingException When text processing fails.
     */
    fun format(format: String, text: String): String = formatter.transform(format, text)

    /**
     * Wrapper function that encodes URLs using UTF-8 encoding.
     *
     * @param value URL part.
     * @return URL-encoded part.
     */
    fun urlEncode(value: String): String = URLEncoder.encode(value, StandardCharsets.UTF_8)
}