// Created on 18.02.2006
package biz.junginger.newsfeed.util;

/**
 * (C) Copyright 2004-2008 Markus Junginger.
 * 
 * @author Markus Junginger
 */
/*
 * This file is part of RSS View.
 * 
 * RSS View is free software: you can redistribute it and/or modify it under the
 * terms of the GNU Lesser General Public License as published by the Free
 * Software Foundation, either version 3 of the License, or (at your option) any
 * later version.
 * 
 * RSS View is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
 * details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with RSS View. If not, see <http://www.gnu.org/licenses/>.
 */
public class StringUtils
{

    /** Opening body tag looked for by {@link #stripHtmlBody(StringBuffer)}. */
    private static final String BODY_OPEN = "<body>";

    /** Closing body tag looked for by {@link #stripHtmlBody(StringBuffer)}. */
    private static final String BODY_CLOSE = "</body>";

    /**
     * Converts HTML markup to plain text and returns the result as a new string.
     * 
     * @param str the HTML text to convert; must not be null
     * @return the plain text produced by {@link #convertHtmlToPlainText(StringBuffer)}
     */
    public static String convertHtmlToPlainText(String str)
    {
        StringBuffer buffer = new StringBuffer(str);
        convertHtmlToPlainText(buffer);
        return buffer.toString();
    }

    /**
     * Converts the HTML markup held in the buffer to plain text, in place.
     * <p>
     * Steps, in order: reduce a complete document to the content of its
     * body element; turn line breaks, list items and paragraph ends into
     * newlines; remove all remaining tags; decode the entities
     * <code>&amp;quot;</code>, <code>&amp;nbsp;</code> and <code>&amp;amp;</code>;
     * collapse runs of three or more newlines into two.
     * 
     * @param buffer the HTML text; it is modified in place
     */
    public static void convertHtmlToPlainText(StringBuffer buffer)
    {
        stripHtmlBody(buffer);
        // TODO tags carrying attributes (e.g. <li class="x">) are still not recognized here
        replaceEach(buffer, "<br>", "\n", true);
        replaceEach(buffer, "<br/>", "\n", true);
        replaceEach(buffer, "<br />", "\n", true);
        replaceEach(buffer, "<li>", "\n * ", true);
        replaceEach(buffer, "</p>", "\n", true);
        stripTags(buffer);
        // Each entity is decoded in a single pass and "&amp;" comes last, so
        // that escaped entity text such as "&amp;quot;" ends up as the literal
        // text "&quot;" instead of being decoded a second time.
        replaceEach(buffer, "&quot;", "\"", false);
        replaceEach(buffer, "&nbsp;", " ", false);
        replaceEach(buffer, "&amp;", "&", false);
        // Repeated replacement is wanted here: it shrinks any run of newlines to two.
        replace(buffer, "\n\n\n", "\n\n");
    }

    /**
     * Removes everything from a '&lt;' up to and including the next '&gt;'
     * that follows it. A '&gt;' that appears before any '&lt;' is ordinary
     * text and is kept; a '&lt;' without a later '&gt;' ends the scan.
     */
    private static void stripTags(StringBuffer buffer)
    {
        int from = 0;
        while (true) {
            int start = buffer.indexOf("<", from);
            if (start == -1) {
                break;
            }
            // Search for the closing bracket only behind the opening one.
            int end = buffer.indexOf(">", start + 1);
            if (end == -1) {
                break;
            }
            buffer.delete(start, end + 1);
            // Text in front of 'start' contains no '<', so continue from there.
            from = start;
        }
    }

    /**
     * Reduces a HTML document to the content of its body element. Nothing is
     * changed if no opening body tag is found; if only the closing tag is
     * missing, everything behind the opening tag is kept.
     */
    // TODO Is there any feed needing this??
    // TODO a body tag carrying attributes is not recognized
    private static void stripHtmlBody(StringBuffer htmlContent)
    {
        // Search the buffer itself: indices taken from a lower-cased copy may
        // not line up, because lower-casing can change the length of a string.
        int bodyTag = indexOfIgnoreCase(htmlContent, BODY_OPEN, 0);
        if (bodyTag == -1) {
            return;
        }
        int bodyStart = bodyTag + BODY_OPEN.length();
        int bodyEnd = indexOfIgnoreCase(htmlContent, BODY_CLOSE, bodyStart);
        if (bodyEnd != -1) {
            htmlContent.delete(bodyEnd, htmlContent.length());
        }
        htmlContent.delete(0, bodyStart);
    }

    /**
     * Works like {@link #convertHtmlToPlainText(String)} and additionally
     * replaces every character below the space character (line breaks, tabs
     * and other control characters) with a space.
     * 
     * @param str the HTML text to convert; must not be null
     * @return the plain text without characters below ' '
     */
    public static String convertHtmlToPlainTextNoSpecialChars(String str)
    {
        StringBuffer buffer = new StringBuffer(str);
        convertHtmlToPlainText(buffer);
        for (int i = 0; i < buffer.length(); i++) {
            char c = buffer.charAt(i);
            if (c < ' ') {
                buffer.setCharAt(i, ' ');
            }
        }
        return buffer.toString();
    }

    /**
     * Replaces occurrences of <code>search</code> in the buffer with
     * <code>replaceWith</code>, in place.
     * <p>
     * Replacing is repeated until the buffer no longer contains
     * <code>search</code>; occurrences that only come into existence through
     * an earlier replacement are replaced as well (e.g. replacing "aa" with
     * "a" turns "aaaa" into "a"). Two cases are treated specially because
     * repeating would never end:
     * <ul>
     * <li>an empty <code>search</code> leaves the buffer untouched;</li>
     * <li>if <code>replaceWith</code> contains <code>search</code>, the
     * buffer is processed once from left to right and inserted text is not
     * searched again.</li>
     * </ul>
     * A replacement that recreates <code>search</code> only together with the
     * surrounding text is still replaced repeatedly.
     * 
     * @param buffer the text to modify
     * @param search the text to look for
     * @param replaceWith the text put in place of each occurrence
     */
    public static void replace(StringBuffer buffer, String search, String replaceWith)
    {
        int searchLength = search.length();
        if (searchLength == 0) {
            // indexOf("") always matches at the start; there is nothing to replace.
            return;
        }
        boolean replacementContainsSearch = replaceWith.indexOf(search) != -1;
        int from = 0;
        while (true) {
            int start = buffer.indexOf(search, from);
            if (start == -1) {
                break;
            }
            buffer.replace(start, start + searchLength, replaceWith);
            if (replacementContainsSearch) {
                // Skip the inserted text, it would match again forever.
                from = start + replaceWith.length();
            }
            else {
                // 'start' was the leftmost match, so a new match has to overlap
                // the replaced region; it cannot begin further left than this.
                from = Math.max(0, start - searchLength + 1);
            }
        }
    }

    /**
     * Replaces each occurrence of <code>search</code> in one pass from left
     * to right; inserted text is never searched again.
     * 
     * @param search the text to look for; must not be empty
     * @param ignoreCase whether ASCII letters match regardless of case
     */
    private static void replaceEach(StringBuffer buffer, String search, String replaceWith,
            boolean ignoreCase)
    {
        int from = 0;
        while (true) {
            int start = ignoreCase ? indexOfIgnoreCase(buffer, search, from) : buffer.indexOf(
                    search, from);
            if (start == -1) {
                break;
            }
            buffer.replace(start, start + search.length(), replaceWith);
            from = start + replaceWith.length();
        }
    }

    /**
     * Returns the index of the first occurrence of <code>search</code> at or
     * behind <code>from</code>, comparing ASCII letters without regard to
     * case, or -1 if there is none.
     */
    private static int indexOfIgnoreCase(CharSequence text, String search, int from)
    {
        int searchLength = search.length();
        int last = text.length() - searchLength;
        for (int i = from; i <= last; i++) {
            int matched = 0;
            while (matched < searchLength
                    && foldAscii(text.charAt(i + matched)) == foldAscii(search.charAt(matched))) {
                matched++;
            }
            if (matched == searchLength) {
                return i;
            }
        }
        return -1;
    }

    /** Maps 'A'-'Z' to 'a'-'z' and returns every other character unchanged. */
    private static char foldAscii(char c)
    {
        if (c >= 'A' && c <= 'Z') {
            return (char) (c + ('a' - 'A'));
        }
        return c;
    }
}
