/*
 * Decompiled with CFR 0.152.
 */
package org.apache.tika.parser.mbox;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Collections;
import java.util.Date;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * Parser for mbox mailbox files ({@code application/mbox}).
 *
 * <p>The input is decoded as US-ASCII and processed line by line. A line that
 * starts with {@code "From "} marks the beginning of a message. Only the headers
 * of the first message are copied into the supplied {@link Metadata}; the body of
 * every message is emitted as XHTML inside a {@code div} (class
 * {@code email-entry}) containing a {@code p}, where consecutive lines starting
 * with {@code ">"} are wrapped in a {@code q} element.
 */
public class MboxParser
extends AbstractParser {
    private static final long serialVersionUID = -1762689436731160661L;
    private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("mbox"));
    public static final String MBOX_MIME_TYPE = "application/mbox";
    /** Prefix of the line that separates two messages in an mbox file. */
    public static final String MBOX_RECORD_DIVIDER = "From ";
    /** Splits a header line into its tag (group 1) and its value (group 2). */
    private static final Pattern EMAIL_HEADER_PATTERN = Pattern.compile("([^ ]+):[ \t]*(.*)");
    /** Finds an address enclosed in angle brackets; group 1 is the bare address. */
    private static final Pattern EMAIL_ADDRESS_PATTERN = Pattern.compile("<(.*@.*)>");
    /** Prefix of the metadata key used for headers without a dedicated mapping. */
    private static final String EMAIL_HEADER_METADATA_PREFIX = "MboxParser-";
    /** Metadata key under which the remainder of the "From " divider line is stored. */
    private static final String EMAIL_FROMLINE_METADATA = "MboxParser-from";

    @Override
    public Set<MediaType> getSupportedTypes(ParseContext context) {
        return SUPPORTED_TYPES;
    }

    /**
     * Reads the mbox stream line by line, storing the first message's headers in
     * {@code metadata} and writing every message body to {@code handler} as XHTML.
     *
     * @param stream   the mbox content; it is wrapped in a reader but not closed here
     * @param handler  receiver of the generated XHTML events
     * @param metadata receives the content type, the encoding and the first message's headers
     * @param context  parse context (not used by this parser)
     * @throws IOException   if reading from the stream fails
     * @throws TikaException if the US-ASCII charset is not available
     * @throws SAXException  if the handler rejects an event
     */
    @Override
    public void parse(InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, TikaException, SAXException {
        InputStreamReader isr;
        try {
            isr = new InputStreamReader(stream, "US-ASCII");
        }
        catch (UnsupportedEncodingException e) {
            throw new TikaException("US-ASCII is not supported!", e);
        }
        BufferedReader reader = new BufferedReader(isr);
        metadata.set("Content-Type", MBOX_MIME_TYPE);
        metadata.set("Content-Encoding", "us-ascii");
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();

        ParseStates parseState = ParseStates.START;
        // Header line currently being assembled; folded continuation lines are appended to it.
        String multiLine = null;
        boolean inQuote = false;
        int numEmails = 0;

        String curLine = reader.readLine();
        while (curLine != null) {
            boolean newMessage = curLine.startsWith(MBOX_RECORD_DIVIDER);
            if (newMessage) {
                ++numEmails;
            }
            switch (parseState) {
                case START: {
                    // Everything before the first divider line is skipped.
                    if (!newMessage) {
                        break;
                    }
                    parseState = ParseStates.IN_HEADER;
                    // Cleared so that the IN_HEADER logic below treats the divider
                    // as an ordinary first header line.
                    newMessage = false;
                }
                // Intentional fall-through from START into IN_HEADER.
                case IN_HEADER: {
                    if (newMessage) {
                        // A new message started before any blank line was seen.
                        this.saveHeaderInMetadata(numEmails, metadata, multiLine);
                        multiLine = curLine;
                        break;
                    }
                    if (curLine.length() == 0) {
                        // A blank line ends the header section and starts the body.
                        this.saveHeaderInMetadata(numEmails, metadata, multiLine);
                        parseState = ParseStates.IN_CONTENT;
                        xhtml.startElement("div", "class", "email-entry");
                        xhtml.startElement("p");
                        inQuote = false;
                        break;
                    }
                    if (curLine.startsWith(" ") || curLine.startsWith("\t")) {
                        // Folded header: append the continuation to the pending line.
                        multiLine = multiLine + " " + curLine.trim();
                        break;
                    }
                    // A new header begins; flush the previous one first.
                    this.saveHeaderInMetadata(numEmails, metadata, multiLine);
                    multiLine = curLine;
                    break;
                }
                case IN_CONTENT: {
                    if (newMessage) {
                        this.endMessage(xhtml, inQuote);
                        parseState = ParseStates.IN_HEADER;
                        multiLine = curLine;
                        break;
                    }
                    // Open or close the <q> element whenever the quoting status changes.
                    boolean quoted = curLine.startsWith(">");
                    if (inQuote) {
                        if (!quoted) {
                            xhtml.endElement("q");
                            inQuote = false;
                        }
                    } else if (quoted) {
                        xhtml.startElement("q");
                        inQuote = true;
                    }
                    xhtml.characters(curLine);
                    xhtml.element("br", "");
                }
            }
            curLine = reader.readLine();
        }

        // Flush whatever was pending when the input ended.
        if (parseState == ParseStates.IN_HEADER) {
            this.saveHeaderInMetadata(numEmails, metadata, multiLine);
        } else if (parseState == ParseStates.IN_CONTENT) {
            this.endMessage(xhtml, inQuote);
        }
        xhtml.endDocument();
    }

    /**
     * Closes the elements opened for a message body.
     *
     * @param xhtml   handler to write the end tags to
     * @param inQuote whether a {@code q} element is still open and must be closed first
     * @throws SAXException if the handler rejects an event
     */
    private void endMessage(XHTMLContentHandler xhtml, boolean inQuote) throws SAXException {
        if (inQuote) {
            xhtml.endElement("q");
        }
        xhtml.endElement("p");
        xhtml.endElement("div");
    }

    /**
     * Stores one (already unfolded) header line in the metadata.
     *
     * <p>Nothing is stored when the line is {@code null}, when it belongs to any
     * message after the first one, or when it does not look like a header. The
     * divider line is stored under {@code MboxParser-from}; well-known headers are
     * mapped to dedicated metadata properties; every other header is stored under
     * {@code MboxParser-} followed by the lower-cased header name.
     *
     * @param numEmails number of divider lines seen so far
     * @param metadata  target metadata
     * @param curLine   complete header line, or {@code null} if none is pending
     */
    private void saveHeaderInMetadata(int numEmails, Metadata metadata, String curLine) {
        if (curLine == null || numEmails > 1) {
            return;
        }
        if (curLine.startsWith(MBOX_RECORD_DIVIDER)) {
            metadata.add(EMAIL_FROMLINE_METADATA, curLine.substring(MBOX_RECORD_DIVIDER.length()));
            return;
        }
        Matcher headerMatcher = EMAIL_HEADER_PATTERN.matcher(curLine);
        if (!headerMatcher.matches()) {
            return;
        }
        // Lower-case with a fixed locale: the tag becomes part of a metadata key in the
        // fallback branch, and the default locale (e.g. Turkish, where 'I' maps to a
        // dotless 'i') must not change that key.
        String headerTag = headerMatcher.group(1).toLowerCase(Locale.ROOT);
        String headerContent = headerMatcher.group(2);
        if (headerTag.equalsIgnoreCase("From")) {
            metadata.set(TikaCoreProperties.CREATOR, headerContent);
        } else if (headerTag.equalsIgnoreCase("To") || headerTag.equalsIgnoreCase("Cc") || headerTag.equalsIgnoreCase("Bcc")) {
            // Prefer the address inside <...>; otherwise use the raw value if it contains '@'.
            Matcher address = EMAIL_ADDRESS_PATTERN.matcher(headerContent);
            if (address.find()) {
                metadata.add("Message-Recipient-Address", address.group(1));
            } else if (headerContent.indexOf('@') > -1) {
                metadata.add("Message-Recipient-Address", headerContent);
            }
            String property = "Message-To";
            if (headerTag.equalsIgnoreCase("Cc")) {
                property = "Message-Cc";
            } else if (headerTag.equalsIgnoreCase("Bcc")) {
                property = "Message-Bcc";
            }
            metadata.add(property, headerContent);
        } else if (headerTag.equalsIgnoreCase("Subject")) {
            metadata.add(TikaCoreProperties.TRANSITION_SUBJECT_TO_DC_TITLE, headerContent);
        } else if (headerTag.equalsIgnoreCase("Date")) {
            try {
                Date date = MboxParser.parseDate(headerContent);
                metadata.set(TikaCoreProperties.CREATED, date);
            }
            catch (ParseException ignored) {
                // Best effort: an unparseable Date header leaves the creation date unset.
            }
        } else if (headerTag.equalsIgnoreCase("Message-Id")) {
            metadata.set(TikaCoreProperties.IDENTIFIER, headerContent);
        } else if (headerTag.equalsIgnoreCase("In-Reply-To")) {
            metadata.set(TikaCoreProperties.RELATION, headerContent);
        } else if (headerTag.equalsIgnoreCase("Content-Type")) {
            metadata.add("Content-Type", headerContent);
            metadata.set(TikaCoreProperties.FORMAT, headerContent);
        } else {
            metadata.add(EMAIL_HEADER_METADATA_PREFIX + headerTag, headerContent);
        }
    }

    /**
     * Parses the value of a {@code Date} header using the pattern
     * {@code "EEE, d MMM yyyy HH:mm:ss Z"} with {@link Locale#US}.
     *
     * @param headerContent the header value to parse
     * @return the parsed date
     * @throws ParseException if the value does not start with a date in that format
     */
    public static Date parseDate(String headerContent) throws ParseException {
        // A fresh formatter per call: SimpleDateFormat instances are not safe to share between threads.
        SimpleDateFormat dateFormat = new SimpleDateFormat("EEE, d MMM yyyy HH:mm:ss Z", Locale.US);
        return dateFormat.parse(headerContent);
    }

    /** States of the line-oriented mbox state machine used by {@code parse}. */
    private enum ParseStates {
        /** No divider line has been seen yet. */
        START,
        /** Reading the header lines of a message. */
        IN_HEADER,
        /** Reading the body lines of a message. */
        IN_CONTENT;

    }
}

