package ru.yandex.webmaster3.core.util;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

import net.htmlparser.jericho.EndTag;
import net.htmlparser.jericho.Segment;
import net.htmlparser.jericho.Source;
import net.htmlparser.jericho.StartTag;

/**
 * HTML helpers built on top of the Jericho HTML parser.
 *
 * @author aherman
 */
public class HtmlUtils {
    private static final String HEAD_TAG = "head";
    private static final String BODY_TAG = "body";
    private static final String META_TAG = "meta";

    /**
     * Collects the {@code <meta>} start tags found inside the document head.
     *
     * <p>Scanning only starts collecting after an explicit {@code <head>} start tag has been
     * seen, and stops at the first {@code </head>} end tag or {@code <body>} start tag,
     * whichever comes first. A document without an explicit {@code <head>} start tag
     * therefore yields an empty list.
     *
     * @param html document markup; {@code null} is treated as an empty document
     * @return a new mutable list of the meta tags, in the order the parser yields them;
     *         never {@code null}
     * @throws IOException declared because the document is handed to the parser through a
     *                     {@link java.io.Reader}
     */
    public static List<MetaTag> extractMetaTags(String html) throws IOException {
        List<MetaTag> metaTags = new ArrayList<>();
        if (html == null) {
            // new StringReader(null) would fail with a NullPointerException.
            return metaTags;
        }

        Source source = new Source(new StringReader(html));
        boolean inHeadTag = false;
        for (Segment segment : source) {
            if (segment instanceof StartTag) {
                StartTag startTag = (StartTag) segment;
                String tagName = startTag.getName();
                if (BODY_TAG.equalsIgnoreCase(tagName)) {
                    // The head cannot continue past the start of the body.
                    break;
                }
                if (HEAD_TAG.equalsIgnoreCase(tagName)) {
                    inHeadTag = true;
                } else if (inHeadTag && META_TAG.equalsIgnoreCase(tagName)) {
                    metaTags.add(toMetaTag(startTag));
                }
            } else if (segment instanceof EndTag
                    && HEAD_TAG.equalsIgnoreCase(((EndTag) segment).getName())) {
                break;
            }
        }

        return metaTags;
    }

    /** Builds a {@link MetaTag} from the {@code name} and {@code content} attributes of a start tag. */
    private static MetaTag toMetaTag(StartTag startTag) {
        String name = startTag.getAttributeValue("name");
        String contentValue = startTag.getAttributeValue("content");
        return new MetaTag(name, contentValue, startTag.tidy(false));
    }

    /**
     * Immutable description of a single {@code <meta>} tag.
     *
     * <p>Any of the three values may be {@code null}; {@link #equals(Object)} and
     * {@link #hashCode()} are null-safe and take all three into account.
     */
    public static class MetaTag {
        private final String name;
        private final String content;
        private final String tagHtml;

        /**
         * @param name    value of the tag's {@code name} attribute
         * @param content value of the tag's {@code content} attribute
         * @param tagHtml markup of the tag itself
         */
        public MetaTag(String name, String content, String tagHtml) {
            this.name = name;
            this.content = content;
            this.tagHtml = tagHtml;
        }

        /** @return value of the {@code name} attribute, possibly {@code null} */
        public String getName() {
            return name;
        }

        /** @return value of the {@code content} attribute, possibly {@code null} */
        public String getContent() {
            return content;
        }

        /** @return markup of the tag, possibly {@code null} */
        public String getTagHtml() {
            return tagHtml;
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }

            MetaTag other = (MetaTag) o;
            return Objects.equals(name, other.name)
                    && Objects.equals(content, other.content)
                    && Objects.equals(tagHtml, other.tagHtml);
        }

        @Override
        public int hashCode() {
            // Same 31-based combination as before, so hash values do not change.
            int result = Objects.hashCode(name);
            result = 31 * result + Objects.hashCode(content);
            result = 31 * result + Objects.hashCode(tagHtml);
            return result;
        }

        @Override
        public String toString() {
            return "MetaTag{name='" + name + "', content='" + content
                    + "', tagHtml='" + tagHtml + "'}";
        }
    }
}
