package ru.yandex.webmaster3.core.semantic.data_reactor_common.impl.processors.markup;

import ru.yandex.webmaster3.core.semantic.data_reactor_common.Entity;
import ru.yandex.webmaster3.core.semantic.data_reactor_common.EntityProcessor;
import ru.yandex.webmaster3.core.semantic.data_reactor_common.impl.BaseMutableEntity;
import ru.yandex.webmaster3.core.semantic.data_reactor_common.impl.attrs.*;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.MicrodataUtils;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.ComplexMicrodata;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.Microdata;
import ru.yandex.webmaster3.core.semantic.semantic_document_parser.microdata.data.TextMicrodata;

import java.util.*;

/**
 * Entity processor that turns an {@code httpDocument} entity into a list of
 * {@code microdata} entities, one per top-level microdata item returned by
 * {@link MicrodataUtils#extractMD}.
 *
 * <p>Entities whose tag is not {@code httpDocument} (including entities with a
 * {@code null} tag) produce an empty result.
 *
 * <p>Created by aleksart on 25.12.13.
 */
public class GetMicrodataProcessor extends EntityProcessor {

    /** Tag of the input entities this processor handles. */
    private static final String HTTP_DOCUMENT_TAG = "httpDocument";

    /** Tag assigned to every top-level entity produced by this processor. */
    private static final String MICRODATA_TAG = "microdata";

    /**
     * Extracts microdata from an {@code httpDocument} entity.
     *
     * <p>The document bytes are read from the {@code data} attribute, the charset
     * from the {@code charset} attribute, the language from the {@code lang}
     * attribute and the URL from {@link Entity#getUrl()}.
     *
     * <p>NOTE(review): the {@code data} and {@code charset} attributes are
     * dereferenced without a null check, so a document lacking either one
     * presumably fails with a {@link NullPointerException} - confirm whether
     * that is the intended contract.
     *
     * @param e the entity to process
     * @return one {@code microdata} entity per extracted top-level item, or an
     *         empty immutable list when {@code e} is not an {@code httpDocument}
     */
    @Override
    public Collection<Entity> process(Entity e) {
        // Constant-first comparison: entities may carry a null tag (nested
        // entities built below are created with one), which must not throw here.
        if (!HTTP_DOCUMENT_TAG.equals(e.getTag())) {
            return Collections.<Entity>emptyList();
        }

        byte[] doc = ((BlobAttrValue) e.getFirstOrNull("data")).byteArray;
        String charset = ((StringAttrValue) e.getFirstOrNull("charset")).getContent();
        String lang = e.getStringValue("lang");
        String url = e.getUrl();

        // NOTE(review): the meaning of the two boolean flags is defined by
        // MicrodataUtils.extractMD and is not visible from this file.
        List<Microdata> data = MicrodataUtils.extractMD(doc, url, charset, true, false);

        List<Entity> entityList = new ArrayList<Entity>(data.size());
        for (Microdata card : data) {
            entityList.add(extractEntityFromMicrodata(card, url, lang, true));
        }
        return entityList;
    }

    /**
     * Recursively converts a microdata node into an entity.
     *
     * <p>A {@link ComplexMicrodata} node gets {@code @itemtype} and
     * {@code itemlang} attributes, plus one nested entity value per property
     * value. A {@link TextMicrodata} node gets {@code data}, {@code href} and
     * {@code html} attributes. Any other node type yields an entity with no
     * attributes set.
     *
     * @param card   the microdata node to convert
     * @param url    URL passed to every created entity
     * @param lang   language stored in {@code itemlang} of complex nodes
     * @param isRoot {@code true} for top-level items, which are tagged
     *               {@code microdata}; nested items get a {@code null} tag
     * @return the entity built from {@code card}
     */
    private Entity extractEntityFromMicrodata(Microdata card, String url, String lang, boolean isRoot) {
        // Typed as String so the (String, String) constructor is selected even
        // when the tag is null, as the original explicit cast did.
        String tag = isRoot ? MICRODATA_TAG : null;
        BaseMutableEntity e = new BaseMutableEntity(tag, url);

        if (card instanceof ComplexMicrodata) {
            ComplexMicrodata complex = (ComplexMicrodata) card;
            Set<String> props = complex.getPropList();
            e.setValue("@itemtype", new StringAttrValue(complex.getType()));
            e.setValue("itemlang", new StringAttrValue(lang));
            for (String prop : props) {
                for (Microdata sCard : complex.getPropAsList(prop)) {
                    Entity nested = extractEntityFromMicrodata(sCard, url, lang, false);
                    e.appendValue(prop, new EntityAttrValue(nested));
                }
            }
        } else if (card instanceof TextMicrodata) {
            TextMicrodata text = (TextMicrodata) card;
            e.setValue("data", new StringAttrValue(text.data));
            e.setValue("href", new StringAttrValue(text.href));
            e.setValue("html", new StringAttrValue(text.html));
        }

        return e;
    }
}
