GeoRegionDataCLDR.java

package org.genesys.server.service.worker;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import javax.annotation.Resource;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.server.model.impl.Country;
import org.genesys.server.model.impl.GeoRegion;
import org.genesys.server.service.GeoService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

@Component
public class GeoRegionDataCLDR {

    private static final String CLDR_URL = "https://unicode.org/Public/cldr/26.0.1/core.zip";

    public static final Logger LOG = LoggerFactory.getLogger(GeoRegionDataCLDR.class);

    @Autowired
    private GeoService geoService;

    @Resource
    private Set<String> supportedLocales;

    /**
     * Builds the list of geographic regions from the CLDR core archive.
     * <p>
     * For every locale in {@code supportedLocales} the archive entry {@code common/main/<locale>.xml} is parsed.
     * The regions read from the {@code en} file become the result, after {@code assignParents} has linked them
     * to their parent regions and countries. The regions read for all other locales only contribute their
     * names, which {@code assignNameJ} stores on each result region as a JSON map.
     * <p>
     * Locales without a matching entry in the archive are skipped with a warning.
     *
     * @return the regions read from the English locale file
     * @throws IOException if the archive cannot be obtained or read
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws SAXException if one of the XML files cannot be parsed
     * @throws IllegalStateException if no English region data was read, i.e. {@code en} is not a supported
     *         locale or its file is missing from the archive
     */
    public List<GeoRegion> getGeoRegionDataCLDR() throws IOException, ParserConfigurationException, SAXException {

        List<GeoRegion> geoRegionList = null;
        Map<String, Map<String, GeoRegion>> forNameJGeoRegions = new HashMap<>();

        // The archive is closed when we are done with it, even if parsing fails half way through.
        try (ZipFile zipFile = getCoreZip()) {
            LOG.info("Obtained CLDR zip file: {}", zipFile);

            SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
            saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

            SAXParser parser = saxParserFactory.newSAXParser();
            ParserForCLDR parserForCLDR = new ParserForCLDR();

            for (String language : supportedLocales) {
                String fileName = "common/main/" + language + ".xml";
                LOG.info("Examining {}", fileName);

                ZipEntry zipEntry = zipFile.getEntry(fileName);
                if (zipEntry == null) {
                    // getInputStream(null) would fail with a bare NullPointerException
                    LOG.warn("CLDR archive has no entry {}, skipping locale {}", fileName, language);
                    continue;
                }

                try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
                    parser.parse(inputStream, parserForCLDR);
                }

                if ("en".equals(language)) {
                    // English regions are the canonical ones: link parents and countries right away
                    geoRegionList = assignParents(zipFile, parserForCLDR.getRegionMap());
                } else {
                    forNameJGeoRegions.put(language, parserForCLDR.getRegionMap());
                }
            }
        }

        if (geoRegionList == null) {
            throw new IllegalStateException("No English CLDR region data was read: \"en\" must be a supported locale and common/main/en.xml must exist in the archive");
        }

        assignNameJ(geoRegionList, forNameJGeoRegions);

        return geoRegionList;
    }

    /**
     * Stores on every region a JSON object that maps a locale code to the region's name in that locale.
     * <p>
     * The {@code en} entry is taken from the region itself; all other entries are looked up by ISO code in
     * the per-locale region maps. A locale that has no region for a given ISO code is left out of that
     * region's JSON instead of failing the whole run.
     *
     * @param geoRegionList the regions to update
     * @param forNameJGeoRegions locale code -> (ISO code -> region carrying the localized name)
     * @throws JsonProcessingException if the name map cannot be serialized
     */
    private void assignNameJ(List<GeoRegion> geoRegionList, Map<String, Map<String, GeoRegion>> forNameJGeoRegions) throws JsonProcessingException {

        // One mapper is enough for all regions
        ObjectMapper objectMapper = new ObjectMapper();

        for (GeoRegion geoRegion : geoRegionList) {

            Map<String, String> nameL = new HashMap<>();
            nameL.put("en", geoRegion.getName());

            for (Map.Entry<String, Map<String, GeoRegion>> entry : forNameJGeoRegions.entrySet()) {
                GeoRegion localized = entry.getValue().get(geoRegion.getIsoCode());
                if (localized != null) {
                    nameL.put(entry.getKey(), localized.getName());
                }
            }

            geoRegion.setNameL(objectMapper.writeValueAsString(nameL));
        }
    }

    /**
     * Links regions to their parent regions and to their countries using
     * {@code common/supplemental/supplementalData.xml} from the CLDR archive.
     * <p>
     * Groups whose members are numeric codes define the region hierarchy: every numeric member gets the
     * group's region as parent. Groups whose members are not numeric define the countries of a region:
     * members are matched against {@link Country#getCode2()}. Codes that have no region in
     * {@code geoRegions} are skipped.
     *
     * @param zipFile the opened CLDR core archive
     * @param geoRegions ISO code -> region; the regions are updated in place
     * @return a new list with all values of {@code geoRegions}
     * @throws SAXException if the supplemental data cannot be parsed
     * @throws ParserConfigurationException if a SAX parser cannot be created
     * @throws IOException if the supplemental data entry is missing or cannot be read
     */
    private List<GeoRegion> assignParents(ZipFile zipFile, Map<String, GeoRegion> geoRegions) throws SAXException, ParserConfigurationException, IOException {

        SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
        saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

        SAXParser parser = saxParserFactory.newSAXParser();
        SupplementalDataParser supplementalDataParser = new SupplementalDataParser();

        String fileName = "common/supplemental/supplementalData.xml";
        ZipEntry zipEntry = zipFile.getEntry(fileName);
        if (zipEntry == null) {
            throw new IOException("CLDR archive has no entry " + fileName);
        }
        try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
            parser.parse(inputStream, supplementalDataParser);
        }

        // Region hierarchy
        for (Map.Entry<String, String> entry : supplementalDataParser.getGeoregionsCodes().entrySet()) {
            GeoRegion parent = geoRegions.get(entry.getKey());
            if (parent == null) {
                LOG.debug("No region for group {}, not assigning it as parent", entry.getKey());
                continue;
            }
            for (String isoCode : entry.getValue().split(" ")) {
                if (!isoCode.matches("[0-9]+")) {
                    continue;
                }
                GeoRegion child = geoRegions.get(isoCode);
                if (child == null) {
                    LOG.debug("No region for code {} listed in group {}", isoCode, entry.getKey());
                    continue;
                }
                child.setParentRegion(parent);
            }
        }

        // Fetch the countries once and index them by 2-letter code instead of listing them for every code
        Map<String, List<Country>> countriesByCode2 = new HashMap<>();
        for (Country country : geoService.listAll()) {
            List<Country> sameCode = countriesByCode2.get(country.getCode2());
            if (sameCode == null) {
                sameCode = new ArrayList<>();
                countriesByCode2.put(country.getCode2(), sameCode);
            }
            sameCode.add(country);
        }

        // Countries of each region
        for (Map.Entry<String, String> entry : supplementalDataParser.getCountryMap().entrySet()) {
            GeoRegion region = geoRegions.get(entry.getKey());
            if (region == null) {
                LOG.debug("No region for group {}, not assigning countries", entry.getKey());
                continue;
            }
            List<Country> countries = new ArrayList<>();
            for (String isoCode : entry.getValue().split(" ")) {
                List<Country> matching = countriesByCode2.get(isoCode);
                if (matching != null) {
                    countries.addAll(matching);
                }
            }
            region.setCountries(countries);
        }

        return new ArrayList<>(geoRegions.values());
    }

    /**
     * Downloads the CLDR core archive from {@code CLDR_URL} into a temporary file and opens it.
     * <p>
     * The archive is opened with {@link ZipFile#OPEN_DELETE}, so the temporary file is removed once the
     * returned {@link ZipFile} is closed. If the download or opening the archive fails, the temporary file
     * is deleted before the exception is propagated.
     *
     * @return the opened archive; the caller should close it
     * @throws IOException if the download fails or the downloaded file is not a readable zip archive
     */
    private ZipFile getCoreZip() throws IOException {

        URL url = new URL(CLDR_URL);
        LOG.info("Downloading {}", url);

        File tempFileForXml = File.createTempFile("core", ".zip");
        boolean handedOver = false;
        try {
            HttpURLConnection connection = (HttpURLConnection) url.openConnection();
            connection.setRequestMethod("GET");
            try (InputStream in = connection.getInputStream(); FileOutputStream out = new FileOutputStream(tempFileForXml)) {
                IOUtils.copy(in, out);
            } finally {
                connection.disconnect();
            }
            LOG.info("Done copying CLDR data to file");

            ZipFile coreZip = new ZipFile(tempFileForXml, ZipFile.OPEN_READ | ZipFile.OPEN_DELETE);
            handedOver = true;
            return coreZip;
        } finally {
            // Once the ZipFile exists it owns the temporary file; before that we have to clean up ourselves
            if (!handedOver && !tempFileForXml.delete()) {
                LOG.warn("Could not delete temporary file {}", tempFileForXml);
            }
        }
    }

    /**
     * SAX handler that collects the {@code <group type="..." contains="..."/>} elements that have a numeric
     * {@code type}.
     * <p>
     * Groups are sorted into two maps by looking at the first code in {@code contains}: if it is numeric the
     * group goes to {@link #getGeoregionsCodes()}, otherwise to {@link #getCountryMap()}. Both maps are keyed
     * by the {@code type} value and hold the unmodified {@code contains} value. Both maps are replaced by
     * fresh instances at the start of every parsed document.
     * <p>
     * Groups are evaluated in {@link #startElement}, where the attributes are guaranteed to be valid, so the
     * result does not depend on whether character data follows the element.
     */
    public static class SupplementalDataParser extends DefaultHandler {

        /** Group code -> space separated member codes, for groups whose first member is numeric. */
        Map<String, String> georegionsCodes = new HashMap<>();

        /** Group code -> space separated member codes, for groups whose first member is not numeric. */
        Map<String, String> countryMap = new HashMap<>();

        /** Qualified name of the most recently started element. */
        String xmlTag;

        /** Snapshot of the attributes of the most recently started element. */
        Attributes attributes;

        /**
         * @return group code -> member codes for groups whose first member is not numeric
         */
        public Map<String, String> getCountryMap() {
            return countryMap;
        }

        /**
         * @return group code -> member codes for groups whose first member is numeric
         */
        public Map<String, String> getGeoregionsCodes() {
            return georegionsCodes;
        }

        @Override
        public void startDocument() throws SAXException {
            georegionsCodes = new HashMap<>();
            countryMap = new HashMap<>();
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            this.xmlTag = qName;
            // The parser may reuse the Attributes instance after this call returns, so keep a copy
            this.attributes = new org.xml.sax.helpers.AttributesImpl(attributes);

            if (!"group".equalsIgnoreCase(qName)) {
                return;
            }

            String type = attributes.getValue("type");
            String contains = attributes.getValue("contains");
            if (type == null || contains == null || !isNumeric(type)) {
                return;
            }

            String[] members = contains.split(" ");
            if (members.length == 0) {
                // "contains" held nothing but separators
                return;
            }

            if (isNumeric(members[0])) {
                // A later group with the same type replaces an earlier one
                georegionsCodes.put(type, contains);
            } else if (!countryMap.containsKey(type)) {
                // FIXME in supplementalData.xml <territoryContainment> has repeatable "<group type=" values
                // and only the first one is read
                countryMap.put(type, contains);
            }
        }

        /** Tells whether the code consists of decimal digits only. */
        private static boolean isNumeric(String code) {
            return code.matches("[0-9]+");
        }
    }

    /**
     * SAX handler that reads the {@code <territory type="...">name</territory>} elements with a numeric
     * {@code type} and turns each of them into a {@link GeoRegion}.
     * <p>
     * The {@code type} value becomes the region's ISO code and the element text its name. When a code occurs
     * more than once in a document, only the first occurrence is used. A new result map is created at the
     * start of every parsed document, so maps obtained from earlier parses are not modified when the handler
     * is reused.
     */
    public static class ParserForCLDR extends DefaultHandler {

        /** ISO code -> region for the document being parsed (or parsed last). */
        private Map<String, GeoRegion> regionMap = new HashMap<>();

        /** ISO code of the territory element whose text is being collected, {@code null} outside of one. */
        private String currentIsoCode;

        /** Text collected so far for the current territory element. */
        private final StringBuilder currentName = new StringBuilder();

        /**
         * @return ISO code -> region read from the most recently parsed document
         */
        public Map<String, GeoRegion> getRegionMap() {
            return regionMap;
        }

        @Override
        public void startDocument() throws SAXException {
            // Must be a new instance: callers keep the maps of previously parsed documents
            regionMap = new HashMap<>();
            currentIsoCode = null;
            currentName.setLength(0);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
            currentIsoCode = null;

            if (!"territory".equalsIgnoreCase(qName)) {
                return;
            }

            // Read the attribute here: the Attributes instance is only valid during this call
            String type = attributes.getValue("type");
            if (type != null && type.matches("[0-9]+") && !regionMap.containsKey(type)) {
                currentIsoCode = type;
                currentName.setLength(0);
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            // The parser may deliver the text of one element in several chunks
            if (currentIsoCode != null) {
                currentName.append(ch, start, length);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (currentIsoCode == null) {
                return;
            }

            GeoRegion geoRegion = new GeoRegion();
            geoRegion.setIsoCode(currentIsoCode);
            geoRegion.setName(currentName.toString());
            geoRegion.setNameL(null);
            regionMap.put(currentIsoCode, geoRegion);

            currentIsoCode = null;
        }
    }
}