GeoRegionDataCLDR.java

package org.genesys.server.service.worker;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import javax.annotation.Resource;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.server.model.impl.Country;
import org.genesys.server.model.impl.GeoRegion;
import org.genesys.server.service.CountryService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Component;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.DefaultHandler;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;

@Component
public class GeoRegionDataCLDR {

	private static final String CLDR_URL = "https://unicode.org/Public/cldr/26.0.1/core.zip";

	public static final Logger LOG = LoggerFactory.getLogger(GeoRegionDataCLDR.class);

	@Autowired
	@Lazy
	private CountryService countryService;

	@Resource
	private Set<String> supportedLocales;

	/**
	 * Downloads the CLDR core archive and builds the list of geographic regions from it.
	 *
	 * <p>
	 * For every entry of {@code supportedLocales} the file {@code common/main/<locale>.xml} is parsed.
	 * The regions found in the {@code en} file become the result (after {@link #assignParents} has linked
	 * them to their parent regions and countries); the regions of all other locales are only used as a
	 * source of translated names, which are stored on each region as a JSON map by {@link #assignNameJ}.
	 * Locales whose file is missing from the archive are skipped with a warning.
	 * </p>
	 *
	 * @return the regions read from the English locale file, with parents, countries and translated names assigned
	 * @throws IOException if the archive cannot be downloaded or read
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws SAXException if one of the XML files cannot be parsed
	 * @throws IllegalStateException if no regions were loaded for the {@code en} locale
	 */
	public List<GeoRegion> getGeoRegionDataCLDR() throws IOException, ParserConfigurationException, SAXException {

		SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
		saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

		SAXParser parser = saxParserFactory.newSAXParser();
		ParserForCLDR parserForCLDR = new ParserForCLDR();

		List<GeoRegion> geoRegionList = null;
		Map<String, Map<String, GeoRegion>> forNameJGeoRegions = new HashMap<>();

		// The archive is only needed while parsing; close it even when parsing fails
		try (ZipFile zipFile = getCoreZip()) {
			LOG.info("Obtained CLDR zip file: {}", zipFile);

			for (String language : supportedLocales) {
				String fileName = "common/main/" + language + ".xml";
				LOG.info("Examining {}", fileName);

				ZipEntry zipEntry = zipFile.getEntry(fileName);
				if (zipEntry == null) {
					// getInputStream(null) would fail with an uninformative NullPointerException
					LOG.warn("CLDR archive has no entry {}, skipping locale {}", fileName, language);
					continue;
				}

				try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
					parser.parse(inputStream, parserForCLDR);
				}

				if ("en".equals(language)) {
					geoRegionList = assignParents(zipFile, parserForCLDR.getRegionMap());
				} else {
					forNameJGeoRegions.put(language, parserForCLDR.getRegionMap());
				}
			}
		}

		if (geoRegionList == null) {
			// English is the base data set; without it there is nothing to attach translations to
			throw new IllegalStateException("No CLDR region data loaded for locale 'en'; supported locales: " + supportedLocales);
		}

		assignNameJ(geoRegionList, forNameJGeoRegions);

		return geoRegionList;
	}

	/**
	 * Stores the localized names of every region as a JSON object on the region itself.
	 *
	 * <p>
	 * The JSON maps a locale to the region name in that locale. The {@code en} entry is taken from the
	 * region's own name; all other entries are looked up by ISO code in {@code forNameJGeoRegions}.
	 * A locale that has no entry for a region is left out of that region's JSON.
	 * </p>
	 *
	 * @param geoRegionList regions to update
	 * @param forNameJGeoRegions per-locale maps from ISO code to the region parsed for that locale
	 * @throws JsonProcessingException if the name map cannot be serialized
	 */
	private void assignNameJ(List<GeoRegion> geoRegionList, Map<String, Map<String, GeoRegion>> forNameJGeoRegions) throws JsonProcessingException {

		// One mapper serves all regions; building one per region is needlessly expensive
		ObjectMapper objectMapper = new ObjectMapper();

		for (GeoRegion geoRegion : geoRegionList) {

			Map<String, String> nameL = new HashMap<>();
			nameL.put("en", geoRegion.getName());

			for (Map.Entry<String, Map<String, GeoRegion>> entry : forNameJGeoRegions.entrySet()) {
				GeoRegion translated = entry.getValue().get(geoRegion.getIsoCode());
				if (translated == null) {
					// Not every locale file names every region
					LOG.debug("No {} name for region {}", entry.getKey(), geoRegion.getIsoCode());
					continue;
				}
				nameL.put(entry.getKey(), translated.getName());
			}

			geoRegion.setNameL(objectMapper.writeValueAsString(nameL));
		}
	}

	/**
	 * Links regions to their parent regions and to their countries, using the territory containment
	 * data of {@code common/supplemental/supplementalData.xml}.
	 *
	 * <p>
	 * Containment groups whose members are numeric codes define parent/child relations between regions;
	 * the remaining groups list country codes, which are matched against {@link Country#getCode2()} of
	 * the countries returned by {@code countryService.listAll()}. Groups that refer to a region code not
	 * present in {@code geoRegions} are skipped with a warning.
	 * </p>
	 *
	 * @param zipFile the opened CLDR core archive
	 * @param geoRegions regions by ISO code; the contained objects are modified in place
	 * @return a new list holding all values of {@code geoRegions}
	 * @throws SAXException if the supplemental data cannot be parsed
	 * @throws ParserConfigurationException if no SAX parser can be configured
	 * @throws IOException if the supplemental data is missing from the archive or cannot be read
	 */
	private List<GeoRegion> assignParents(ZipFile zipFile, Map<String, GeoRegion> geoRegions) throws SAXException, ParserConfigurationException, IOException {

		SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
		saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

		SAXParser parser = saxParserFactory.newSAXParser();
		SupplementalDataParser supplementalDataParser = new SupplementalDataParser();

		String fileName = "common/supplemental/supplementalData.xml";
		ZipEntry zipEntry = zipFile.getEntry(fileName);
		if (zipEntry == null) {
			throw new IOException("CLDR archive has no entry " + fileName);
		}
		try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
			parser.parse(inputStream, supplementalDataParser);
		}

		// Compiled once instead of once per code via String.matches
		Pattern numericCode = Pattern.compile("[0-9]+");

		for (Map.Entry<String, String> entry : supplementalDataParser.getGeoregionsCodes().entrySet()) {
			GeoRegion parent = geoRegions.get(entry.getKey());
			if (parent == null) {
				LOG.warn("Containment group refers to unknown region {}", entry.getKey());
				continue;
			}
			for (String isoCode : entry.getValue().split(" ")) {
				if (!numericCode.matcher(isoCode).matches()) {
					continue;
				}
				GeoRegion child = geoRegions.get(isoCode);
				if (child == null) {
					LOG.warn("Region {} lists unknown sub-region {}", entry.getKey(), isoCode);
					continue;
				}
				child.setParentRegion(parent);
			}
		}

		// Fetch the countries once and index them by 2-letter code, instead of re-listing them for every code.
		// Each bucket keeps the order of listAll(), so the resulting country lists match a linear scan.
		Map<String, List<Country>> countriesByCode2 = new HashMap<>();
		for (Country country : countryService.listAll()) {
			List<Country> sameCode = countriesByCode2.get(country.getCode2());
			if (sameCode == null) {
				sameCode = new ArrayList<>();
				countriesByCode2.put(country.getCode2(), sameCode);
			}
			sameCode.add(country);
		}

		for (Map.Entry<String, String> entry : supplementalDataParser.getCountryMap().entrySet()) {
			GeoRegion region = geoRegions.get(entry.getKey());
			if (region == null) {
				LOG.warn("Containment group refers to unknown region {}", entry.getKey());
				continue;
			}

			List<Country> countries = new ArrayList<>();
			for (String isoCode : entry.getValue().split(" ")) {
				List<Country> matching = countriesByCode2.get(isoCode);
				if (matching != null) {
					countries.addAll(matching);
				}
			}
			region.setCountries(countries);
		}

		return new ArrayList<>(geoRegions.values());
	}

	/**
	 * Downloads the CLDR core archive from {@code CLDR_URL} into a temporary file and opens it.
	 *
	 * <p>
	 * The temporary file is opened with {@link ZipFile#OPEN_DELETE}, so it is removed from disk no later
	 * than when the returned {@link ZipFile} is closed. If the download or opening fails, the temporary
	 * file is deleted before the exception is propagated.
	 * </p>
	 *
	 * @return the opened archive; the caller is responsible for closing it
	 * @throws IOException if the download fails or the downloaded file cannot be opened as a zip archive
	 */
	private ZipFile getCoreZip() throws IOException {

		// Without timeouts a stalled server would block the calling thread indefinitely
		final int connectTimeoutMillis = 30000;
		final int readTimeoutMillis = 120000;

		URL url = new URL(CLDR_URL);
		LOG.info("Downloading {}", url);

		File tempFileForXml = File.createTempFile("core", ".zip");
		try {
			HttpURLConnection connection = (HttpURLConnection) url.openConnection();
			try {
				connection.setRequestMethod("GET");
				connection.setConnectTimeout(connectTimeoutMillis);
				connection.setReadTimeout(readTimeoutMillis);

				try (InputStream in = connection.getInputStream(); FileOutputStream out = new FileOutputStream(tempFileForXml)) {
					IOUtils.copy(in, out);
				}
			} finally {
				connection.disconnect();
			}
			LOG.info("Done copying CLDR data to file");

			return new ZipFile(tempFileForXml, ZipFile.OPEN_READ | ZipFile.OPEN_DELETE);
		} catch (IOException | RuntimeException e) {
			// Do not leave a partial download behind
			if (tempFileForXml.exists() && !tempFileForXml.delete()) {
				LOG.warn("Could not delete temporary file {}", tempFileForXml);
			}
			throw e;
		}
	}

	/**
	 * SAX handler that collects the {@code type} and {@code contains} attributes of {@code group}
	 * elements whose {@code type} consists only of digits.
	 *
	 * <p>
	 * A group is classified by the first space-separated token of its {@code contains} attribute:
	 * </p>
	 * <ul>
	 * <li>numeric first token: stored in {@link #getGeoregionsCodes()}; when the same {@code type} occurs
	 * again with numeric members, the later element replaces the earlier one;</li>
	 * <li>any other first token: stored in {@link #getCountryMap()}; only the first element per
	 * {@code type} is kept.</li>
	 * </ul>
	 * <p>
	 * NOTE(review): {@code supplementalData.xml} repeats {@code type} values; the two maps treat
	 * repetitions differently (last wins vs. first wins). This is the long-standing behavior and is kept
	 * as is -- confirm whether the repeated groups should be merged or filtered instead.
	 * </p>
	 * <p>
	 * Groups are handled in {@link #startElement}, the only callback in which SAX guarantees the
	 * attribute values to be valid, so the result does not depend on text or whitespace following the
	 * element. Elements without a {@code type} or with a missing or blank {@code contains} are ignored.
	 * </p>
	 */
	public static class SupplementalDataParser extends DefaultHandler {

		/** Matches codes made only of ASCII digits. */
		private static final Pattern NUMERIC_CODE = Pattern.compile("[0-9]+");

		// group type -> space-separated numeric member codes
		Map<String, String> georegionsCodes = new HashMap<>();
		// group type -> space-separated non-numeric member codes
		Map<String, String> countryMap = new HashMap<>();

		// qualified name of the most recently started element
		String xmlTag;

		// snapshot of the attributes of the most recently started element
		Attributes attributes;

		/**
		 * @return group type to space-separated member codes, for groups whose first member is not numeric
		 */
		public Map<String, String> getCountryMap() {
			return countryMap;
		}

		/**
		 * @return group type to space-separated member codes, for groups whose first member is numeric
		 */
		public Map<String, String> getGeoregionsCodes() {
			return georegionsCodes;
		}

		/** Starts every parse with fresh, empty result maps. */
		@Override
		public void startDocument() throws SAXException {
			georegionsCodes = new HashMap<>();
			countryMap = new HashMap<>();
		}

		/**
		 * Records the element and, for a {@code group} element with a numeric {@code type}, files its
		 * {@code contains} value into the matching result map.
		 */
		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
			this.xmlTag = qName;
			// The parser may reuse its Attributes instance after this call returns, so keep a copy
			this.attributes = new AttributesImpl(attributes);

			if (!"group".equalsIgnoreCase(qName)) {
				return;
			}

			String type = attributes.getValue("type");
			String contains = attributes.getValue("contains");
			if (type == null || contains == null || !NUMERIC_CODE.matcher(type).matches()) {
				return;
			}

			// split(" ") yields an empty array for blank input, so guard before reading the first member
			String[] members = contains.split(" ");
			if (members.length == 0) {
				return;
			}

			if (NUMERIC_CODE.matcher(members[0]).matches()) {
				georegionsCodes.put(type, contains);
			} else if (!countryMap.containsKey(type)) {
				// FIXME repeated <group type=...> elements: only the first one is read here
				countryMap.put(type, contains);
			}
		}
	}

	/**
	 * SAX handler that builds one {@link GeoRegion} per {@code territory} element whose {@code type}
	 * attribute consists only of digits.
	 *
	 * <p>
	 * The region's ISO code is the {@code type} value and its name is the complete text content of the
	 * element. When a {@code type} occurs more than once in a document, only the first element is used.
	 * </p>
	 * <p>
	 * The text is accumulated across {@link #characters} calls and the region is created in
	 * {@link #endElement}, because a SAX parser may deliver the text of a single element in several
	 * chunks; taking only the first chunk would truncate the name.
	 * </p>
	 */
	public static class ParserForCLDR extends DefaultHandler {

		/** Matches codes made only of ASCII digits. */
		private static final Pattern NUMERIC_CODE = Pattern.compile("[0-9]+");

		private Map<String, GeoRegion> regionMap = new HashMap<>();

		// ISO code of the territory element currently being read, or null when none is being captured
		private String currentIsoCode;

		// text collected so far for the territory element currently being read
		private final StringBuilder currentName = new StringBuilder();

		/**
		 * @return regions of the most recently parsed document, keyed by ISO code
		 */
		public Map<String, GeoRegion> getRegionMap() {
			return regionMap;
		}

		/** Starts every parse with a fresh map, so maps handed out for earlier documents stay untouched. */
		@Override
		public void startDocument() throws SAXException {
			regionMap = new HashMap<>();
			currentIsoCode = null;
			currentName.setLength(0);
		}

		/** Begins capturing text when a not yet seen numeric {@code territory} element starts. */
		@Override
		public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
			if (!"territory".equalsIgnoreCase(qName)) {
				return;
			}

			// Read the attribute here: SAX only guarantees its value during this callback
			String type = attributes.getValue("type");
			if (type != null && NUMERIC_CODE.matcher(type).matches() && !regionMap.containsKey(type)) {
				currentIsoCode = type;
				currentName.setLength(0);
			} else {
				currentIsoCode = null;
			}
		}

		/** Appends a chunk of text to the name of the territory being captured, if any. */
		@Override
		public void characters(char[] ch, int start, int length) throws SAXException {
			if (currentIsoCode != null) {
				currentName.append(ch, start, length);
			}
		}

		/** Registers the captured territory once its element is complete. */
		@Override
		public void endElement(String uri, String localName, String qName) throws SAXException {
			if (currentIsoCode == null || !"territory".equalsIgnoreCase(qName)) {
				return;
			}

			GeoRegion geoRegion = new GeoRegion();
			geoRegion.setIsoCode(currentIsoCode);
			geoRegion.setName(currentName.toString());
			geoRegion.setNameL(null);
			regionMap.put(currentIsoCode, geoRegion);

			currentIsoCode = null;
		}
	}
}