// GeoRegionDataCLDR.java
package org.genesys.server.service.worker;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import javax.annotation.Resource;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.genesys.server.model.impl.Country;
import org.genesys.server.model.impl.GeoRegion;
import org.genesys.server.service.GeoService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Component;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
@Component
public class GeoRegionDataCLDR {
// Address of the CLDR "core" archive that getCoreZip() downloads.
private static final String CLDR_URL = "https://unicode.org/Public/cldr/26.0.1/core.zip";
// Logger shared by this class and its nested parsers.
public static final Logger LOG = LoggerFactory.getLogger(GeoRegionDataCLDR.class);
// Queried in assignParents() (listAll()) to match country codes to Country records.
@Autowired
private GeoService geoService;
// Locale codes; each one is read from "common/main/<locale>.xml" inside the archive.
@Resource
private Set<String> supportedLocales;
/**
 * Downloads the CLDR core archive and builds the list of geographic regions from it.
 * <p>
 * Every locale in {@code supportedLocales} is read from {@code common/main/<locale>.xml}.
 * The English file ({@code en}) provides the base regions, which are linked to their parent
 * regions and countries; every other locale only contributes translated names, which are
 * stored on each region as a JSON map.
 *
 * @return regions built from the English locale data, with parents, countries and
 *         localized names assigned
 * @throws IOException if the archive cannot be downloaded or read
 * @throws ParserConfigurationException if a SAX parser cannot be created
 * @throws SAXException if a CLDR file cannot be parsed
 * @throws IllegalStateException if no English ({@code en}) data was loaded, because it is
 *         the base every region is built from
 */
public List<GeoRegion> getGeoRegionDataCLDR() throws IOException, ParserConfigurationException, SAXException {
    // Build the parser before downloading so a parser configuration problem costs no network traffic.
    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    SAXParser parser = saxParserFactory.newSAXParser();
    ParserForCLDR parserForCLDR = new ParserForCLDR();

    List<GeoRegion> geoRegionList = null;
    Map<String, Map<String, GeoRegion>> forNameJGeoRegions = new HashMap<>();

    // try-with-resources: the archive and every entry stream are released even when parsing fails.
    try (ZipFile zipFile = getCoreZip()) {
        LOG.info("Obtained CLDR zip file: {}", zipFile);

        for (String language : supportedLocales) {
            String fileName = "common/main/" + language + ".xml";
            LOG.info("Examining {}", fileName);

            ZipEntry zipEntry = zipFile.getEntry(fileName);
            if (zipEntry == null) {
                // A configured locale that CLDR does not ship must not abort the whole import.
                LOG.warn("CLDR archive has no entry {}; skipping locale {}", fileName, language);
                continue;
            }

            try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
                parser.parse(inputStream, parserForCLDR);
            }

            if ("en".equals(language)) {
                geoRegionList = assignParents(zipFile, parserForCLDR.getRegionMap());
            } else {
                forNameJGeoRegions.put(language, parserForCLDR.getRegionMap());
            }
        }
    }

    if (geoRegionList == null) {
        throw new IllegalStateException("No English (en) CLDR data was loaded; 'en' must be one of the supported locales");
    }

    assignNameJ(geoRegionList, forNameJGeoRegions);
    return geoRegionList;
}
/**
 * Stores the localized names of every region on the region itself as a JSON object
 * keyed by locale code. The English name is always included under {@code "en"}.
 *
 * @param geoRegionList regions to update; their current name is used as the English name
 * @param forNameJGeoRegions per-locale maps from region code to the region parsed for that locale
 * @throws JsonProcessingException if the name map cannot be serialized
 */
private void assignNameJ(List<GeoRegion> geoRegionList, Map<String, Map<String, GeoRegion>> forNameJGeoRegions) throws JsonProcessingException {
    // One mapper serves all regions; creating one per region is needless work.
    ObjectMapper objectMapper = new ObjectMapper();

    for (GeoRegion geoRegion : geoRegionList) {
        Map<String, String> nameL = new HashMap<>();
        nameL.put("en", geoRegion.getName());

        for (Map.Entry<String, Map<String, GeoRegion>> entry : forNameJGeoRegions.entrySet()) {
            GeoRegion translated = entry.getValue().get(geoRegion.getIsoCode());
            if (translated == null) {
                // Not every locale names every region; leave that locale out instead of failing.
                LOG.debug("Locale {} has no name for region {}", entry.getKey(), geoRegion.getIsoCode());
                continue;
            }
            nameL.put(entry.getKey(), translated.getName());
        }

        geoRegion.setNameL(objectMapper.writeValueAsString(nameL));
    }
}
/**
 * Reads {@code common/supplemental/supplementalData.xml} from the archive and uses its
 * containment data to link the given regions to their parent regions and to their countries.
 *
 * @param zipFile open CLDR core archive
 * @param geoRegions regions keyed by region code; the regions are updated in place
 * @return a new list holding all values of {@code geoRegions}
 * @throws SAXException if the supplemental data cannot be parsed
 * @throws ParserConfigurationException if a SAX parser cannot be created
 * @throws IOException if the supplemental data entry is missing or cannot be read
 */
private List<GeoRegion> assignParents(ZipFile zipFile, Map<String, GeoRegion> geoRegions) throws SAXException, ParserConfigurationException, IOException {
    SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
    SAXParser parser = saxParserFactory.newSAXParser();
    SupplementalDataParser supplementalDataParser = new SupplementalDataParser();

    String fileName = "common/supplemental/supplementalData.xml";
    ZipEntry zipEntry = zipFile.getEntry(fileName);
    if (zipEntry == null) {
        throw new IOException("CLDR archive does not contain " + fileName);
    }
    try (InputStream inputStream = zipFile.getInputStream(zipEntry)) {
        parser.parse(inputStream, supplementalDataParser);
    }

    // Region -> sub-region links: only numeric codes denote regions.
    for (Map.Entry<String, String> entry : supplementalDataParser.getGeoregionsCodes().entrySet()) {
        GeoRegion parent = geoRegions.get(entry.getKey());
        for (String isoCode : entry.getValue().split(" ")) {
            if (!isoCode.matches("[0-9]+")) {
                continue;
            }
            GeoRegion child = geoRegions.get(isoCode);
            if (child == null) {
                LOG.warn("Region {} is contained in {} but has no name entry; skipping", isoCode, entry.getKey());
                continue;
            }
            child.setParentRegion(parent);
        }
    }

    // Index the countries once by their two-letter code instead of re-listing them for every code.
    Map<String, List<Country>> countriesByCode2 = new HashMap<>();
    for (Country country : geoService.listAll()) {
        List<Country> sameCode = countriesByCode2.get(country.getCode2());
        if (sameCode == null) {
            sameCode = new ArrayList<>();
            countriesByCode2.put(country.getCode2(), sameCode);
        }
        sameCode.add(country);
    }

    // Region -> country links.
    for (Map.Entry<String, String> entry : supplementalDataParser.getCountryMap().entrySet()) {
        GeoRegion region = geoRegions.get(entry.getKey());
        if (region == null) {
            LOG.warn("Region {} has countries assigned but no name entry; skipping", entry.getKey());
            continue;
        }
        List<Country> countries = new ArrayList<>();
        for (String isoCode : entry.getValue().split(" ")) {
            List<Country> matching = countriesByCode2.get(isoCode);
            if (matching != null) {
                countries.addAll(matching);
            }
        }
        region.setCountries(countries);
    }

    return new ArrayList<>(geoRegions.values());
}
/**
 * Downloads the CLDR core archive from {@code CLDR_URL} into a temporary file and opens it.
 * <p>
 * The archive is opened with {@link ZipFile#OPEN_DELETE}, so the temporary file is removed
 * no later than when the returned {@code ZipFile} is closed (or the JVM exits). The caller
 * is responsible for closing the returned archive.
 *
 * @return the downloaded archive, opened for reading
 * @throws IOException if the download fails or the downloaded file is not a readable zip archive
 */
private ZipFile getCoreZip() throws IOException {
    URL url = new URL(CLDR_URL);
    LOG.info("Downloading {}", url);

    File tempFileForXml = File.createTempFile("core", ".zip");
    HttpURLConnection connection = null;
    boolean opened = false;
    try {
        connection = (HttpURLConnection) url.openConnection();
        connection.setRequestMethod("GET");

        // Both streams are closed whether or not the copy succeeds.
        try (InputStream in = connection.getInputStream();
                FileOutputStream out = new FileOutputStream(tempFileForXml)) {
            IOUtils.copy(in, out);
        }
        LOG.info("Done copying CLDR data to file");

        ZipFile zipFile = new ZipFile(tempFileForXml, ZipFile.OPEN_READ | ZipFile.OPEN_DELETE);
        opened = true;
        return zipFile;
    } finally {
        if (connection != null) {
            connection.disconnect();
        }
        // On failure nobody will ever close a ZipFile, so remove the partial download here.
        if (!opened && tempFileForXml.exists() && !tempFileForXml.delete()) {
            LOG.warn("Could not delete temporary file {}", tempFileForXml);
        }
    }
}
/**
 * SAX handler that collects territory containment from CLDR {@code supplementalData.xml}.
 * <p>
 * Every {@code group} element whose {@code type} is numeric is recorded by its
 * {@code contains} attribute: when the first contained code is numeric the group lists
 * sub-regions, otherwise it lists countries.
 * <p>
 * Attributes are evaluated inside {@link #startElement}, the only callback in which SAX
 * guarantees them to be valid. The result therefore does not depend on whether any
 * character data follows the element.
 */
public static class SupplementalDataParser extends DefaultHandler {
    /** Region code mapped to its space-separated sub-region codes. */
    Map<String, String> georegionsCodes = new HashMap<>();
    /** Region code mapped to its space-separated country codes. */
    Map<String, String> countryMap = new HashMap<>();

    /**
     * @return region code mapped to space-separated country codes; empty before any parse
     */
    public Map<String, String> getCountryMap() {
        return countryMap;
    }

    /**
     * @return region code mapped to space-separated sub-region codes; empty before any parse
     */
    public Map<String, String> getGeoregionsCodes() {
        return georegionsCodes;
    }

    /** Starts every parse with fresh maps so results of an earlier parse are not modified. */
    @Override
    public void startDocument() throws SAXException {
        georegionsCodes = new HashMap<>();
        countryMap = new HashMap<>();
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (!"group".equalsIgnoreCase(qName)) {
            return;
        }
        String type = attributes.getValue("type");
        String contains = attributes.getValue("contains");
        if (type == null || contains == null || !type.matches("[0-9]+")) {
            return;
        }

        String[] codes = contains.split(" ");
        boolean containsRegions = codes.length > 0 && codes[0].matches("[0-9]+");
        if (containsRegions) {
            georegionsCodes.put(type, contains);
        } else if (!countryMap.containsKey(type)) {
            // FIXME: <territoryContainment> may repeat the same <group type="..."> several
            // times; only the first country list for a type is kept.
            countryMap.put(type, contains);
        }
    }
}
/**
 * SAX handler that collects region names from a CLDR locale file
 * ({@code common/main/<locale>.xml}).
 * <p>
 * A {@link GeoRegion} is created for every {@code territory} element whose {@code type} is
 * numeric. When a code occurs more than once, only its first occurrence is used.
 * <p>
 * The element text is accumulated across all {@link #characters} calls and stored when the
 * element ends, because a SAX parser may deliver one text node in several chunks.
 */
public static class ParserForCLDR extends DefaultHandler {
    /** Regions found so far, keyed by region code. */
    private Map<String, GeoRegion> regionMap = new HashMap<>();
    /** Code of the territory element being read, or {@code null} when outside of one. */
    private String currentIsoCode;
    /** Text collected for the territory element being read. */
    private final StringBuilder currentName = new StringBuilder();

    /**
     * @return regions of the most recently parsed document, keyed by region code
     */
    public Map<String, GeoRegion> getRegionMap() {
        return regionMap;
    }

    /** Starts every parse with a fresh map so results of an earlier parse are not modified. */
    @Override
    public void startDocument() throws SAXException {
        regionMap = new HashMap<>();
        currentIsoCode = null;
        currentName.setLength(0);
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        currentIsoCode = null;
        currentName.setLength(0);
        if (!"territory".equalsIgnoreCase(qName)) {
            return;
        }
        // Read the attribute now: SAX only guarantees it to be valid during this callback.
        String type = attributes.getValue("type");
        if (type != null && type.matches("[0-9]+") && !regionMap.containsKey(type)) {
            currentIsoCode = type;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (currentIsoCode != null) {
            currentName.append(ch, start, length);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (currentIsoCode == null) {
            return;
        }
        GeoRegion geoRegion = new GeoRegion();
        geoRegion.setIsoCode(currentIsoCode);
        geoRegion.setName(currentName.toString());
        geoRegion.setNameL(null);
        regionMap.put(currentIsoCode, geoRegion);
        currentIsoCode = null;
    }
}
}