// GeoNamesCountrySource.java

/**
 * Copyright 2014 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 **/

package org.genesys.server.service.worker;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;

/**
 * Fetches and parses country information from
 * https://download.geonames.org/export/dump/countryInfo.txt
 *
 * @author mobreza
 */
@Component
public class GeoNamesCountrySource {
	private static final String GEONAMES_ISO3166_URL = "https://download.geonames.org/export/dump/countryInfo.txt";

	/** Prefix of the commented header line; data rows start on the line after it. */
	private static final String HEADER_PREFIX = "#ISO\tISO3\tISO-Numeric\tfips";

	/**
	 * Highest column index read from a data row. Per the published
	 * countryInfo.txt layout this is the {@code geonameid} column.
	 */
	private static final int GEONAME_ID_COLUMN = 16;

	public static final Logger LOG = LoggerFactory.getLogger(GeoNamesCountrySource.class);

	/**
	 * Downloads countryInfo.txt from geonames.org and converts every data row
	 * that follows the header line into a {@link CountryInfo}.
	 * <p>
	 * Rows that are equal to an already collected entry (as decided by
	 * {@code CountryInfo.equals}) are dropped. Rows with too few columns are
	 * skipped with a warning instead of aborting the whole download.
	 *
	 * @return the parsed countries in file order; empty when no data rows were found
	 * @throws IOException if the request fails, the server answers with a
	 *         non-2xx status or without a body, or the content cannot be parsed
	 */
	public List<CountryInfo> fetchCountryData() throws IOException {

		final HttpGet httpget = new HttpGet(GEONAMES_ISO3166_URL);

		InputStream instream = null;

		final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
		try {
			final HttpResponse response = httpclient.execute(httpget);

			LOG.debug("{}", response.getStatusLine());

			// Do not try to parse an error page as country data
			final int statusCode = response.getStatusLine().getStatusCode();
			if (statusCode < 200 || statusCode >= 300) {
				httpget.abort();
				throw new IOException("Unexpected response from " + GEONAMES_ISO3166_URL + ": " + response.getStatusLine());
			}

			// Get hold of the response entity
			final HttpEntity entity = response.getEntity();
			if (entity == null) {
				throw new IOException("No content in response from " + GEONAMES_ISO3166_URL);
			}
			LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

			instream = entity.getContent();
			// GeoNames dump files are UTF-8; never rely on the platform default charset
			final BufferedReader inreader = new BufferedReader(new InputStreamReader(instream, StandardCharsets.UTF_8));

			if (!skipToHeader(inreader)) {
				LOG.warn("Header line not found in {}", GEONAMES_ISO3166_URL);
			}

			final List<CountryInfo> countries = parseCountries(inreader);
			inreader.close();

			LOG.info("Got {} countries!", countries.size());
			return countries;
		} catch (final ClientProtocolException e) {
			LOG.error(e.getMessage(), e);
			throw new IOException("Could not execute HTTP request: " + e.getMessage(), e);
		} catch (final RuntimeException | CsvValidationException ex) {
			LOG.error(ex.getMessage(), ex);
			// Abort before the stream is closed in finally, so the connection is
			// discarded instead of being read to the end
			httpget.abort();
			throw new IOException(ex);
		} finally {
			LOG.info("Done fetching country info from geonames.org");
			IOUtils.closeQuietly(instream);
			IOUtils.closeQuietly(httpclient);
		}
	}

	/**
	 * Consumes lines up to and including the header line.
	 *
	 * @param inreader reader positioned at the start of the file
	 * @return {@code true} if the header was found, {@code false} if the end of
	 *         the input was reached first
	 * @throws IOException if reading fails
	 */
	private static boolean skipToHeader(final BufferedReader inreader) throws IOException {
		String line;
		while ((line = inreader.readLine()) != null) {
			LOG.trace("{}", line);
			if (line.startsWith(HEADER_PREFIX)) {
				return true;
			}
		}
		return false;
	}

	/**
	 * Parses the remaining tab-separated rows into {@link CountryInfo} objects.
	 *
	 * @param inreader reader positioned on the first data row
	 * @return distinct countries in the order they were read
	 * @throws IOException if reading fails
	 * @throws CsvValidationException if opencsv rejects a row
	 */
	private static List<CountryInfo> parseCountries(final BufferedReader inreader) throws IOException, CsvValidationException {
		final List<CountryInfo> countries = new ArrayList<>();

		final var parser = new CSVParserBuilder()
			.withSeparator('\t')
			.withQuoteChar('"')
			.withEscapeChar('\\')
			.withStrictQuotes(false)
			.withIgnoreLeadingWhiteSpace(true)
			.withIgnoreQuotations(false)
			.build();

		// Not in try-with-resources on purpose: on failure the caller aborts the
		// request first and closes the underlying stream afterwards
		final CSVReader reader = new CSVReaderBuilder(inreader).withCSVParser(parser).build();

		String[] data;
		while ((data = reader.readNext()) != null) {
			if (LOG.isTraceEnabled()) {
				LOG.trace(ArrayUtils.toString(data));
			}

			// Blank or truncated rows do not have all the columns read below
			if (data.length <= GEONAME_ID_COLUMN) {
				LOG.warn("Skipping row with {} columns, expected at least {}: {}", data.length, GEONAME_ID_COLUMN + 1,
					ArrayUtils.toString(data));
				continue;
			}

			Long refnameId = null;
			if (StringUtils.isNotBlank(data[GEONAME_ID_COLUMN])) {
				refnameId = Long.valueOf(data[GEONAME_ID_COLUMN].trim());
			}
			final CountryInfo countryInfo = new CountryInfo(data[0], data[1], data[2], data[4], refnameId, true);
			if (!countries.contains(countryInfo)) {
				countries.add(countryInfo);
			}
		}
		reader.close();

		return countries;
	}
}