GeoNamesCountrySource.java
/**
* Copyright 2014 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
**/
package org.genesys.server.service.worker;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
/**
 * Fetch and parse country information from
 * https://download.geonames.org/export/dump/countryInfo.txt
 *
 * @author mobreza
 */
@Component
public class GeoNamesCountrySource {

    private static final String GEONAMES_ISO3166_URL = "https://download.geonames.org/export/dump/countryInfo.txt";

    public static final Logger LOG = LoggerFactory.getLogger(GeoNamesCountrySource.class);

    /** Prefix of the last comment line of the file; data rows start on the line after it. */
    private static final String HEADER_PREFIX = "#ISO\tISO3\tISO-Numeric\tfips";

    /**
     * Index of the numeric reference id column.
     * NOTE(review): this is the "geonameid" column in the published GeoNames
     * header -- confirm against the current file layout.
     */
    private static final int REF_ID_COLUMN = 16;

    /** Minimum number of columns a row must have for all columns read below to exist. */
    private static final int MIN_COLUMNS = REF_ID_COLUMN + 1;

    /**
     * Download the country list from geonames.org and convert every data row
     * into a {@link CountryInfo}.
     *
     * @return the parsed countries in file order, without duplicates (as
     *         determined by {@code CountryInfo.equals}); empty when the file
     *         contains no data rows
     * @throws IOException if the HTTP request fails, the server answers with a
     *             non-2xx status or without a body, or the content cannot be
     *             read or parsed
     */
    public List<CountryInfo> fetchCountryData() throws IOException {
        final HttpGet httpget = new HttpGet(GEONAMES_ISO3166_URL);
        final CloseableHttpClient httpclient = HttpClientBuilder.create().build();
        // Not try-with-resources on purpose: on failure the request must be
        // aborted (in the catch block) before the stream is closed (in finally).
        InputStream instream = null;

        try {
            final HttpResponse response = httpclient.execute(httpget);
            LOG.debug("{}", response.getStatusLine());

            // Do not try to parse an error page as country data.
            final int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode < 200 || statusCode >= 300) {
                httpget.abort();
                throw new IOException("Unexpected HTTP status " + statusCode + " for " + GEONAMES_ISO3166_URL);
            }

            // Get hold of the response entity
            final HttpEntity entity = response.getEntity();
            if (entity == null) {
                throw new IOException("HTTP response without content for " + GEONAMES_ISO3166_URL);
            }
            LOG.debug("{} {}", entity.getContentType(), entity.getContentLength());

            instream = entity.getContent();
            // Decode explicitly as UTF-8 instead of the platform default charset,
            // which would garble non-ASCII country names on some systems.
            final BufferedReader inreader = new BufferedReader(new InputStreamReader(instream, StandardCharsets.UTF_8));

            final List<CountryInfo> countries = parseCountries(inreader);
            inreader.close();

            LOG.info("Got {} countries!", countries.size());
            return countries;

        } catch (final ClientProtocolException e) {
            LOG.error(e.getMessage(), e);
            throw new IOException("Could not execute HTTP request: " + e.getMessage(), e);
        } catch (final RuntimeException | CsvValidationException ex) {
            LOG.error(ex.getMessage(), ex);
            httpget.abort();
            throw new IOException(ex);
        } finally {
            LOG.info("Done fetching country info from geonames.org");
            IOUtils.closeQuietly(instream);
            IOUtils.closeQuietly(httpclient);
        }
    }

    /**
     * Skip everything up to and including the header line, then parse the
     * remaining tab-separated rows. The reader is not closed here; the caller
     * owns it.
     *
     * @param inreader reader positioned at the start of the downloaded file
     * @return the countries found after the header line
     * @throws IOException if reading fails
     * @throws CsvValidationException if the CSV reader rejects a row
     */
    private static List<CountryInfo> parseCountries(final BufferedReader inreader) throws IOException, CsvValidationException {
        // Find line that starts with
        // #ISO ISO3 ISO-Numeric fips
        boolean headerFound = false;
        String line;
        while ((line = inreader.readLine()) != null) {
            LOG.trace(line);
            if (line.startsWith(HEADER_PREFIX)) {
                headerFound = true;
                break;
            }
        }
        if (!headerFound) {
            LOG.warn("Header line not found in {}, no country data will be parsed", GEONAMES_ISO3166_URL);
        }

        final var parser = new CSVParserBuilder()
            .withSeparator('\t')
            .withQuoteChar('"')
            .withEscapeChar('\\')
            .withStrictQuotes(false)
            .withIgnoreLeadingWhiteSpace(true)
            .withIgnoreQuotations(false)
            .build();
        // Continues on the same reader, i.e. right after the header line.
        final CSVReader reader = new CSVReaderBuilder(inreader).withCSVParser(parser).build();

        final List<CountryInfo> countries = new ArrayList<>();
        String[] data;
        while ((data = reader.readNext()) != null) {
            if (LOG.isTraceEnabled()) {
                LOG.trace(ArrayUtils.toString(data));
            }

            // A row with too few columns would otherwise fail the whole import
            // with an ArrayIndexOutOfBoundsException.
            if (data.length < MIN_COLUMNS) {
                LOG.warn("Skipping row with {} columns, expected at least {}", data.length, MIN_COLUMNS);
                continue;
            }

            Long refnameId = null;
            if (StringUtils.isNotBlank(data[REF_ID_COLUMN])) {
                refnameId = Long.valueOf(data[REF_ID_COLUMN].trim());
            }

            final CountryInfo countryInfo = new CountryInfo(data[0], data[1], data[2], data[4], refnameId, true);
            if (!countries.contains(countryInfo)) {
                countries.add(countryInfo);
            }
        }
        return countries;
    }
}