S3StorageServiceImpl.java
/*
* Copyright 2018 Global Crop Diversity Trust
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.genesys.filerepository.service.impl;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.InvalidKeyException;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;
import java.util.function.Consumer;
import java.util.stream.Collectors;
import javax.crypto.Mac;
import javax.crypto.spec.SecretKeySpec;
import javax.xml.bind.DatatypeConverter;
import org.apache.commons.lang3.StringUtils;
import org.genesys.filerepository.InvalidRepositoryPathException;
import org.genesys.filerepository.service.BytesStorageService;
import org.genesys.filerepository.service.s3.ListBucketResult;
import org.springframework.beans.factory.InitializingBean;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.core.io.FileSystemResource;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpHeaders;
import org.springframework.http.HttpMethod;
import org.springframework.http.HttpRequest;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.http.client.ClientHttpResponse;
import org.springframework.http.converter.xml.MappingJackson2XmlHttpMessageConverter;
import org.springframework.stereotype.Service;
import org.springframework.web.client.HttpClientErrorException;
import org.springframework.web.client.RestTemplate;
import com.fasterxml.jackson.module.jaxb.JaxbAnnotationModule;
import lombok.extern.slf4j.Slf4j;
/**
* Amazon S3 storage implementation.
*/
@Service("S3Storage")
@Slf4j
public class S3StorageServiceImpl implements BytesStorageService, InitializingBean {
/** Charset used whenever strings are turned into bytes for hashing and signing. */
private static final Charset CHARSET_UTF8 = StandardCharsets.UTF_8;
/** Name of the HTTP header that receives the computed authorization value. */
private static final String HTTP_AUTHORIZATION = "Authorization";
/** Separator placed between the elements of the canonical request and of the string to sign. */
private static final String LINE_SEPARATOR = "\n";
/** Name of the header that receives the hex-encoded SHA-256 hash of the request body. */
private static final String AMZ_CONTENT_SHA256 = "X-Amz-Content-SHA256";
/** Name of the header that receives the request timestamp. */
private static final String AMZ_DATE = "X-Amz-Date";
/** JCA algorithm name handed to {@code Mac} and {@code SecretKeySpec} for AWS V4 signing. */
private static final String AWS_SIGN_ALG = "HmacSHA256";
/**
 * Per-thread formatter for the {@code yyyyMMdd'T'HHmmss'Z'} timestamp in UTC.
 * {@link SimpleDateFormat} is not thread-safe, hence one instance per thread.
 */
private static final ThreadLocal<SimpleDateFormat> HEADER_DATE_FORMAT = ThreadLocal.withInitial(() -> {
    final SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMdd'T'HHmmss'Z'", Locale.US);
    timestampFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    return timestampFormat;
});
/**
 * Per-thread formatter for the {@code yyyyMMdd} date in UTC, used for the
 * credential scope and the signing key.
 */
private static final ThreadLocal<SimpleDateFormat> YYYYMMDD = new ThreadLocal<SimpleDateFormat>() {
    @Override
    protected SimpleDateFormat initialValue() {
        // Pin the locale: with the JVM default locale the calendar system and the digits
        // could differ from the US-formatted request timestamp, producing a scope date
        // that does not match the timestamp.
        var sdf = new SimpleDateFormat("yyyyMMdd", Locale.US);
        sdf.setTimeZone(TimeZone.getTimeZone("UTC"));
        return sdf;
    }
};
/** REST client; built by {@link #initializeRestTemplate()}, which installs the request-signing interceptor. */
private final RestTemplate restTemplate = initializeRestTemplate();
/** The access key, injected from {@code s3.accessKey}; becomes part of the credential in the authorization header. */
@Value("${s3.accessKey}")
private String accessKey;
/** The secret key, injected from {@code s3.secretKey}; the signing key is derived from it. */
@Value("${s3.secretKey}")
private String secretKey;
/** The bucket, injected from {@code s3.bucket} ({@code null} when not set); only used to derive the endpoint when none is configured. */
@Value("${s3.bucket:#{null}}")
private String bucket;
/** The region, injected from {@code s3.region} ({@code null} when not set); used for signing and to derive the endpoint when none is configured. */
@Value("${s3.region:#{null}}")
private String region;
/** The prefix, injected from {@code s3.prefix}; defaults to {@code /}. */
@Value("${s3.prefix:/}")
private String prefix;
/** The S3 endpoint. Allows for using AWS S3 compatible services. Derived from bucket and region in {@link #afterPropertiesSet()} when not configured. */
@Value("${s3.endpoint:#{null}}")
private URL s3endpoint;
// The prefix as a Path; repository paths are appended to it when object paths are built.
private Path awsBasePath;
/**
 * Finishes configuration once the properties are injected: turns the prefix
 * into the base path and, when no endpoint is configured, derives it from
 * bucket and region.
 *
 * @throws IllegalStateException when neither an endpoint nor both bucket and
 *         region are configured
 * @throws Exception when the derived endpoint or the sample URL is malformed
 */
@Override
public void afterPropertiesSet() throws Exception {
    this.awsBasePath = Paths.get(StringUtils.defaultIfBlank(this.prefix, "/"));
    if (s3endpoint == null) {
        if (bucket == null || region == null) {
            log.error("You must configure S3_ENDPOINT=https://BUCKET.s3-REGION.amazonaws.com");
            throw new IllegalStateException("Please configure S3_ENDPOINT=https://BUCKET.s3-REGION.amazonaws.com");
        }
        s3endpoint = new URL(String.format("https://%s.s3-%s.amazonaws.com", bucket, region));
        log.warn("Please configure S3_ENDPOINT={} instead of S3_BUCKET={} and S3_REGION={}", s3endpoint, bucket, region);
    }
    if (StringUtils.isBlank(region)) {
        // The region is part of the credential scope and of the signing key; without it
        // requests cannot be signed. Report it at start-up instead of on the first request.
        log.error("S3 region is not configured, requests to {} cannot be signed", s3endpoint);
    }
    log.warn("S3 endpoint={} prefix={} dummy={}", s3endpoint, prefix, getAwsUrl(Paths.get("/dummy", "filename.txt")));
}
/**
 * Stores the given bytes at the repository path by issuing a PUT to the
 * corresponding S3 URL.
 *
 * @param bytesFile repository path of the object
 * @param data the bytes to store
 * @throws IOException when {@code data} is {@code null}
 */
@Override
public void upsert(final Path bytesFile, final byte[] data) throws IOException {
    final Path target = bytesFile.normalize().toAbsolutePath();
    if (data == null) {
        throw new IOException("File bytes are null");
    }
    log.debug("Putting to path={} len={}", bytesFile, data.length);
    final URL destination = getAwsUrl(target);
    try {
        restTemplate.put(destination.toString(), data);
    } catch (final HttpClientErrorException rejected) {
        // Log the response body before passing the failure on unchanged
        log.error("Upserting file failed with error\n{}", rejected.getResponseBodyAsString());
        throw rejected;
    }
}
/**
 * Stores the contents of a local file at the repository path by issuing a
 * PUT to the corresponding S3 URL.
 *
 * @param bytesFile repository path of the object
 * @param fileWithData local file holding the bytes to upload
 * @throws IOException when the file is {@code null} or does not exist
 */
@Override
public void upsert(Path bytesFile, File fileWithData) throws IOException {
    final boolean usable = fileWithData != null && fileWithData.exists();
    if (!usable) {
        throw new IOException("File is null or does not exist.");
    }
    final Path target = bytesFile.normalize().toAbsolutePath();
    log.debug("Putting to path={} len={}", bytesFile, fileWithData.length());
    final URL destination = getAwsUrl(target);
    try {
        final HttpEntity<FileSystemResource> payload = new HttpEntity<>(new FileSystemResource(fileWithData));
        final ResponseEntity<String> reply = restTemplate.exchange(destination.toString(), HttpMethod.PUT, payload, String.class);
        log.info("Upload status code: {}", reply.getStatusCode());
        log.debug("Upload response: {}", reply.getBody());
    } catch (final HttpClientErrorException rejected) {
        // Log the response body before passing the failure on unchanged
        log.error("Upserting file failed with error\n{}", rejected.getResponseBodyAsString());
        throw rejected;
    }
}
/**
 * Deletes the object at the repository path by issuing a DELETE to the
 * corresponding S3 URL.
 *
 * @param bytesFile repository path of the object
 * @throws IOException when the S3 URL cannot be constructed
 */
@Override
public void remove(final Path bytesFile) throws IOException {
    final Path target = bytesFile.normalize().toAbsolutePath();
    final String location = getAwsUrl(target).toString();
    log.debug("Deleting from path={} url={}", target, location);
    try {
        restTemplate.delete(location);
    } catch (final HttpClientErrorException rejected) {
        // Log the response body before passing the failure on unchanged
        log.error("Deleting file failed with error\n{}", rejected.getResponseBodyAsString());
        throw rejected;
    }
}
/**
 * Fetches the bytes stored at the repository path with a GET to the
 * corresponding S3 URL.
 *
 * @param bytesFile repository path of the object
 * @return the bytes, or {@code null} when S3 answers 404 Not Found
 * @throws IOException when the S3 URL cannot be constructed
 */
@Override
public byte[] get(final Path bytesFile) throws IOException {
    final Path normalPath = bytesFile.normalize().toAbsolutePath();
    log.debug("Getting bytes path={} filename={}", normalPath.getParent(), normalPath.getFileName());
    final String url = getAwsUrl(normalPath).toString();
    try {
        return restTemplate.getForObject(url, byte[].class);
    } catch (final HttpClientErrorException e) {
        if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
            // A missing object is an expected outcome (exists() treats 404 the same way),
            // so it is not reported as an error.
            log.debug("No bytes found at path={}", normalPath);
            return null; // Match behavior of FilesystemStorageServiceImpl
        }
        log.error("Getting bytes failed with {} {} error\n{}", e.getStatusCode(), e.getStatusText(), e.getResponseBodyAsString());
        throw e;
    }
}
/**
 * Streams the object at the repository path to the given consumer. The
 * response body stream is closed once the consumer returns.
 *
 * @param bytesFile repository path of the object
 * @param consumerOfStream receives the response body stream
 * @throws IOException when the S3 URL cannot be constructed
 */
@Override
public void get(Path bytesFile, Consumer<InputStream> consumerOfStream) throws IOException {
    final Path target = bytesFile.normalize().toAbsolutePath();
    log.debug("Getting bytes path={} filename={}", target.getParent(), target.getFileName());
    final String location = getAwsUrl(target).toString();
    try {
        restTemplate.execute(location, HttpMethod.GET, null, reply -> {
            try (InputStream content = reply.getBody()) {
                consumerOfStream.accept(content);
            }
            return null;
        });
    } catch (final HttpClientErrorException rejected) {
        // Log the response body before passing the failure on unchanged
        log.error("Getting bytes failed with error\n{}", rejected.getResponseBodyAsString());
        throw rejected;
    }
}
/**
 * Returns the URL of an S3 resource: the prefixed object path resolved
 * against the configured endpoint.
 *
 * @param bytesFile the repository path of the object
 * @return the url
 * @throws MalformedURLException when URL cannot be constructed
 */
private URL getAwsUrl(final Path bytesFile) throws MalformedURLException {
    final String objectPath = getAwsPath(bytesFile);
    final URL resolved = new URL(s3endpoint, objectPath);
    log.trace("getUrl path={} result={}", bytesFile, resolved);
    return resolved;
}
/**
 * Builds the object path: the repository path appended to the configured
 * base path, normalized and made absolute.
 *
 * @param path the repository path
 * @return the resulting path as a string
 */
private String getAwsPath(final Path path) {
    final Path joined = Paths.get(awsBasePath.toString(), path.toString());
    final Path absolute = joined.normalize().toAbsolutePath();
    return absolute.toString();
}
/**
 * Builds the AWS Signature V4 canonical request:
 *
 * <pre>
 * HTTPMethod \n CanonicalURI \n CanonicalQueryString \n CanonicalHeaders \n SignedHeaders \n HashedPayload
 * </pre>
 *
 * As a side effect the {@code X-Amz-Content-SHA256} header is set on the
 * request to the hex-encoded SHA-256 hash of the body.
 *
 * @param request the request being signed; its headers are modified
 * @param body the request body, {@code null} is treated as empty
 * @return the canonical request
 * @throws NoSuchAlgorithmException when SHA-256 is not available
 */
private String buildCanonicalRequest(final HttpRequest request, final byte[] body) throws NoSuchAlgorithmException {
    final HttpHeaders headers = request.getHeaders();
    // Content hash, also sent as a header so that it becomes part of the signed headers
    final String hashedPayload = printHex(hashSha256(body == null ? new byte[0] : body));
    headers.set(AMZ_CONTENT_SHA256, hashedPayload);

    final StringBuilder sb = new StringBuilder();
    // HTTP-Verb
    sb.append(request.getMethod()).append(LINE_SEPARATOR);
    // CanonicalURI: the encoded path as sent on the wire. getPath() would decode
    // escapes such as %20 and the signature would no longer match the request.
    final String rawPath = request.getURI().getRawPath();
    sb.append(StringUtils.isEmpty(rawPath) ? "/" : rawPath).append(LINE_SEPARATOR);
    // CanonicalQueryString
    sb.append(buildQueryString(StringUtils.defaultIfBlank(request.getURI().getQuery(), ""))).append(LINE_SEPARATOR);

    // Header names lowercased independently of the JVM locale, then sorted
    final List<String> headerNames = headers.keySet().stream()
        .map(name -> name.toLowerCase(Locale.ROOT))
        .sorted()
        .collect(Collectors.toList());

    // CanonicalHeaders: name:value\n
    for (final String headerName : headerNames) {
        // skip headers without values
        if (headers.getValuesAsList(headerName).isEmpty()) {
            continue;
        }
        // Multiple values of one header are joined with commas; each value is
        // trimmed and runs of whitespace are collapsed to a single space.
        final String value = headers.get(headerName).stream()
            .map(raw -> raw == null ? "" : raw.trim().replaceAll("\\s+", " "))
            .collect(Collectors.joining(","));
        sb.append(headerName).append(':').append(value).append(LINE_SEPARATOR);
    }
    sb.append(LINE_SEPARATOR);
    // SignedHeaders
    sb.append(String.join(";", headerNames));
    sb.append(LINE_SEPARATOR);
    // HashedPayload is the hexadecimal value of the SHA256 hash of the request payload.
    sb.append(hashedPayload);
    log.trace("canonicalRequest\n{}", sb);
    return sb.toString();
}
/**
 * Builds the canonical query string for AWS Signature V4: every parameter
 * name and value is URI-encoded, parameters are sorted by encoded name (then
 * by encoded value) and joined with {@code &}. A parameter without a value
 * is emitted as {@code name=}.
 *
 * @param query the decoded S3 query string, may be empty but not {@code null}
 * @return the sorted, encoded query string (ASCII only)
 */
public static String buildQueryString(String query) {
    log.trace("Encoding query string: {}", query);
    return Arrays.stream(query.split("&"))
        // an empty query or "a=1&&b=2" yields empty fragments, which are not parameters
        .filter(part -> !part.isEmpty())
        // split into name and optional value
        .map(part -> part.split("=", 2))
        // encode parts
        .map(pair -> new String[] { uriEncode(pair[0]), pair.length == 1 ? "" : uriEncode(pair[1]) })
        // must be sorted by name, then value; comparing whole "name=value" strings
        // would mis-order names where one is a prefix of another
        .sorted((left, right) -> {
            final int byName = left[0].compareTo(right[0]);
            return byName != 0 ? byName : left[1].compareTo(right[1]);
        })
        .map(pair -> pair[0] + '=' + pair[1])
        // debug
        .peek(part -> log.trace("Querystring part: {}", part))
        // Do not encode the ampersands between parameters!
        .collect(Collectors.joining("&"));
}

/**
 * Percent-encodes a query component the way Signature V4 expects: UTF-8
 * bytes, everything escaped except {@code A-Z a-z 0-9 - _ . ~}.
 * {@link URLEncoder} performs form encoding, so its deviations are corrected.
 */
private static String uriEncode(final String component) {
    return URLEncoder.encode(component, StandardCharsets.UTF_8)
        // form encoding writes a space as '+'
        .replace("+", "%20")
        // form encoding leaves '*' untouched
        .replace("*", "%2A")
        // form encoding escapes the unreserved '~'
        .replace("%7E", "~");
}
/**
 * Computes the SHA-256 digest of the given bytes.
 *
 * @param bytes the input to hash
 * @return the 32-byte digest
 * @throws NoSuchAlgorithmException when SHA-256 is not available
 */
public static byte[] hashSha256(final byte[] bytes) throws NoSuchAlgorithmException {
    return MessageDigest.getInstance("SHA-256").digest(bytes);
}
/**
 * Encodes bytes as a lowercase hexadecimal string, two characters per byte.
 * Implemented directly so that the result does not depend on the JVM locale
 * or on the JAXB {@code DatatypeConverter} being on the classpath.
 *
 * @param bytes the bytes to encode
 * @return the lowercase hex string, empty for an empty array
 */
public static String printHex(final byte[] bytes) {
    final char[] digits = "0123456789abcdef".toCharArray();
    final char[] hex = new char[bytes.length * 2];
    for (int i = 0; i < bytes.length; i++) {
        // mask to read the byte as unsigned
        final int value = bytes[i] & 0xFF;
        hex[2 * i] = digits[value >>> 4];
        hex[2 * i + 1] = digits[value & 0x0F];
    }
    return new String(hex);
}
/**
 * Assembles the string to sign: algorithm name, request timestamp,
 * credential scope and the hex-encoded SHA-256 hash of the canonical
 * request, separated by line feeds.
 *
 * @param canonicalRequest the canonical request
 * @param date the request date
 * @param region the region placed in the scope
 * @param awsService the service name placed in the scope
 * @return the string to sign
 * @throws NoSuchAlgorithmException when SHA-256 is not available
 */
private static String buildStringToSign(final String canonicalRequest, final Date date, final String region, final String awsService) throws NoSuchAlgorithmException {
    // timeStampISO8601Format
    final String timestamp = HEADER_DATE_FORMAT.get().format(date);
    // <Scope>, e.g. 20130606/us-east-1/s3/aws4_request
    final String scope = YYYYMMDD.get().format(date) + '/' + region + '/' + awsService + "/aws4_request";
    // Hex(SHA256Hash(<CanonicalRequest>))
    final String hashedRequest = printHex(hashSha256(canonicalRequest.getBytes(CHARSET_UTF8)));

    final String stringToSign = "AWS4-HMAC-SHA256\n" + timestamp + LINE_SEPARATOR + scope + LINE_SEPARATOR + hashedRequest;
    log.trace("stringToSign\n{}", stringToSign);
    return stringToSign;
}
/**
 * Derives the signing key by chaining four HMAC-SHA256 operations over the
 * date, the region, the service and the literal {@code aws4_request}.
 *
 * @param secretKey the secret access key
 * @param date the date, already formatted for the credential scope
 * @param region the region
 * @param service the service name
 * @return the signing key
 * @throws InvalidKeyException when a key is rejected by the MAC
 * @throws NoSuchAlgorithmException when HmacSHA256 is not available
 */
private static byte[] calculateSigningKey(final String secretKey, final String date, final String region, final String service) throws InvalidKeyException,
    NoSuchAlgorithmException {
    log.trace("sign date={} region={} service={}", date, region, service);
    // DateKey = HMAC-SHA256("AWS4"+"<SecretAccessKey>", "<YYYYMMDD>")
    final byte[] dateKey = hmacSha256(("AWS4" + secretKey).getBytes(CHARSET_UTF8), date);
    // DateRegionKey = HMAC-SHA256(<DateKey>, "<aws-region>")
    final byte[] dateRegionKey = hmacSha256(dateKey, region);
    // DateRegionServiceKey = HMAC-SHA256(<DateRegionKey>, "<aws-service>")
    final byte[] dateRegionServiceKey = hmacSha256(dateRegionKey, service);
    // SigningKey = HMAC-SHA256(<DateRegionServiceKey>, "aws4_request")
    return hmacSha256(dateRegionServiceKey, "aws4_request");
}
/**
 * Computes the HMAC of a string; the string is converted to bytes as UTF-8.
 *
 * @param key the MAC key
 * @param data the text to authenticate
 * @return the MAC
 */
private static byte[] hmacSha256(final byte[] key, final String data) throws InvalidKeyException, NoSuchAlgorithmException {
    final byte[] payload = data.getBytes(CHARSET_UTF8);
    return hmacSha256(key, payload);
}
/**
 * Computes the HMAC of the given bytes with the signing algorithm.
 *
 * @param key the MAC key
 * @param data the bytes to authenticate
 * @return the MAC
 */
private static byte[] hmacSha256(final byte[] key, final byte[] data) throws NoSuchAlgorithmException, InvalidKeyException {
    final Mac hmac = Mac.getInstance(AWS_SIGN_ALG);
    final SecretKeySpec keySpec = new SecretKeySpec(key, AWS_SIGN_ALG);
    hmac.init(keySpec);
    return hmac.doFinal(data);
}
/**
 * Returns AWS authorization HTTP Header.
 *
 * http://docs.aws.amazon.com/AmazonS3/latest/API/sigv4-auth-using-authorization-header.html
 * http://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-header-based-auth.html
 *
 * @param finalSignature the signature
 * @param request the request whose header names are listed as signed headers
 * @param date the request date, used for the credential scope
 * @return the authorization header
 */
private String getAuthorizationHeader(final byte[] finalSignature, final HttpRequest request, final Date date) {
    // signed headers: lowercased independently of the JVM locale, sorted, ';'-separated
    final String signedHeaders = request.getHeaders().keySet().stream()
        .map(name -> name.toLowerCase(Locale.ROOT))
        .sorted()
        .collect(Collectors.joining(";"));
    final StringBuilder sb = new StringBuilder("AWS4-HMAC-SHA256").append(" Credential=")
        // credential
        .append(getAWSCredential(date))
        // signed headers
        .append(",SignedHeaders=").append(signedHeaders)
        // request signature
        .append(",Signature=").append(printHex(finalSignature));
    log.trace("authorizationHeader=\n{}", sb);
    return sb.toString();
}
/**
 * Formats the credential as access key, date, region, service name
 * {@code s3} and {@code aws4_request}, separated by slashes.
 *
 * @param date the request date
 * @return the credential string
 */
private String getAWSCredential(final Date date) {
    final String day = YYYYMMDD.get().format(date);
    return String.format("%s/%s/%s/%s/aws4_request", accessKey, day, region, "s3");
}
/**
 * Initializes RestTemplate with the interceptor that signs the HTTP requests to
 * AWS using V4 signature method. The JAXB annotation module is registered on
 * the XML message converters.
 *
 * @return the rest template
 */
private RestTemplate initializeRestTemplate() {
    final RestTemplate restTemplate = new RestTemplate();
    // create module
    final JaxbAnnotationModule jaxbAnnotationModule = new JaxbAnnotationModule();
    restTemplate.getMessageConverters().stream()
        .filter(converter -> converter instanceof MappingJackson2XmlHttpMessageConverter)
        .forEach(converter -> ((MappingJackson2XmlHttpMessageConverter) converter).getObjectMapper().registerModule(jaxbAnnotationModule));

    final List<ClientHttpRequestInterceptor> interceptors = new ArrayList<>();
    interceptors.add((request, body, execution) -> {
        final Date date = new Date();
        // The Host header is signed, so it must carry a non-default port just like the
        // header that goes out on the wire (relevant for S3 compatible services).
        final int port = s3endpoint.getPort();
        final boolean explicitPort = port != -1 && port != s3endpoint.getDefaultPort();
        request.getHeaders().set("Host", explicitPort ? s3endpoint.getHost() + ":" + port : s3endpoint.getHost());
        // This avoids date formatting problems; set() so the header always has exactly one value
        request.getHeaders().set(AMZ_DATE, HEADER_DATE_FORMAT.get().format(date));
        // DELETE has no Content-length
        if (request.getMethod() != HttpMethod.POST && request.getMethod() != HttpMethod.PUT) {
            request.getHeaders().remove(HttpHeaders.CONTENT_LENGTH);
        }
        try {
            final String canonicalRequest = buildCanonicalRequest(request, body);
            final String stringToSign = buildStringToSign(canonicalRequest, date, region, "s3");
            final byte[] signingKey = calculateSigningKey(secretKey, YYYYMMDD.get().format(date), region, "s3");
            final byte[] finalSignature = hmacSha256(signingKey, stringToSign);
            request.getHeaders().set(HTTP_AUTHORIZATION, getAuthorizationHeader(finalSignature, request, date));
        } catch (NoSuchAlgorithmException | InvalidKeyException e) {
            log.error("Could not sign AWS request.", e);
            // Do not send a request without its authorization header
            throw new IOException("Could not sign AWS request.", e);
        }
        final ClientHttpResponse response = execution.execute(request, body);
        if (response.getStatusCode() != HttpStatus.OK) {
            log.trace("S3 HTTP {} {} status={} {}", request.getMethod(), request.getURI(), response.getRawStatusCode(), response.getStatusText());
        }
        return response;
    });
    restTemplate.setInterceptors(interceptors);
    return restTemplate;
}
/**
 * Checks whether an object exists by requesting its headers, see
 * http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectHEAD.html
 *
 * @param bytesFile the repository path of the object
 * @return {@code true} when the HEAD request succeeds, {@code false} when S3
 *         answers 404 Not Found
 * @throws IOException when the S3 URL cannot be constructed
 * @throws InvalidRepositoryPathException declared by the interface
 */
@Override
public boolean exists(final Path bytesFile) throws IOException, InvalidRepositoryPathException {
    final Path target = bytesFile.normalize().toAbsolutePath();
    try {
        final URL location = getAwsUrl(target);
        log.trace("Fetching HEAD for url={}", location);
        final HttpHeaders responseHeaders = restTemplate.headForHeaders(location.toString());
        if (log.isDebugEnabled()) {
            responseHeaders.forEach((name, values) -> log.debug("{}: {}", name, values));
        }
        return true;
    } catch (final HttpClientErrorException rejected) {
        if (rejected.getStatusCode() == HttpStatus.NOT_FOUND) {
            // Not found simply means the object does not exist
            return false;
        }
        log.error("Testing for file failed with error\n{}", rejected.getResponseBodyAsString());
        throw rejected;
    } catch (final Throwable unexpected) {
        log.warn("Catch this thing!", unexpected);
        throw unexpected;
    }
}
/**
 * List bucket contents as per
 * http://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketGET.html
 *
 * The request is sent to the configured endpoint (its scheme, host and port)
 * with the prefixed repository path followed by "/" as the prefix and "/" as
 * the delimiter.
 *
 * @param path the repository path
 * @return the object keys at the specified path with the leading prefix
 *         removed; an empty list when there is no result
 * @throws InvalidRepositoryPathException when path is messed up
 */
@Override
public List<String> listFiles(final Path path) throws InvalidRepositoryPathException {
    PathValidator.checkValidPath(path);
    final String s3prefix = getAwsPath(path).substring(1);
    log.debug("Listing S3 bucket for host={} path={} prefix={}", s3endpoint.getHost(), path, s3prefix);
    // Use the scheme and authority of the endpoint so that http endpoints and
    // non-default ports of S3 compatible services are honoured.
    final String listUrl = s3endpoint.getProtocol() + "://" + s3endpoint.getAuthority() + "/?list-type=2&delimiter=/&prefix={path}/";
    try {
        // NOTE(review): a single request is made and no continuation token is followed,
        // so a truncated listing would return only its first page — confirm whether paging is needed.
        final ListBucketResult listBucketResult = restTemplate.getForObject(listUrl, ListBucketResult.class, s3prefix);
        // Check for null before anything (including debug logging) touches the result
        if (listBucketResult == null) {
            return Collections.emptyList();
        }
        if (log.isDebugEnabled()) {
            log.debug("Bucket name={} maxKeys={} delimiter={} prefix={}", listBucketResult.getName(), listBucketResult.getMaxKeys(), listBucketResult.getDelimiter(), listBucketResult
                .getPrefix());
            if (listBucketResult.getCommonPrefixes() != null) {
                listBucketResult.getCommonPrefixes().forEach(commonPrefix -> {
                    log.debug("Subprefix={}", commonPrefix.getPrefix());
                });
            }
            if (listBucketResult.getContents() != null) {
                listBucketResult.getContents().forEach(content -> {
                    log.debug("Object prefix={} len={} filename={}", content.getKey(), content.getSize(), content.getKey().substring(s3prefix.length()));
                });
            }
        }
        if (listBucketResult.getContents() == null) {
            return Collections.emptyList();
        }
        return listBucketResult.getContents().stream().map(content -> content.getKey().substring(s3prefix.length())).collect(Collectors.toList());
    } catch (HttpClientErrorException e) {
        log.error("Error listing files at path={}\n{}", path, e.getResponseBodyAsString());
        throw e;
    }
}
}