// Best-effort extraction of a region name from a blob store host name.
//
// The host is lowercased (ASCII only, which is all a DNS name needs and keeps
// the result independent of the process locale) and searched for one of the
// known service tokens. A token only counts when it begins a DNS label, i.e. it
// sits at the very start of the host or directly after a '.'. The region is the
// text between the end of the token and the next '.'; for the "oss-" token the
// token itself is kept as part of the returned label.
//
// Examples:
//   s3.us-west-2.amazonaws.com          -> "us-west-2"
//   oss-cn-hangzhou.aliyuncs.com        -> "oss-cn-hangzhou"
//   logs3.s3.us-east-1.amazonaws.com    -> "us-east-1"
//   127.0.0.1, example.com, s3-service.example.com, mys3.example.com -> ""
//
// Returns an empty string when no known service token is found.
std::string guessRegionFromDomain(std::string domain) {
	static const std::string knownServices[] = { "s3.", "cos.", "oss-", "obs." };

	for (char& c : domain) {
		if (c >= 'A' && c <= 'Z') {
			c = static_cast<char>(c - 'A' + 'a');
		}
	}

	for (const std::string& service : knownServices) {
		// Examine every occurrence, not just the first one: a bucket label such as
		// "logs3." contains "s3." but is not the service label that follows it.
		for (std::size_t p = domain.find(service); p != std::string::npos; p = domain.find(service, p + 1)) {
			if (p >= 1 && domain[p - 1] != '.') {
				// Token is embedded in the middle of a label, e.g. mys3.example.com
				continue;
			}

			// "oss-" is part of the region label itself; the other tokens are skipped.
			const std::size_t begin = (service == "oss-") ? p : p + service.size();
			const std::size_t end = domain.find('.', begin);

			return domain.substr(begin, end == std::string::npos ? std::string::npos : end - begin);
		}
	}

	return "";
}
If it is, then we will set it to a good value or throw below, so the dummy set @@ -289,8 +323,13 @@ Reference S3BlobStoreEndpoint::fromString(const std::string creds = S3BlobStoreEndpoint::Credentials{ key.toString(), secret.toString(), securityToken.toString() }; } + if (region.empty() && CLIENT_KNOBS->HTTP_REQUEST_AWS_V4_HEADER) { + throw std::string( + "Failed to get region from host or parameter in url, region is required for aws v4 signature"); + } + return makeReference( - host.toString(), service.toString(), proxyHost, proxyPort, creds, knobs, extraHeaders); + host.toString(), service.toString(), region, proxyHost, proxyPort, creds, knobs, extraHeaders); } catch (std::string& err) { if (error != nullptr) @@ -356,10 +395,25 @@ std::string S3BlobStoreEndpoint::getResourceURL(std::string resource, std::strin return r; } +std::string constructResourcePath(Reference b, std::string bucket, std::string object) { + std::string resource; + + if (b->getHost().find(bucket + ".") != 0) { + resource += std::string("/") + bucket; // not virtual hosting mode + } + + if (!object.empty()) { + resource += "/"; + resource += object; + } + + return std::move(resource); +} + ACTOR Future bucketExists_impl(Reference b, std::string bucket) { wait(b->requestRateRead->getAllowance(1)); - std::string resource = std::string("/") + bucket; + std::string resource = constructResourcePath(b, bucket, ""); HTTP::Headers headers; Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, { 200, 404 })); @@ -373,7 +427,7 @@ Future S3BlobStoreEndpoint::bucketExists(std::string const& bucket) { ACTOR Future objectExists_impl(Reference b, std::string bucket, std::string object) { wait(b->requestRateRead->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(b, bucket, object); HTTP::Headers headers; Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, { 200, 404 })); @@ -387,7 +441,7 @@ 
Future S3BlobStoreEndpoint::objectExists(std::string const& bucket, std::s ACTOR Future deleteObject_impl(Reference b, std::string bucket, std::string object) { wait(b->requestRateDelete->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(b, bucket, object); HTTP::Headers headers; // 200 or 204 means object successfully deleted, 404 means it already doesn't exist, so any of those are considered // successful @@ -477,9 +531,24 @@ ACTOR Future createBucket_impl(Reference b, std::stri bool exists = wait(b->bucketExists(bucket)); if (!exists) { - std::string resource = std::string("/") + bucket; + std::string resource = constructResourcePath(b, bucket, ""); HTTP::Headers headers; - Reference r = wait(b->doRequest("PUT", resource, headers, nullptr, 0, { 200, 409 })); + + std::string region = b->getRegion(); + if (region.empty()) { + Reference r = wait(b->doRequest("PUT", resource, headers, nullptr, 0, { 200, 409 })); + } else { + UnsentPacketQueue packets; + StringRef body(format("" + " %s" + "", + region.c_str())); + PacketWriter pw(packets.getWriteBuffer(), nullptr, Unversioned()); + pw.serializeBytes(body); + + Reference r = + wait(b->doRequest("PUT", resource, headers, &packets, body.size(), { 200, 409 })); + } } return Void(); } @@ -491,7 +560,7 @@ Future S3BlobStoreEndpoint::createBucket(std::string const& bucket) { ACTOR Future objectSize_impl(Reference b, std::string bucket, std::string object) { wait(b->requestRateRead->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(b, bucket, object); HTTP::Headers headers; Reference r = wait(b->doRequest("HEAD", resource, headers, nullptr, 0, { 200, 404 })); @@ -966,8 +1035,8 @@ ACTOR Future listObjectsStream_impl(Reference bstore, int maxDepth, std::function recurseFilter) { // Request 1000 keys at a time, the maximum allowed - state std::string resource = "/"; 
- resource.append(bucket); + state std::string resource = constructResourcePath(bstore, bucket, ""); + resource.append("/?max-keys=1000"); if (prefix.present()) resource.append("&prefix=").append(prefix.get()); @@ -1324,10 +1393,6 @@ void S3BlobStoreEndpoint::setV4AuthHeaders(std::string const& verb, amzDate = date; dateStamp = datestamp; } - // Extract service and region - StringRef hostRef(host); - std::string service = hostRef.eat(".").toString(); - std::string region = hostRef.eat(".").toString(); // ************* TASK 1: CREATE A CANONICAL REQUEST ************* // Create Create canonical URI--the part of the URI from domain to query string (use '/' if no path) @@ -1370,14 +1435,14 @@ void S3BlobStoreEndpoint::setV4AuthHeaders(std::string const& verb, // ************* TASK 2: CREATE THE STRING TO SIGN************* std::string algorithm = "AWS4-HMAC-SHA256"; - std::string credentialScope = dateStamp + "/" + region + "/" + service + "/" + "aws4_request"; + std::string credentialScope = dateStamp + "/" + region + "/s3/" + "aws4_request"; std::string stringToSign = algorithm + "\n" + amzDate + "\n" + credentialScope + "\n" + sha256_hex(canonicalRequest); // ************* TASK 3: CALCULATE THE SIGNATURE ************* // Create the signing key using the function defined above. 
- std::string signingKey = hmac_sha256( - hmac_sha256(hmac_sha256(hmac_sha256("AWS4" + secretKey, dateStamp), region), service), "aws4_request"); + std::string signingKey = + hmac_sha256(hmac_sha256(hmac_sha256(hmac_sha256("AWS4" + secretKey, dateStamp), region), "s3"), "aws4_request"); // Sign the string_to_sign using the signing_key std::string signature = hmac_sha256_hex(signingKey, stringToSign); // ************* TASK 4: ADD SIGNING INFORMATION TO THE Header ************* @@ -1445,7 +1510,7 @@ ACTOR Future readEntireFile_impl(Reference bst std::string object) { wait(bstore->requestRateRead->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(bstore, bucket, object); HTTP::Headers headers; Reference r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, { 200, 404 })); if (r->code == 404) @@ -1470,7 +1535,7 @@ ACTOR Future writeEntireFileFromBuffer_impl(Reference wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(bstore, bucket, object); HTTP::Headers headers; // Send MD5 sum for content so blobstore can verify it headers["Content-MD5"] = contentMD5; @@ -1540,7 +1605,7 @@ ACTOR Future readObject_impl(Reference bstore, return 0; wait(bstore->requestRateRead->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object; + std::string resource = constructResourcePath(bstore, bucket, object); HTTP::Headers headers; headers["Range"] = format("bytes=%lld-%lld", offset, offset + length - 1); Reference r = wait(bstore->doRequest("GET", resource, headers, nullptr, 0, { 200, 206, 404 })); @@ -1567,7 +1632,8 @@ ACTOR static Future beginMultiPartUpload_impl(ReferencerequestRateWrite->getAllowance(1)); - std::string resource = std::string("/") + bucket + "/" + object + "?uploads"; + 
std::string resource = constructResourcePath(bstore, bucket, object); + resource += "?uploads"; HTTP::Headers headers; if (!CLIENT_KNOBS->BLOBSTORE_ENCRYPTION_TYPE.empty()) headers["x-amz-server-side-encryption"] = CLIENT_KNOBS->BLOBSTORE_ENCRYPTION_TYPE; @@ -1609,8 +1675,8 @@ ACTOR Future uploadPart_impl(Reference bstore, wait(bstore->concurrentUploads.take()); state FlowLock::Releaser uploadReleaser(bstore->concurrentUploads, 1); - std::string resource = - format("/%s/%s?partNumber=%d&uploadId=%s", bucket.c_str(), object.c_str(), partNumber, uploadID.c_str()); + std::string resource = constructResourcePath(bstore, bucket, object); + resource += format("?partNumber=%d&uploadId=%s", partNumber, uploadID.c_str()); HTTP::Headers headers; // Send MD5 sum for content so blobstore can verify it headers["Content-MD5"] = contentMD5; @@ -1662,7 +1728,8 @@ ACTOR Future finishMultiPartUpload_impl(Reference bst manifest += format("%d%s\n", p.first, p.second.c_str()); manifest += ""; - std::string resource = format("/%s/%s?uploadId=%s", bucket.c_str(), object.c_str(), uploadID.c_str()); + std::string resource = constructResourcePath(bstore, bucket, object); + resource += format("?uploadId=%s", uploadID.c_str()); HTTP::Headers headers; PacketWriter pw(part_list.getWriteBuffer(manifest.size()), nullptr, Unversioned()); pw.serializeBytes(manifest); @@ -1686,7 +1753,7 @@ TEST_CASE("/backup/s3/v4headers") { S3BlobStoreEndpoint::Credentials creds{ "AKIAIOSFODNN7EXAMPLE", "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY", "" } // GET without query parameters { - S3BlobStoreEndpoint s3("s3.amazonaws.com", "s3", "proxy", "port", creds); + S3BlobStoreEndpoint s3("s3.amazonaws.com", "443", "amazonaws", "proxy", "port", creds); std::string verb("GET"); std::string resource("/test.txt"); HTTP::Headers headers; @@ -1701,7 +1768,7 @@ TEST_CASE("/backup/s3/v4headers") { // GET with query parameters { - S3BlobStoreEndpoint s3("s3.amazonaws.com", "s3", "proxy", "port", creds); + S3BlobStoreEndpoint 
s3("s3.amazonaws.com", "443", "amazonaws", "proxy", "port", creds); std::string verb("GET"); std::string resource("/test/examplebucket?Action=DescribeRegions&Version=2013-10-15"); HTTP::Headers headers; @@ -1716,7 +1783,7 @@ TEST_CASE("/backup/s3/v4headers") { // POST { - S3BlobStoreEndpoint s3("s3.us-west-2.amazonaws.com", "s3", "proxy", "port", creds); + S3BlobStoreEndpoint s3("s3.us-west-2.amazonaws.com", "443", "us-west-2", "proxy", "port", creds); std::string verb("POST"); std::string resource("/simple.json"); HTTP::Headers headers; @@ -1733,4 +1800,4 @@ TEST_CASE("/backup/s3/v4headers") { } return Void(); -} \ No newline at end of file +} diff --git a/fdbclient/S3BlobStore.h b/fdbclient/S3BlobStore.h index a61a5958d8..86e0e54e32 100644 --- a/fdbclient/S3BlobStore.h +++ b/fdbclient/S3BlobStore.h @@ -100,12 +100,13 @@ public: S3BlobStoreEndpoint(std::string const& host, std::string const& service, + std::string region, Optional const& proxyHost, Optional const& proxyPort, Optional const& creds, BlobKnobs const& knobs = BlobKnobs(), HTTP::Headers extraHeaders = HTTP::Headers()) - : host(host), service(service), proxyHost(proxyHost), proxyPort(proxyPort), + : host(host), service(service), region(region), proxyHost(proxyHost), proxyPort(proxyPort), useProxy(proxyHost.present() && proxyPort.present()), credentials(creds), lookupKey(creds.present() && creds.get().key.empty()), lookupSecret(creds.present() && creds.get().secret.empty()), knobs(knobs), extraHeaders(extraHeaders), @@ -156,6 +157,7 @@ public: std::string host; std::string service; + std::string region; Optional proxyHost; Optional proxyPort; bool useProxy; @@ -193,6 +195,10 @@ public: std::string date = "", std::string datestamp = ""); + std::string getHost() const { return host; } + + std::string getRegion() const { return region; } + // Prepend the HTTP request header to the given PacketBuffer, returning the new head of the buffer chain static PacketBuffer* writeRequestHeader(std::string const& 
request, HTTP::Headers const& headers,