diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 9c6d5b1235..0de2cde97e 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -93,9 +93,9 @@ jobs: # Install ld.gold (binutils) and ld.lld on different runs. if [ "${{ matrix.config.os }}" = "ubuntu-18.04" ]; then - sudo apt-get install -y ninja-build elfutils libzstd-dev binutils + sudo apt-get install -y ninja-build elfutils libzstd-dev binutils python3 else - sudo apt-get install -y ninja-build elfutils libzstd-dev lld + sudo apt-get install -y ninja-build elfutils libzstd-dev lld python3 fi if [ "${{ matrix.config.compiler }}" = "gcc" ]; then diff --git a/doc/MANUAL.adoc b/doc/MANUAL.adoc index 50292ee9a3..fe5f7cdb72 100644 --- a/doc/MANUAL.adoc +++ b/doc/MANUAL.adoc @@ -796,6 +796,7 @@ Examples: + * `file:///shared/nfs/directory` * `file:///shared/nfs/one|read-only file:///shared/nfs/two` +* `http://example.org/cache` [[config_sloppiness]] *sloppiness* (*CCACHE_SLOPPINESS*):: @@ -931,6 +932,27 @@ Optional attributes: * *update-mtime*: If *true*, update the modification time (mtime) of cache entries that are read. The default is *false*. +=== HTTP storage backend + +URL format: `http://HOST[:PORT][/PATH]` + +This backend stores data in an HTTP-compatible server. The required HTTP +methods are `GET`, `PUT` and `DELETE`. `HEAD` may also be used to check +whether an entry already exists before it is stored. + +Note that ccache will not perform any cleanup of the HTTP storage. 
+ +Examples: + +* `http://localhost:8080/` +* `http://example.org/cache` + +Known issues and limitations: + +* URLs containing IPv6 addresses like `http://[::1]/` are not supported +* There are no HTTP timeouts implemented or configured +* Authentication is not yet supported +* HTTPS is not yet supported + == Cache size management By default, ccache has a 5 GB limit on the total size of files in the cache and diff --git a/src/ccache.hpp b/src/ccache.hpp index e4fb11607b..7b8f06ffa7 100644 --- a/src/ccache.hpp +++ b/src/ccache.hpp @@ -28,6 +28,7 @@ class Context; +extern const char CCACHE_NAME[]; extern const char CCACHE_VERSION[]; using FindExecutableFunction = diff --git a/src/storage/Storage.cpp b/src/storage/Storage.cpp index a8c402386a..fabe34d346 100644 --- a/src/storage/Storage.cpp +++ b/src/storage/Storage.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -239,6 +240,9 @@ create_storage(const ParseStorageEntryResult& storage_entry) if (storage_entry.url.scheme() == "file") { return std::make_unique(storage_entry.url, storage_entry.attributes); + } else if (storage_entry.url.scheme() == "http") { + return std::make_unique(storage_entry.url, + storage_entry.attributes); } return {}; diff --git a/src/storage/secondary/CMakeLists.txt b/src/storage/secondary/CMakeLists.txt index f6b8c2d668..f2292245cf 100644 --- a/src/storage/secondary/CMakeLists.txt +++ b/src/storage/secondary/CMakeLists.txt @@ -1,6 +1,7 @@ set( sources ${CMAKE_CURRENT_SOURCE_DIR}/FileStorage.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/HttpStorage.cpp ) target_sources(ccache_lib PRIVATE ${sources}) diff --git a/src/storage/secondary/HttpStorage.cpp b/src/storage/secondary/HttpStorage.cpp new file mode 100644 index 0000000000..5c4bd21ab1 --- /dev/null +++ b/src/storage/secondary/HttpStorage.cpp @@ -0,0 +1,215 @@ +// Copyright (C) 2021 Joel Rosdahl and other contributors +// +// See doc/AUTHORS.adoc for a complete list of contributors. 
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "HttpStorage.hpp"
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+namespace storage {
+namespace secondary {
+
+namespace {
+
+// Return the enumerator name of an httplib error code, for use in log
+// messages.
+nonstd::string_view
+to_string(httplib::Error error)
+{
+  using httplib::Error;
+
+  // Anything not assigned below, including Error::Unknown, is reported as
+  // "Unknown".
+  const char* name = "Unknown";
+
+  switch (error) {
+  case Error::Success:
+    name = "Success";
+    break;
+  case Error::Connection:
+    name = "Connection";
+    break;
+  case Error::BindIPAddress:
+    name = "BindIPAddress";
+    break;
+  case Error::Read:
+    name = "Read";
+    break;
+  case Error::Write:
+    name = "Write";
+    break;
+  case Error::ExceedRedirectCount:
+    name = "ExceedRedirectCount";
+    break;
+  case Error::Canceled:
+    name = "Canceled";
+    break;
+  case Error::SSLConnection:
+    name = "SSLConnection";
+    break;
+  case Error::SSLLoadingCerts:
+    name = "SSLLoadingCerts";
+    break;
+  case Error::SSLServerVerification:
+    name = "SSLServerVerification";
+    break;
+  case Error::UnsupportedMultipartBoundaryChars:
+    name = "UnsupportedMultipartBoundaryChars";
+    break;
+  case Error::Compression:
+    name = "Compression";
+    break;
+  case Error::Unknown:
+    break;
+  }
+
+  return name;
+}
+
+// Return the port to connect to for `url`.
+//
+// An explicit port is parsed with Util::parse_unsigned using the bounds
+// 1-65535. Without an explicit port, 80 is returned for the "http" scheme and
+// an Error is thrown for any other scheme.
+int
+get_url_port(const Url& url)
+{
+  if (!url.port().empty()) {
+    return Util::parse_unsigned(url.port(), 1, 65535, "port");
+  }
+  if (url.scheme() != "http") {
+    throw Error("Unknown scheme: {}", url.scheme());
+  }
+  return 80;
+}
+
+// Return the path component of `url`, with a trailing slash appended when it
+// is missing, so that an entry name can be appended to it directly.
+std::string
+get_url_path(const Url& url)
+{
+  auto path = url.path();
+  if (path.empty() || path.back() != '/') {
+    path += '/';
+  }
+  return path;
+}
+
+// Return whether `status` is a 2xx (successful) HTTP status code.
+bool
+is_success(int status)
+{
+  return status >= 200 && status < 300;
+}
+
+} // namespace
+
+// Create a client for the host and port of `url`. Every request carries a
+// "User-Agent: <CCACHE_NAME>/<CCACHE_VERSION>" header and keep-alive is
+// enabled. The attribute map is currently unused.
+HttpStorage::HttpStorage(const Url& url, const AttributeMap&)
+  : m_url_path(get_url_path(url)),
+    m_http_client(
+      std::make_unique<httplib::Client>(url.host(), get_url_port(url)))
+{
+  m_http_client->set_default_headers(
+    {{"User-Agent", FMT("{}/{}", CCACHE_NAME, CCACHE_VERSION)}});
+  m_http_client->set_keep_alive(true);
+}
+
+HttpStorage::~HttpStorage() = default;
+
+// Fetch the value stored for `key` with a GET request.
+//
+// Returns the response body on a 2xx status, nonstd::nullopt on any other
+// status and Error::error if the request itself failed.
+nonstd::expected<nonstd::optional<std::string>, SecondaryStorage::Error>
+HttpStorage::get(const Digest& key)
+{
+  const auto url_path = get_entry_path(key);
+
+  const auto result = m_http_client->Get(url_path.c_str());
+
+  if (result.error() != httplib::Error::Success || !result) {
+    // The error code is cast explicitly: formatting a scoped enum directly is
+    // not supported by all fmt versions.
+    LOG("Failed to get {} from http storage: {} ({})",
+        url_path,
+        to_string(result.error()),
+        static_cast<int>(result.error()));
+    return nonstd::make_unexpected(Error::error);
+  }
+
+  if (!is_success(result->status)) {
+    // Don't log failure if the entry doesn't exist.
+    return nonstd::nullopt;
+  }
+
+  return result->body;
+}
+
+// Store `value` for `key` with a PUT request.
+//
+// If `only_if_missing` is true, a HEAD request is sent first and false is
+// returned without uploading when it answers with a 2xx status; any other HEAD
+// status falls through to the upload. Returns true when the PUT answers with a
+// 2xx status and Error::error when a request fails or the PUT answers with
+// another status.
+nonstd::expected<bool, SecondaryStorage::Error>
+HttpStorage::put(const Digest& key,
+                 const std::string& value,
+                 bool only_if_missing)
+{
+  const auto url_path = get_entry_path(key);
+
+  if (only_if_missing) {
+    const auto result = m_http_client->Head(url_path.c_str());
+
+    if (result.error() != httplib::Error::Success || !result) {
+      LOG("Failed to check for {} in http storage: {} ({})",
+          url_path,
+          to_string(result.error()),
+          static_cast<int>(result.error()));
+      return nonstd::make_unexpected(Error::error);
+    }
+
+    if (is_success(result->status)) {
+      LOG("Found entry {} already within http storage: status code: {}",
+          url_path,
+          result->status);
+      return false;
+    }
+  }
+
+  const auto content_type = "application/octet-stream";
+
+  const auto result = m_http_client->Put(
+    url_path.c_str(), value.data(), value.size(), content_type);
+
+  if (result.error() != httplib::Error::Success || !result) {
+    LOG("Failed to put {} to http storage: {} ({})",
+        url_path,
+        to_string(result.error()),
+        static_cast<int>(result.error()));
+    return nonstd::make_unexpected(Error::error);
+  }
+
+  if (!is_success(result->status)) {
+    LOG("Failed to put {} to http storage: status code: {}",
+        url_path,
+        result->status);
+    return nonstd::make_unexpected(Error::error);
+  }
+
+  return true;
+}
+
+// Delete the entry for `key` with a DELETE request.
+//
+// Returns true when the server answers with a 2xx status and Error::error when
+// the request fails or answers with another status.
+nonstd::expected<bool, SecondaryStorage::Error>
+HttpStorage::remove(const Digest& key)
+{
+  const auto url_path = get_entry_path(key);
+
+  const auto result = m_http_client->Delete(url_path.c_str());
+
+  if (result.error() != httplib::Error::Success || !result) {
+    LOG("Failed to delete {} from http storage: {} ({})",
+        url_path,
+        to_string(result.error()),
+        static_cast<int>(result.error()));
+    return nonstd::make_unexpected(Error::error);
+  }
+
+  if (!is_success(result->status)) {
+    LOG("Failed to delete {} from http storage: status code: {}",
+        url_path,
+        result->status);
+    return nonstd::make_unexpected(Error::error);
+  }
+
+  return true;
+}
+
+// Return the URL path of the entry for `key`: the storage's base path followed
+// by the string form of the key.
+std::string
+HttpStorage::get_entry_path(const Digest& key) const
+{
+  return m_url_path + key.to_string();
+}
+
+} // namespace secondary
+} // namespace storage
diff --git a/src/storage/secondary/HttpStorage.hpp b/src/storage/secondary/HttpStorage.hpp
new file mode 100644
index 0000000000..e0ea61b8f4
--- /dev/null
+++ b/src/storage/secondary/HttpStorage.hpp
@@ -0,0 +1,56 @@
+// Copyright (C) 2021 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include
+#include
+
+#include
+#include
+
+class Url;
+namespace httplib {
+class Client;
+}
+
+namespace storage {
+namespace secondary {
+
+// Secondary storage backend that reads, writes and deletes cache entries on a
+// server over HTTP.
+class HttpStorage : public storage::SecondaryStorage
+{
+public:
+  HttpStorage(const Url& url, const AttributeMap& attributes);
+  // Declared here and defined in the .cpp file since httplib::Client is only
+  // forward-declared in this header.
+  ~HttpStorage() override;
+
+  nonstd::expected<nonstd::optional<std::string>, Error>
+  get(const Digest& key) override;
+  nonstd::expected<bool, Error> put(const Digest& key,
+                                    const std::string& value,
+                                    bool only_if_missing) override;
+  nonstd::expected<bool, Error> remove(const Digest& key) override;
+
+private:
+  // Path component of the storage URL; entry names are appended to it.
+  const std::string m_url_path;
+  std::unique_ptr<httplib::Client> m_http_client;
+
+  // Return the URL path of the entry for `key`.
+  std::string get_entry_path(const Digest& key) const;
+};
+
+} // namespace secondary
+} // namespace storage
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index 0438faa172..1e92f34893 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -53,6 +53,7 @@ addtest(readonly)
 addtest(readonly_direct)
 addtest(sanitize_blacklist)
 addtest(secondary_file)
+addtest(secondary_http)
 addtest(secondary_url)
 addtest(serialize_diagnostics)
 addtest(source_date_epoch)
diff --git a/test/http-client b/test/http-client
new file mode 100755
index 0000000000..1b9eba50b4
--- /dev/null
+++ b/test/http-client
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+
+# This is a simple HTTP client to test readiness of the asynchronously
+# launched HTTP server.
+
+import sys
+import time
+import urllib.request
+
+def run(url, timeout):
+    """Send HEAD requests to url until one succeeds or timeout seconds pass.
+
+    The outcome of every attempt is printed. Returns after the first
+    successful response; exits the process with status 1 if an attempt fails
+    after the deadline has passed.
+    """
+    # Use a monotonic clock so that wall-clock adjustments cannot shorten or
+    # extend the deadline.
+    deadline = time.monotonic() + timeout
+    req = urllib.request.Request(url, method="HEAD")
+    while True:
+        # Bound each attempt so that an endpoint that accepts the connection
+        # but never answers cannot block past the deadline.
+        attempt_timeout = max(deadline - time.monotonic(), 0.5)
+        try:
+            response = urllib.request.urlopen(req, timeout=attempt_timeout)
+            print(f"Connection successful (code: {response.getcode()})")
+            break
+        except OSError as e:
+            # URLError is a subclass of OSError. Catching the base class also
+            # retries on timeouts and connection resets that urllib may raise
+            # unwrapped; those have no "reason" attribute.
+            print(getattr(e, "reason", e))
+            if time.monotonic() > deadline:
+                print(f"All connection attempts failed within {timeout} seconds.")
+                sys.exit(1)
+            time.sleep(0.5)
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--timeout', '-t', metavar='TIMEOUT',
+                        default=10, type=int,
+                        help='Maximum seconds to wait for successful connection attempt '
+                             '[default: 10 seconds]')
+    parser.add_argument('url',
+                        type=str,
+                        help='URL to connect to')
+    args = parser.parse_args()
+
+    run(
+        url=args.url,
+        timeout=args.timeout,
+    )
diff --git a/test/http-server b/test/http-server
new file mode 100755
index 0000000000..d061fee2ca
--- /dev/null
+++ b/test/http-server
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+
+# This is a simple HTTP server based on the HTTPServer and
+# SimpleHTTPRequestHandler. It has been extended with PUT
+# and DELETE functionality to store or delete results.
+#
+# See: https://github.com/python/cpython/blob/main/Lib/http/server.py
+
+from functools import partial
+from http import HTTPStatus
+from http.server import HTTPServer, SimpleHTTPRequestHandler
+
+import os
+import socket
+import sys
+
+class PUTEnabledHTTPRequestHandler(SimpleHTTPRequestHandler):
+    """SimpleHTTPRequestHandler extended with PUT and DELETE handlers."""
+
+    def do_PUT(self):
+        """Write the request body to the file the request path maps to.
+
+        Responds with 201 Created on success, 411 Length Required when the
+        Content-Length header is missing or not a non-negative integer, and
+        500 Internal Server Error when the file cannot be written.
+        """
+        path = self.translate_path(self.path)
+        try:
+            # int(None) raises TypeError when the header is absent.
+            file_length = int(self.headers['Content-Length'])
+            if file_length < 0:
+                raise ValueError("negative Content-Length")
+        except (TypeError, ValueError):
+            self.send_error(HTTPStatus.LENGTH_REQUIRED, "Missing or invalid Content-Length")
+            return
+        try:
+            with open(path, 'wb') as output_file:
+                output_file.write(self.rfile.read(file_length))
+            self.send_response(HTTPStatus.CREATED)
+            self.send_header("Content-Length", "0")
+            self.end_headers()
+        except OSError:
+            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, "Cannot open file for writing")
+
+    def do_DELETE(self):
+        """Delete the file the request path maps to.
+
+        Responds with 200 OK on success and 500 Internal Server Error when
+        the file cannot be removed.
+        """
+        path = self.translate_path(self.path)
+        try:
+            os.remove(path)
+            self.send_response(HTTPStatus.OK)
+            self.send_header("Content-Length", "0")
+            self.end_headers()
+        except OSError:
+            self.send_error(HTTPStatus.INTERNAL_SERVER_ERROR, "Cannot delete file")
+
+def _get_best_family(*address):
+    """Resolve address for a passive stream socket.
+
+    Returns the address family and socket address of the first result that
+    socket.getaddrinfo() reports.
+    """
+    infos = socket.getaddrinfo(
+        *address,
+        type=socket.SOCK_STREAM,
+        flags=socket.AI_PASSIVE,
+    )
+    family, _socktype, _proto, _canonname, sockaddr = next(iter(infos))
+    return family, sockaddr
+
+def run(HandlerClass, ServerClass, port, bind):
+    """Serve HTTP/1.1 requests on bind:port until interrupted.
+
+    Requests are handled by HandlerClass. A KeyboardInterrupt stops the
+    server and exits the process with status 0.
+    """
+    HandlerClass.protocol_version = "HTTP/1.1"
+    ServerClass.address_family, addr = _get_best_family(bind, port)
+
+    # Use the handler that was passed in rather than a module-level global, so
+    # that the protocol version set above applies to the handler in use.
+    with ServerClass(addr, HandlerClass) as httpd:
+        host, port = httpd.socket.getsockname()[:2]
+        url_host = f'[{host}]' if ':' in host else host
+        print(
+            f"Serving HTTP on {host} port {port} "
+            f"(http://{url_host}:{port}/) ..."
+        )
+        try:
+            httpd.serve_forever()
+        except KeyboardInterrupt:
+            print("\nKeyboard interrupt received, exiting.")
+            sys.exit(0)
+
+if __name__ == "__main__":
+    import argparse
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--bind', '-b', metavar='ADDRESS',
+                        help='Specify alternate bind address '
+                             '[default: all interfaces]')
+    parser.add_argument('--directory', '-d', default=os.getcwd(),
+                        help='Specify alternative directory '
+                             '[default:current directory]')
+    parser.add_argument('port', action='store',
+                        default=8080, type=int,
+                        nargs='?',
+                        help='Specify alternate port [default: 8080]')
+    args = parser.parse_args()
+
+    # Files are read, written and deleted relative to the served directory.
+    os.chdir(args.directory)
+
+    run(
+        HandlerClass=PUTEnabledHTTPRequestHandler,
+        ServerClass=HTTPServer,
+        port=args.port,
+        bind=args.bind,
+    )
diff --git a/test/run b/test/run
index 0f3ff97a80..1da8315f39 100755
--- a/test/run
+++ b/test/run
@@ -399,9 +399,19 @@ run_suite() {
     SUITE_$suite_name
     echo
 
+    terminate_all_children
+
     return 0
 }
 
+# Kill all background jobs of this shell, if there are any, and wait for them
+# to exit.
+terminate_all_children() {
+    local pids="$(jobs -p)"
+    if [[ -n "$pids" ]]; then
+        kill $pids >/dev/null 2>&1
+        wait >/dev/null 2>&1
+    fi
+}
+
 TEST() {
     CURRENT_TEST=$1
     CCACHE_COMPILE="$CCACHE $COMPILER"
@@ -433,6 +443,8 @@ TEST() {
 
 export LC_ALL=C
 
+trap 'terminate_all_children' EXIT # also cleanup after exceptional code flow
+
 if pwd | grep '[^A-Za-z0-9/.,=_%+-]' >/dev/null 2>&1; then
     cat <http-server.log 2>&1 &
+    "${HTTP_CLIENT}" "${SECONDARY_HTTP_URL}" >http-client.log 2>&1 || test_failed_internal "Cannot connect to server"
+}
+
+# Print a message when the test HTTP server script cannot be executed.
+SUITE_secondary_http_PROBE() {
+    if ! "${HTTP_SERVER}" --help >/dev/null 2>&1; then
+        echo "cannot execute ${HTTP_SERVER} - Python 3 might be missing"
+    fi
+}
+
+# Give every test its own URL path and backing directory, named after the test
+# with spaces replaced by underscores.
+SUITE_secondary_http_SETUP() {
+    unset CCACHE_NODIRECT
+
+    local subdir="${CURRENT_TEST// /_}"
+    export CCACHE_SECONDARY_STORAGE="${SECONDARY_HTTP_URL}/${subdir}"
+    SECONDARY_HTTP_DIR="${ABS_TESTDIR}/secondary/${subdir}"
+    mkdir "${SECONDARY_HTTP_DIR}"
+
+    generate_code 1 test.c
+}
+
+SUITE_secondary_http() {
+    start_http_server
+
+    # -------------------------------------------------------------------------
+    TEST "Base case"
+
+    # First compilation: a miss that is stored both locally and over HTTP.
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    # Second compilation: a direct hit.
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    # Clear the local cache; the files behind the HTTP server remain.
+    $CCACHE -C >/dev/null
+    expect_stat 'files in cache' 0
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    # Still a hit although nothing is stored locally.
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 2
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 0
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    # -------------------------------------------------------------------------
+    TEST "Read-only"
+
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 0
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 2
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    $CCACHE -C >/dev/null
+    expect_stat 'files in cache' 0
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    CCACHE_SECONDARY_STORAGE+="|read-only"
+
+    # Reading from the read-only storage still gives a hit.
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 1
+    expect_stat 'files in cache' 0
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+
+    # A changed source file is a miss that is stored locally only; the file
+    # count behind the HTTP server stays the same.
+    echo 'int x;' >> test.c
+    $CCACHE_COMPILE -c test.c
+    expect_stat 'cache hit (direct)' 1
+    expect_stat 'cache miss' 2
+    expect_stat 'files in cache' 2
+    expect_file_count 2 '*' $SECONDARY_HTTP_DIR # result + manifest
+}