#include "download.h"

// Sub-directory of the destination where downloaded archives are stored before extraction.
constexpr std::string_view archived("archived");
// Request header asking the server for a JSON response.
constexpr std::string_view headerAcceptJson("accept: application/json");

// Matches "<protocol>://<anything>/<name>.<extension>"; groups: protocol, name, extension.
static const std::regex fileRegex("(^https?):\\/\\/(?:[\\w\\d\\.\\-\\_]+\\/)+([\\w\\d\\-\\_]+)\\.([\\w\\d\\.]+)");
// Matches "<protocol>://<host>/<owner>/<name>[.git|/]"; groups: protocol, host, owner, name.
static const std::regex repoRegex("(^https?):\\/\\/([\\w\\d\\.\\-\\_]+)\\/([\\w\\d\\-\\_]+)\\/([\\w\\d\\-\\_]+)(?:(?:\\.git)|\\/)?$");
// Extensions ending with "tar.gz" are treated as archives.
static const std::regex archiveRegex("tar\\.gz$");

namespace {

// Frees a curl header list when its owner goes out of scope.
struct SlistDeleter {
    void operator()(curl_slist* list) const {
        curl_slist_free_all(list);
    }
};
using HeaderList = std::unique_ptr<curl_slist, SlistDeleter>;

// Builds an owned curl header list; an empty input yields a null list.
HeaderList makeHeaderList(const std::vector<std::string_view>& headers) {
    HeaderList list;
    for (const std::string_view& header : headers) {
        // string_view is not guaranteed to be NUL-terminated, curl expects a C string
        const std::string line(header);
        curl_slist* extended = curl_slist_append(list.get(), line.c_str());
        if (extended == nullptr)    // on failure the old list is untouched and still owned by `list`
            throw std::runtime_error("Error allocating curl header list");

        list.release();             // `extended` is the head of the very same list
        list.reset(extended);
    }
    return list;
}

// Write callback with the exact signature libcurl expects; forwards the data to a FILE*.
size_t writeFile(char* data, size_t size, size_t nmemb, void* stream) {
    return fwrite(data, size, nmemb, static_cast<FILE*>(stream));
}

// Release libarchive handles; the *_free functions also close the archive if it is still open.
struct ArchiveReadDeleter {
    void operator()(struct archive* handle) const {
        archive_read_free(handle);
    }
};
struct ArchiveWriteDeleter {
    void operator()(struct archive* handle) const {
        archive_write_free(handle);
    }
};

// archive_error_string may return a null pointer, which must not reach std::string.
std::string archiveError(struct archive* handle) {
    const char* message = archive_error_string(handle);
    return message != nullptr ? message : "unknown libarchive error";
}

// True if any component of the path is "..".
bool hasParentReference(const std::filesystem::path& path) {
    for (const std::filesystem::path& part : path)
        if (part == "..")
            return true;

    return false;
}

}

unsigned int Download::instances(0);
AtomicMutex Download::amx{};

void Download::CurlDeleter::operator()(CURL* curl) const {
    curl_easy_cleanup(curl);
}

void Download::FileDeleter::operator()(FILE* file) const {
    fclose(file);
}

// Registers the instance, initializing libcurl globally for the first one, and creates the easy handle.
// Throws CurlError if libcurl can not be initialized.
// NOTE(review): the logger type is assumed to be Logger — confirm against download.h
Download::Download(
    const std::string& url,
    const std::filesystem::path& destination,
    const std::shared_ptr<Logger>& logger
) :
    Loggable(logger),
    curl(),
    url(url),
    destination(destination),
    location(std::nullopt)
{
    std::lock_guard lock(amx);
    if (instances == 0) {
        CURLcode res = curl_global_init(CURL_GLOBAL_ALL);
        if (res != CURLE_OK)
            throw CurlError(std::string("Error initializing curl global ") + curl_easy_strerror(res));
    }
    ++instances;

    try {
        createCurl();
    } catch (...) {
        // the destructor is not going to run for a half constructed object, undo the registration here
        --instances;
        if (instances == 0)
            curl_global_cleanup();

        throw;
    }
}

// Unregisters the instance, the last one also cleans libcurl up globally.
Download::~Download() {
    // members are destroyed after this body, so the handle has to go before the global cleanup
    curl.reset();

    std::lock_guard lock(amx);
    --instances;
    if (instances == 0)
        curl_global_cleanup();
}

// Path of the downloaded (and possibly extracted) content, empty until proceed() succeeds.
std::optional<std::filesystem::path> Download::getLocation() const {
    return location;
}

// Tries to fetch the url as a repository first, then as a plain file or an archive.
// On success the resulting path is available through getLocation().
void Download::proceed() {
    const std::optional<RepoInfo> repoInfo = testRepo();
    if (repoInfo.has_value()) {
        const std::optional<std::filesystem::path> path = downloadAsRepo(repoInfo.value());
        if (path.has_value()) {     // the host may not be a Gitea instance, or the download may have failed
            info(url + " has been successfully donwloaded as a repository, extracting");
            if (extract(path.value(), destination)) {
                location = destination/repoInfo->name;
                info("Successfully extracted " + url);
                return;
            }
        }
    }

    const std::optional<FileInfo> fileInfo = testFile();
    if (!fileInfo.has_value())
        return;

    const std::optional<std::filesystem::path> path = downloadAsFile(fileInfo.value());
    if (!path.has_value())
        return;

    if (!fileInfo->archive) {
        location = path;
        return;
    }

    info(url + " appears to be an archive, extracting");
    if (extract(path.value(), destination/fileInfo->name)) {
        info("Successfully extracted " + url);
        location = destination/fileInfo->name;
    }
}

// Requests the repository description from the Gitea v1 API.
// Returns nothing if the request fails or the response is not valid JSON.
std::optional<nlohmann::json> Download::repoInfoGiteaApi1(const RepoInfo& repo) {
    std::string url = repo.origin() + "/api/v1/repos/" + repo.project();
    std::string data;
    CURLcode code = httpGet(url, data, {headerAcceptJson});
    if (code != CURLE_OK)
        return std::nullopt;

    try {
        return nlohmann::json::parse(data);
    } catch (const nlohmann::json::exception& e) {
        warn(e.what());
    }

    return std::nullopt;
}

// Downloads the archive of the default branch of the repository.
// Returns the path of the downloaded archive, or nothing on failure.
std::optional<std::filesystem::path> Download::downloadAsRepo(const RepoInfo& repo) {
    info("Trying Gitea v1 API");
    const std::optional<nlohmann::json> repoInfo = repoInfoGiteaApi1(repo);
    if (!repoInfo.has_value())
        return std::nullopt;

    const nlohmann::json::const_iterator itr = repoInfo->find("default_branch");
    if (itr == repoInfo->end() || !itr->is_string())
        return std::nullopt;

    const std::string branchName = itr->get<std::string>();
    info("Gitea v1 API seem to have worked");
    info("Default branch is " + branchName);

    return downloadRepoGiteaApi1(repo, branchName);
}

// Downloads the url as a file; archives go to the "archived" sub-directory of the destination.
// Returns the path of the file, or nothing on failure (the partial file is removed).
std::optional<std::filesystem::path> Download::downloadAsFile(const FileInfo& file) {
    std::filesystem::path dst;
    if (file.archive)
        dst = destination/archived/file.fileName();
    else
        dst = destination/file.fileName();

    CURLcode code = httpDownload(url, dst);
    if (code == CURLE_OK)
        return dst;

    minor("Removing " + dst.string());
    if (std::filesystem::exists(dst))
        std::filesystem::remove_all(dst);

    return std::nullopt;
}

// Downloads "<branch>.tar.gz" of the repository using the Gitea v1 archive endpoint.
// Returns the path of the archive, or nothing on failure (the partial file is removed).
std::optional<std::filesystem::path> Download::downloadRepoGiteaApi1(const RepoInfo& repo, const std::string& branch) {
    std::string fileName = branch + ".tar.gz";
    std::string url = repo.origin() + "/api/v1/repos/" + repo.project() + "/archive/" + fileName;
    std::filesystem::path path = destination/archived/fileName;
    CURLcode code = httpDownload(url, path);
    if (code == CURLE_OK)
        return path;

    minor("Removing " + path.string());
    if (std::filesystem::exists(path))
        std::filesystem::remove_all(path);

    return std::nullopt;
}

// Performs a request and appends the response body to result.
// Returns the libcurl result code of the transfer.
CURLcode Download::httpGet(
    const std::string& url,
    std::string& result,
    const std::vector<std::string_view>& headers
) {
    const HeaderList list = makeHeaderList(headers);    // has to outlive the transfer
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, list.get());
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, writeString);
    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &result);

    const CURLcode code = curl_easy_perform(curl.get());

    // the list is freed at the end of the scope, the handle must not keep pointing to it
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, static_cast<curl_slist*>(nullptr));
    return code;
}

// Performs a request and writes the response body to the file at path, replacing whatever is there.
// Returns the libcurl result code of the transfer; throws FileError if the file can not be opened.
CURLcode Download::httpDownload(const std::string& url, const std::filesystem::path& path, const std::vector<std::string_view>& headers) {
    info("Starting to download " + url + " to " + path.string());
    std::filesystem::create_directories(path.parent_path());
    if (std::filesystem::exists(path)) {
        minor("File " + path.string() + " already exists, will be overwritten");
        if (std::filesystem::is_directory(path))
            std::filesystem::remove_all(path);
    }

    std::unique_ptr<FILE, FileDeleter> filePtr(fopen(path.c_str(), "wb"));
    if (!filePtr)
        throw FileError("Could not open file " + path.string() + " to write");

    const HeaderList list = makeHeaderList(headers);    // has to outlive the transfer
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, list.get());
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, writeFile);
    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, filePtr.get());

    const CURLcode code = curl_easy_perform(curl.get());

    // the list is freed at the end of the scope, the handle must not keep pointing to it
    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, static_cast<curl_slist*>(nullptr));

    if (code == CURLE_OK)
        info("Successfully downloaded " + url);
    else
        warn("Couldn't download file " + url + ": " + curl_easy_strerror(code));

    return code;
}

// Installs the request headers on the handle.
// NOTE(review): the list created here is never freed, and there is no member to keep it in;
// httpGet and httpDownload own their header lists themselves and don't call this anymore.
// Kept because it is declared in the header — consider removing it from there.
void Download::setHeaders(const std::vector<std::string_view>& headers) {
    struct curl_slist* curlHeaders = nullptr;
    for (const std::string_view& header : headers) {
        // string_view is not guaranteed to be NUL-terminated, curl expects a C string
        const std::string line(header);
        struct curl_slist* extended = curl_slist_append(curlHeaders, line.c_str());
        if (extended != nullptr)    // on failure curl leaves the old list untouched
            curlHeaders = extended;
    }

    curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, curlHeaders);
}

// Creates and configures the easy handle used for all the requests of this instance.
// Throws CurlError if the handle can not be created.
void Download::createCurl() {
    std::unique_ptr<CURL, CurlDeleter> handle(curl_easy_init());
    if (!handle)
        throw CurlError("Error creating curl instalce");

    curl_easy_setopt(handle.get(), CURLOPT_USERAGENT, "libcurl-agent/1.0" );
    curl_easy_setopt(handle.get(), CURLOPT_VERBOSE, 1L);
    curl_easy_setopt(handle.get(), CURLOPT_DEBUGFUNCTION, trace);
    curl_easy_setopt(handle.get(), CURLOPT_DEBUGDATA, this);
    curl_easy_setopt(handle.get(), CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(handle.get(), CURLOPT_NOPROGRESS, 1L);
    // without this a 404 page is a "successful" transfer and ends up saved as the requested file
    curl_easy_setopt(handle.get(), CURLOPT_FAILONERROR, 1L);

    curl = std::move(handle);
}

// Checks if the url looks like a link to a file, returns its description if it does.
std::optional<Download::FileInfo> Download::testFile() const {
    std::smatch results;
    if (!std::regex_search(url, results, fileRegex))
        return std::nullopt;

    info(url + " appears to be a file");
    std::string ext(results[3]);
    return FileInfo {
        results[1],
        results[2],
        ext,
        std::regex_search(ext, archiveRegex)
    };
}

// Checks if the url looks like a link to a git repository, returns its description if it does.
std::optional<Download::RepoInfo> Download::testRepo() const {
    std::smatch results;
    if (!std::regex_search(url, results, repoRegex))
        return std::nullopt;

    info(url + " appears to be a git repository");
    return RepoInfo {
        results[1],
        results[2],
        results[3],
        results[4]
    };
}

// Extracts the archive at source into the destination directory.
// Returns false if the archive can not be opened or a fatal error interrupts the extraction.
// Entries trying to leave the destination ("..") are skipped, absolute entries are re-rooted.
bool Download::extract(const std::filesystem::path& source, const std::filesystem::path& destination) const {
    const int flags = ARCHIVE_EXTRACT_TIME | ARCHIVE_EXTRACT_PERM | ARCHIVE_EXTRACT_ACL | ARCHIVE_EXTRACT_FFLAGS;

    // owning handles: nothing leaks even if logging or path handling throws
    const std::unique_ptr<struct archive, ArchiveReadDeleter> reader(archive_read_new());
    const std::unique_ptr<struct archive, ArchiveWriteDeleter> writer(archive_write_disk_new());
    if (!reader || !writer) {
        major("Couldn't allocate archive handles to extract " + source.string());
        return false;
    }

    archive_read_support_format_all(reader.get());
    archive_read_support_filter_all(reader.get());
    archive_write_disk_set_options(writer.get(), flags);
    archive_write_disk_set_standard_lookup(writer.get());

    if (archive_read_open_filename(reader.get(), source.c_str(), 10240) != ARCHIVE_OK) {
        major("Couldn't open file " + source.string());
        return false;
    }

    bool result = true;
    while (true) {
        struct archive_entry* entry = nullptr;
        int r = archive_read_next_header(reader.get(), &entry);
        if (r == ARCHIVE_EOF)
            break;
        if (r < ARCHIVE_OK)
            major(archiveError(reader.get()));
        if (r < ARCHIVE_WARN)
            return false;

        const char* rawName = archive_entry_pathname(entry);
        if (rawName == nullptr) {
            major("Couldn't read the name of an entry in " + source.string() + ", skipping");
            continue;   // the data of the entry is skipped by the next header read
        }

        // the archive comes from the network: its entries must not be able to leave the destination
        const std::filesystem::path entryPath(rawName);
        if (hasParentReference(entryPath)) {
            major("Entry " + entryPath.string() + " points outside of the destination, skipping");
            continue;
        }
        const std::filesystem::path filePath = destination/entryPath.relative_path();

        debug("Extracting " + filePath.string());
        if (std::filesystem::exists(filePath))
            minor(filePath.string() + " exists, overwriting");

        archive_entry_set_pathname_utf8(entry, filePath.c_str());
        r = archive_write_header(writer.get(), entry);
        if (r < ARCHIVE_OK) {
            major(archiveError(writer.get()));
            if (r < ARCHIVE_WARN)
                result = false;     // this entry is lost, keep going with the others
        } else if (archive_entry_size(entry) > 0) {
            r = copy(reader.get(), writer.get());   // copy reports its own errors
            if (r < ARCHIVE_WARN)
                return false;
        }

        r = archive_write_finish_entry(writer.get());
        if (r < ARCHIVE_OK)
            major(archiveError(writer.get()));
        if (r < ARCHIVE_WARN)
            return false;
    }

    return result;
}

// Copies the data of the current entry from the reading archive to the writing one.
// Returns ARCHIVE_OK when the entry is copied, the failed libarchive code (already logged) otherwise.
int Download::copy(struct archive* ar, struct archive* aw) const {
    const void* buff = nullptr;
    size_t size = 0;
    la_int64_t offset = 0;

    while (true) {
        int r = archive_read_data_block(ar, &buff, &size, &offset);
        if (r == ARCHIVE_EOF)
            return ARCHIVE_OK;
        if (r < ARCHIVE_OK) {
            major(archiveError(ar));
            return r;
        }

        r = archive_write_data_block(aw, buff, size, offset);
        if (r < ARCHIVE_OK) {
            major(archiveError(aw));
            return r;
        }
    }
}

// libcurl debug callback: forwards the informational text to the debug log of the Download
// registered as CURLOPT_DEBUGDATA, everything else is ignored.
int Download::trace(CURL* handle, curl_infotype type, char* data, size_t size, void* clientp) {
    (void)(handle);
    switch (type) {
        case CURLINFO_TEXT: {
            std::string message(data, size);
            if (!message.empty() && message.back() == '\n')     // size can be 0
                message.pop_back();

            static_cast<Download*>(clientp)->debug(message);
        }
            break;
        default:
            break;
    }

    return 0;
}

// libcurl write callback: appends the received bytes to the std::string passed as CURLOPT_WRITEDATA.
size_t Download::writeString(void* data, size_t size, size_t nmemb, void* mem) {
    size_t finalSize = size * nmemb;
    std::string* string = static_cast<std::string*>(mem);
    string->append(static_cast<char*>(data), finalSize);
    return finalSize;
}

Download::CurlError::CurlError(const std::string& message) :
    std::runtime_error(message) {}

Download::FileError::FileError(const std::string& message) :
    std::runtime_error(message) {}

// "<protocol>://<host>"
std::string Download::RepoInfo::origin() const {
    return protocol + "://" + host;
}

// "<owner>/<name>"
std::string Download::RepoInfo::project() const {
    return owner + "/" + name;
}

// "<name>.<extension>"
std::string Download::FileInfo::fileName() const {
    return name + "." + extension;
}