411 lines
12 KiB
C++
411 lines
12 KiB
C++
#include "download.h"
|
|
|
|
// Name of the sub-directory (below the download destination) where fetched
// archives are stored before they get extracted.
constexpr std::string_view archived = "archived";

// Request header passed to httpGet when a JSON answer is expected.
constexpr std::string_view headerAcceptJson = "accept: application/json";

// http(s) URL that ends in "<name>.<extension>".
// Captures: 1 - protocol, 2 - file name, 3 - extension (may contain dots).
static const std::regex file = std::regex(
    "(^https?):\\/\\/(?:[\\w\\d\\.\\-\\_]+\\/)+([\\w\\d\\-\\_]+)\\.([\\w\\d\\.]+)"
);

// http(s) URL of the form "<host>/<owner>/<name>", optionally followed by
// ".git" or a trailing slash.
// Captures: 1 - protocol, 2 - host, 3 - owner, 4 - repository name.
static const std::regex repo = std::regex(
    "(^https?):\\/\\/([\\w\\d\\.\\-\\_]+)\\/([\\w\\d\\-\\_]+)\\/([\\w\\d\\-\\_]+)(?:(?:\\.git)|\\/)?$"
);

// Extension that marks a downloaded file as an archive to be extracted.
static const std::regex archive = std::regex("tar\\.gz$");
|
|
|
|
unsigned int Download::instances(0);
|
|
AtomicMutex Download::amx{};
|
|
|
|
// Deleter functor for the CURL easy handle: releases the handle through
// curl_easy_cleanup.
void Download::CurlDeleter::operator()(CURL* curl) const
{
    curl_easy_cleanup(curl);
}
|
|
|
|
// Deleter functor for C stdio streams: closes the stream with fclose.
// The result of fclose is not inspected.
void Download::FileDeleter::operator()(FILE* file) const
{
    fclose(file);
}
|
|
|
|
// Creates a download job for `url` that stores its results below `destination`.
//
// The first live instance initialises libcurl's global state; every instance
// then creates its own easy handle through createCurl().
//
// Throws CurlError when the global initialisation fails or when the easy
// handle cannot be created.
Download::Download(
    const std::string& url,
    const std::filesystem::path& destination,
    const std::shared_ptr<Logger>& logger
) :
    Loggable(logger),
    curl(),
    url(url),
    destination(destination),
    location(std::nullopt)
{
    std::lock_guard lock(amx);
    if (instances == 0) {
        CURLcode res = curl_global_init(CURL_GLOBAL_ALL);
        if (res != CURLE_OK)
            throw CurlError(std::string("Error initializing curl global ") + curl_easy_strerror(res));
    }
    ++instances;

    try {
        createCurl();
    } catch (...) {
        // The destructor does not run for an object whose constructor threw,
        // so the bookkeeping done above has to be rolled back here; otherwise
        // the counter would stay too high and the global cleanup would never
        // happen.
        --instances;
        if (instances == 0)
            curl_global_cleanup();

        throw;
    }
}
|
|
|
|
// Releases this instance's easy handle and, when this was the last live
// instance, libcurl's global state.
Download::~Download() {
    // Members are destroyed only after the destructor body has finished, so
    // without this explicit reset the last instance would call
    // curl_global_cleanup() first and curl_easy_cleanup() afterwards. libcurl
    // requires the global cleanup to come after all other libcurl usage.
    curl.reset();

    std::lock_guard lock(amx);
    --instances;
    if (instances == 0)
        curl_global_cleanup();
}
|
|
|
|
std::optional<std::filesystem::path> Download::getLocation() const {
|
|
return location;
|
|
}
|
|
|
|
void Download::proceed() {
|
|
std::optional<RepoInfo> repo = testRepo();
|
|
if (repo.has_value()) {
|
|
const RepoInfo& rp = repo.value();
|
|
std::optional<std::filesystem::path> path = downloadAsRepo(rp);
|
|
info(url + " has been successfully donwloaded as a repository, extracting");
|
|
bool success = extract(path.value(), destination);
|
|
if (success) {
|
|
location = destination/rp.name;
|
|
info("Successfully extracted " + url);
|
|
return;
|
|
}
|
|
}
|
|
|
|
std::optional<FileInfo> file = testFile();
|
|
if (file.has_value()) {
|
|
const FileInfo& fl = file.value();
|
|
std::optional<std::filesystem::path> path = downloadAsFile(fl);
|
|
if (path.has_value()) {
|
|
if (fl.archive) {
|
|
info(url + " appears to be an archive, extracting");
|
|
bool success = extract(path.value(), destination/fl.name);
|
|
if (success) {
|
|
info("Successfully extracted " + url);
|
|
location = destination/fl.name;
|
|
}
|
|
} else {
|
|
location = path;
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// CURLcode code = httpDownload(url, path);
|
|
// if (code == CURLE_OK)
|
|
// return path;
|
|
}
|
|
|
|
|
|
std::optional<nlohmann::json> Download::repoInfoGiteaApi1(const RepoInfo& repo) {
|
|
std::string url = repo.origin() + "/api/v1/repos/" + repo.project();
|
|
std::string data;
|
|
CURLcode code = httpGet(url, data, {headerAcceptJson});
|
|
if (code == CURLE_OK) {
|
|
try {
|
|
return nlohmann::json::parse(data);
|
|
} catch (const nlohmann::json::exception& e) {
|
|
warn(e.what());
|
|
}
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<std::filesystem::path> Download::downloadAsRepo(const RepoInfo& repo) {
|
|
info("Trying Gitea v1 API");
|
|
std::optional<nlohmann::json> repoInfo = repoInfoGiteaApi1(repo);
|
|
if (repoInfo.has_value()) {
|
|
nlohmann::json::const_iterator itr = repoInfo.value().find("default_branch");
|
|
if (itr != repoInfo.value().end() && itr->is_string()) {
|
|
std::string branchName = *itr;
|
|
info("Gitea v1 API seem to have worked");
|
|
info("Default branch is " + branchName);
|
|
|
|
return downloadRepoGiteaApi1(repo, branchName);
|
|
}
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<std::filesystem::path> Download::downloadAsFile(const FileInfo& file) {
|
|
std::filesystem::path dst;
|
|
if (file.archive)
|
|
dst = destination/archived/file.fileName();
|
|
else
|
|
dst = destination/file.fileName();
|
|
|
|
CURLcode code = httpDownload(url, dst);
|
|
if (code == CURLE_OK)
|
|
return dst;
|
|
|
|
minor("Removing " + dst.string());
|
|
if (std::filesystem::exists(dst))
|
|
std::filesystem::remove_all(dst);
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<std::filesystem::path> Download::downloadRepoGiteaApi1(const RepoInfo& repo, const std::string& branch) {
|
|
std::string fileName = branch + ".tar.gz";
|
|
std::string url = repo.origin() + "/api/v1/repos/" + repo.project() + "/archive/" + fileName;
|
|
std::filesystem::path path = destination/archived/fileName;
|
|
CURLcode code = httpDownload(url, path);
|
|
if (code == CURLE_OK)
|
|
return path;
|
|
|
|
minor("Removing " + path.string());
|
|
if (std::filesystem::exists(path))
|
|
std::filesystem::remove_all(path);
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
|
|
// Performs a request for `url` on this instance's easy handle and appends the
// received body to `result` (the string is not cleared first).
//
// `headers` are installed through setHeaders before the transfer starts.
// Returns the code reported by curl_easy_perform.
CURLcode Download::httpGet(
    const std::string& url,
    std::string& result,
    const std::vector<std::string_view>& headers)
{
    setHeaders(headers);

    CURL* handle = curl.get();
    curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeString);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, &result);

    const CURLcode status = curl_easy_perform(handle);
    return status;
}
|
|
|
|
// Downloads `url` into the file `path`.
//
// Missing parent directories are created. An existing file is overwritten; an
// existing directory at `path` is removed first. `headers` are installed
// through setHeaders before the transfer starts.
//
// Returns CURLE_OK on success, the code reported by curl_easy_perform on a
// transport failure, or CURLE_HTTP_RETURNED_ERROR when the server answered
// with a status of 400 or above.
// Throws FileError when `path` cannot be opened for writing.
CURLcode Download::httpDownload(const std::string& url, const std::filesystem::path& path, const std::vector<std::string_view>& headers) {
    info("Starting to download " + url + " to " + path.string());
    std::filesystem::create_directories(path.parent_path());
    if (std::filesystem::exists(path)) {
        minor("File " + path.string() + " already exists, will be overwritten");

        if (std::filesystem::is_directory(path))
            std::filesystem::remove_all(path);
    }

    std::unique_ptr<FILE, FileDeleter> filePtr(fopen(path.c_str(), "wb"));
    if (!filePtr)
        throw FileError("Could not open file " + path.string() + " to write");

    setHeaders(headers);
    curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
    curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, fwrite);
    curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, filePtr.get());

    CURLcode code = curl_easy_perform(curl.get());
    if (code != CURLE_OK) {
        warn("Couldn't download file " + url + ": " + curl_easy_strerror(code));
        return code;
    }

    // A completed transfer only means the exchange itself worked: without this
    // check the body of a 404/500 answer would be stored and reported as a
    // successful download.
    long status = 0;
    curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &status);
    if (status >= 400) {
        warn("Couldn't download file " + url + ": response code " + std::to_string(status));
        return CURLE_HTTP_RETURNED_ERROR;
    }

    info("Successfully downloaded " + url);
    return code;
}
|
|
|
|
void Download::setHeaders(const std::vector<std::string_view>& headers) {
|
|
struct curl_slist* curlHeaders = nullptr;
|
|
for (const std::string_view& header : headers)
|
|
curlHeaders = curl_slist_append(curlHeaders, header.data());
|
|
|
|
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, curlHeaders);
|
|
}
|
|
|
|
void Download::createCurl() {
|
|
CURL* crl = curl_easy_init();
|
|
if (!crl)
|
|
throw CurlError("Error creating curl instalce");
|
|
|
|
curl_easy_setopt(crl, CURLOPT_USERAGENT, "libcurl-agent/1.0" );
|
|
curl_easy_setopt(crl, CURLOPT_VERBOSE, 1L);
|
|
curl_easy_setopt(crl, CURLOPT_DEBUGFUNCTION, trace);
|
|
curl_easy_setopt(crl, CURLOPT_DEBUGDATA, this);
|
|
curl_easy_setopt(crl, CURLOPT_FOLLOWLOCATION, 1L);
|
|
curl_easy_setopt(crl, CURLOPT_NOPROGRESS, 1L);
|
|
curl = std::unique_ptr<CURL, CurlDeleter>(crl);
|
|
}
|
|
|
|
std::optional<Download::FileInfo> Download::testFile() const {
|
|
std::smatch results;
|
|
if (std::regex_search(url, results, file)) {
|
|
info(url + " appears to be a file");
|
|
std::string ext(results[3]);
|
|
return FileInfo {
|
|
results[1],
|
|
results[2],
|
|
ext,
|
|
std::regex_search(ext, archive)
|
|
};
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
std::optional<Download::RepoInfo> Download::testRepo() const {
|
|
std::smatch results;
|
|
if (std::regex_search(url, results, repo)) {
|
|
info(url + " appears to be a git repository");
|
|
return RepoInfo {
|
|
results[1],
|
|
results[2],
|
|
results[3],
|
|
results[4]
|
|
};
|
|
}
|
|
|
|
return std::nullopt;
|
|
}
|
|
|
|
bool Download::extract(const std::filesystem::path& source, const std::filesystem::path& destination) const {
|
|
int flags = ARCHIVE_EXTRACT_TIME;
|
|
flags |= ARCHIVE_EXTRACT_PERM;
|
|
flags |= ARCHIVE_EXTRACT_ACL;
|
|
flags |= ARCHIVE_EXTRACT_FFLAGS;
|
|
|
|
struct archive* a = archive_read_new();
|
|
struct archive* ext = archive_write_disk_new();
|
|
struct archive_entry *entry;
|
|
archive_read_support_format_all(a);
|
|
archive_read_support_filter_all(a);
|
|
archive_write_disk_set_options(ext, flags);
|
|
archive_write_disk_set_standard_lookup(ext);
|
|
|
|
bool result = true;
|
|
bool readOpen = false;
|
|
bool writeOpen = false;
|
|
int r = archive_read_open_filename(a, source.c_str(), 10240);
|
|
if (r) {
|
|
major("Couldn't open file " + source.string());
|
|
result = false;
|
|
} else {
|
|
readOpen = true;
|
|
}
|
|
|
|
while (result) {
|
|
r = archive_read_next_header(a, &entry);
|
|
if (r == ARCHIVE_EOF)
|
|
break;
|
|
|
|
if (r < ARCHIVE_OK)
|
|
major(archive_error_string(a));
|
|
|
|
if (r < ARCHIVE_WARN)
|
|
break;
|
|
|
|
std::string fileName(archive_entry_pathname(entry));
|
|
std::filesystem::path filePath = destination/fileName;
|
|
debug("Extracting " + filePath.string());
|
|
if (std::filesystem::exists(filePath))
|
|
minor(filePath.string() + " exists, overwriting");
|
|
|
|
archive_entry_set_pathname_utf8(entry, filePath.c_str());
|
|
|
|
r = archive_write_header(ext, entry);
|
|
if (r < ARCHIVE_OK) {
|
|
major(archive_error_string(ext));
|
|
} else if (archive_entry_size(entry) > 0) {
|
|
writeOpen = true;
|
|
r = copy(a, ext);
|
|
if (r < ARCHIVE_OK)
|
|
major(archive_error_string(ext));
|
|
|
|
if (r < ARCHIVE_WARN)
|
|
break;
|
|
}
|
|
r = archive_write_finish_entry(ext);
|
|
if (r < ARCHIVE_OK)
|
|
major(archive_error_string(ext));
|
|
|
|
if (r < ARCHIVE_WARN)
|
|
break;
|
|
}
|
|
|
|
if (readOpen)
|
|
archive_read_close(a);
|
|
|
|
if (writeOpen)
|
|
archive_write_close(ext);
|
|
|
|
archive_read_free(a);
|
|
archive_write_free(ext);
|
|
|
|
return result;
|
|
}
|
|
|
|
int Download::copy(struct archive* ar, struct archive* aw) const {
|
|
int r;
|
|
const void *buff;
|
|
size_t size;
|
|
la_int64_t offset;
|
|
|
|
while (true) {
|
|
r = archive_read_data_block(ar, &buff, &size, &offset);
|
|
if (r == ARCHIVE_EOF)
|
|
return ARCHIVE_OK;
|
|
|
|
if (r < ARCHIVE_OK)
|
|
return r;
|
|
|
|
r = archive_write_data_block(aw, buff, size, offset);
|
|
if (r < ARCHIVE_OK) {
|
|
major(archive_error_string(aw));
|
|
return r;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Debug callback installed with CURLOPT_DEBUGFUNCTION.
//
// Informational text (CURLINFO_TEXT) is forwarded to the debug log of the
// Download object passed as `clientp`, without its trailing line break. All
// other kinds of debug data are ignored. Always returns 0.
int Download::trace(CURL* handle, curl_infotype type, char* data, size_t size, void* clientp) {
    (void)(handle);
    if (type == CURLINFO_TEXT) {
        std::string message(data, size);
        // `size` may be zero; indexing size - 1 would then read out of bounds,
        // so only look at the last character of a non-empty message.
        if (!message.empty() && message.back() == '\n')
            message.pop_back();

        static_cast<Download*>(clientp)->debug(message);
    }

    return 0;
}
|
|
|
|
// Write callback used by httpGet: appends the received chunk of
// size * nmemb bytes to the std::string that `mem` points to and returns the
// number of bytes appended.
size_t Download::writeString(void* data, size_t size, size_t nmemb, void* mem) {
    const size_t bytes = size * nmemb;
    std::string& target = *static_cast<std::string*>(mem);
    target.append(static_cast<char*>(data), bytes);

    return bytes;
}
|
|
|
|
// Exception thrown by this class when libcurl cannot be initialised or a
// handle cannot be created; `message` becomes the what() text.
Download::CurlError::CurlError(const std::string& message) :
    std::runtime_error(message)
{
}
|
|
|
|
// Exception thrown by httpDownload when the target file cannot be opened for
// writing; `message` becomes the what() text.
Download::FileError::FileError(const std::string& message) :
    std::runtime_error(message)
{
}
|
|
|
|
std::string Download::RepoInfo::origin() const {
|
|
return protocol + "://" + host;
|
|
}
|
|
|
|
std::string Download::RepoInfo::project() const {
|
|
return owner + "/" + name;
|
|
}
|
|
|
|
std::string Download::FileInfo::fileName() const {
|
|
return name + "." + extension;
|
|
}
|
|
|