mason/src/download.cpp
2023-09-30 17:13:44 -03:00

411 lines
12 KiB
C++

#include "download.h"
// Name of the sub-directory of the destination where downloaded archives are stored.
constexpr std::string_view archived("archived");
// Header line sent with the repository info request (see repoInfoGiteaApi1).
constexpr std::string_view headerAcceptJson("accept: application/json");
// Matches an http(s) URL whose last path segment looks like "<name>.<extension>".
// Capture groups: 1 - scheme, 2 - name without extension, 3 - extension (may contain dots).
// The pattern is anchored at the start only.
static const std::regex file("(^https?):\\/\\/(?:[\\w\\d\\.\\-\\_]+\\/)+([\\w\\d\\-\\_]+)\\.([\\w\\d\\.]+)");
// Matches an http(s) URL of the form scheme://host/segment/segment with an optional
// trailing ".git" or "/". Capture groups: 1 - scheme, 2 - host, 3 - first path segment,
// 4 - second path segment (presumably owner and repository name, see RepoInfo::project).
static const std::regex repo("(^https?):\\/\\/([\\w\\d\\.\\-\\_]+)\\/([\\w\\d\\-\\_]+)\\/([\\w\\d\\-\\_]+)(?:(?:\\.git)|\\/)?$");
// Matches text ending in "tar.gz"; testFile applies it to the extension captured by `file`.
static const std::regex archive("tar\\.gz$");
// Number of Download objects currently alive; read and written under `amx` by the
// constructor and destructor to decide when libcurl is initialized and cleaned up globally.
unsigned int Download::instances(0);
// Lock protecting `instances`.
AtomicMutex Download::amx{};
void Download::CurlDeleter::operator()(CURL* curl) const {
curl_easy_cleanup(curl);
}
/// Deleter for the owning FILE pointer: closes the stream.
void Download::FileDeleter::operator()(FILE* stream) const {
    fclose(stream);
}
/**
 * @brief Creates a download job for a single URL.
 *
 * The first live instance initializes libcurl globally; every instance then
 * creates its own easy handle through createCurl().
 *
 * @param url          address to download
 * @param destination  directory the downloaded (and extracted) data is placed under
 * @param logger       logger handed over to the Loggable base
 * @throws CurlError if the global libcurl initialization or the easy handle creation fails
 */
Download::Download(
    const std::string& url,
    const std::filesystem::path& destination,
    const std::shared_ptr<Logger>& logger
) :
    Loggable(logger),
    curl(),
    url(url),
    destination(destination),
    location(std::nullopt)
{
    std::lock_guard lock(amx);
    if (instances == 0) {
        CURLcode res = curl_global_init(CURL_GLOBAL_ALL);
        if (res != CURLE_OK)
            throw CurlError(std::string("Error initializing curl global ") + curl_easy_strerror(res));
    }
    ++instances;
    try {
        createCurl();
    } catch (...) {
        // The destructor never runs for an object whose constructor threw,
        // so the instance accounting has to be rolled back right here.
        --instances;
        if (instances == 0)
            curl_global_cleanup();
        throw;
    }
}
/**
 * @brief Releases the easy handle and, for the last live instance, libcurl itself.
 */
Download::~Download() {
    // Members are destroyed only after this body has finished. Without the explicit
    // reset the easy handle would be cleaned up after curl_global_cleanup().
    curl.reset();
    std::lock_guard lock(amx);
    --instances;
    if (instances == 0)
        curl_global_cleanup();
}
std::optional<std::filesystem::path> Download::getLocation() const {
return location;
}
void Download::proceed() {
std::optional<RepoInfo> repo = testRepo();
if (repo.has_value()) {
const RepoInfo& rp = repo.value();
std::optional<std::filesystem::path> path = downloadAsRepo(rp);
info(url + " has been successfully donwloaded as a repository, extracting");
bool success = extract(path.value(), destination);
if (success) {
location = destination/rp.name;
info("Successfully extracted " + url);
return;
}
}
std::optional<FileInfo> file = testFile();
if (file.has_value()) {
const FileInfo& fl = file.value();
std::optional<std::filesystem::path> path = downloadAsFile(fl);
if (path.has_value()) {
if (fl.archive) {
info(url + " appears to be an archive, extracting");
bool success = extract(path.value(), destination/fl.name);
if (success) {
info("Successfully extracted " + url);
location = destination/fl.name;
}
} else {
location = path;
}
}
}
// CURLcode code = httpDownload(url, path);
// if (code == CURLE_OK)
// return path;
}
std::optional<nlohmann::json> Download::repoInfoGiteaApi1(const RepoInfo& repo) {
std::string url = repo.origin() + "/api/v1/repos/" + repo.project();
std::string data;
CURLcode code = httpGet(url, data, {headerAcceptJson});
if (code == CURLE_OK) {
try {
return nlohmann::json::parse(data);
} catch (const nlohmann::json::exception& e) {
warn(e.what());
}
}
return std::nullopt;
}
std::optional<std::filesystem::path> Download::downloadAsRepo(const RepoInfo& repo) {
info("Trying Gitea v1 API");
std::optional<nlohmann::json> repoInfo = repoInfoGiteaApi1(repo);
if (repoInfo.has_value()) {
nlohmann::json::const_iterator itr = repoInfo.value().find("default_branch");
if (itr != repoInfo.value().end() && itr->is_string()) {
std::string branchName = *itr;
info("Gitea v1 API seem to have worked");
info("Default branch is " + branchName);
return downloadRepoGiteaApi1(repo, branchName);
}
}
return std::nullopt;
}
std::optional<std::filesystem::path> Download::downloadAsFile(const FileInfo& file) {
std::filesystem::path dst;
if (file.archive)
dst = destination/archived/file.fileName();
else
dst = destination/file.fileName();
CURLcode code = httpDownload(url, dst);
if (code == CURLE_OK)
return dst;
minor("Removing " + dst.string());
if (std::filesystem::exists(dst))
std::filesystem::remove_all(dst);
return std::nullopt;
}
std::optional<std::filesystem::path> Download::downloadRepoGiteaApi1(const RepoInfo& repo, const std::string& branch) {
std::string fileName = branch + ".tar.gz";
std::string url = repo.origin() + "/api/v1/repos/" + repo.project() + "/archive/" + fileName;
std::filesystem::path path = destination/archived/fileName;
CURLcode code = httpDownload(url, path);
if (code == CURLE_OK)
return path;
minor("Removing " + path.string());
if (std::filesystem::exists(path))
std::filesystem::remove_all(path);
return std::nullopt;
}
/**
 * @brief Performs a request for `url` and collects the response body in memory.
 * @param url     address to request
 * @param result  string the received data is appended to
 * @param headers request header lines
 * @return the code reported by curl_easy_perform
 */
CURLcode Download::httpGet(
    const std::string& url,
    std::string& result,
    const std::vector<std::string_view>& headers)
{
    setHeaders(headers);

    CURL* handle = curl.get();
    curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeString);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, &result);

    const CURLcode outcome = curl_easy_perform(handle);
    return outcome;
}
/// Write callback with exactly the signature libcurl calls; forwards the chunk to fwrite.
static size_t writeChunkToFile(char* data, size_t size, size_t nmemb, void* stream) {
    return fwrite(data, size, nmemb, static_cast<FILE*>(stream));
}

/**
 * @brief Downloads `url` into the file `path`.
 *
 * Missing parent directories are created. An existing directory at `path`
 * is removed, an existing file is overwritten.
 *
 * @param url     address to download
 * @param path    file to write the response body to
 * @param headers request header lines
 * @return CURLE_OK on success; the curl_easy_perform code if the transfer
 *         failed; CURLE_HTTP_RETURNED_ERROR if the transfer itself went fine
 *         but the server answered with a status of 400 or above
 * @throws FileError if `path` cannot be opened for writing
 */
CURLcode Download::httpDownload(const std::string& url, const std::filesystem::path& path, const std::vector<std::string_view>& headers) {
    info("Starting to download " + url + " to " + path.string());
    std::filesystem::create_directories(path.parent_path());
    if (std::filesystem::exists(path)) {
        minor("File " + path.string() + " already exists, will be overwritten");
        if (std::filesystem::is_directory(path))
            std::filesystem::remove_all(path);
    }

    std::unique_ptr<FILE, FileDeleter> filePtr(fopen(path.c_str(), "wb"));
    if (!filePtr)
        throw FileError("Could not open file " + path.string() + " to write");

    setHeaders(headers);
    CURL* handle = curl.get();
    curl_easy_setopt(handle, CURLOPT_URL, url.c_str());
    curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, writeChunkToFile);
    curl_easy_setopt(handle, CURLOPT_WRITEDATA, filePtr.get());

    CURLcode code = curl_easy_perform(handle);
    if (code != CURLE_OK) {
        warn("Couldn't download file " + url + ": " + curl_easy_strerror(code));
        return code;
    }

    // A finished transfer says nothing about the HTTP status: without this check
    // the body of an error page (e.g. 404) would be kept as the downloaded file.
    long status = 0;
    curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &status);
    if (status >= 400) {
        warn("Couldn't download file " + url + ": response code " + std::to_string(status));
        return CURLE_HTTP_RETURNED_ERROR;
    }

    info("Successfully downloaded " + url);
    return code;
}
void Download::setHeaders(const std::vector<std::string_view>& headers) {
struct curl_slist* curlHeaders = nullptr;
for (const std::string_view& header : headers)
curlHeaders = curl_slist_append(curlHeaders, header.data());
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, curlHeaders);
}
void Download::createCurl() {
CURL* crl = curl_easy_init();
if (!crl)
throw CurlError("Error creating curl instalce");
curl_easy_setopt(crl, CURLOPT_USERAGENT, "libcurl-agent/1.0" );
curl_easy_setopt(crl, CURLOPT_VERBOSE, 1L);
curl_easy_setopt(crl, CURLOPT_DEBUGFUNCTION, trace);
curl_easy_setopt(crl, CURLOPT_DEBUGDATA, this);
curl_easy_setopt(crl, CURLOPT_FOLLOWLOCATION, 1L);
curl_easy_setopt(crl, CURLOPT_NOPROGRESS, 1L);
curl = std::unique_ptr<CURL, CurlDeleter>(crl);
}
/**
 * @brief Checks whether `url` looks like a link to a file.
 * @return file information built from the captures of the `file` pattern
 *         (the last member tells whether the extension matches the `archive`
 *         pattern), or nothing if the URL does not match
 */
std::optional<Download::FileInfo> Download::testFile() const {
    std::smatch match;
    if (!std::regex_search(url, match, file))
        return std::nullopt;

    info(url + " appears to be a file");
    const std::string extension(match[3]);
    return FileInfo {
        match[1],
        match[2],
        extension,
        std::regex_search(extension, archive)
    };
}
/**
 * @brief Checks whether `url` looks like a link to a git repository.
 * @return repository information built from the four captures of the `repo`
 *         pattern, or nothing if the URL does not match
 */
std::optional<Download::RepoInfo> Download::testRepo() const {
    std::smatch match;
    if (!std::regex_search(url, match, repo))
        return std::nullopt;

    info(url + " appears to be a git repository");
    return RepoInfo {
        match[1],
        match[2],
        match[3],
        match[4]
    };
}
bool Download::extract(const std::filesystem::path& source, const std::filesystem::path& destination) const {
int flags = ARCHIVE_EXTRACT_TIME;
flags |= ARCHIVE_EXTRACT_PERM;
flags |= ARCHIVE_EXTRACT_ACL;
flags |= ARCHIVE_EXTRACT_FFLAGS;
struct archive* a = archive_read_new();
struct archive* ext = archive_write_disk_new();
struct archive_entry *entry;
archive_read_support_format_all(a);
archive_read_support_filter_all(a);
archive_write_disk_set_options(ext, flags);
archive_write_disk_set_standard_lookup(ext);
bool result = true;
bool readOpen = false;
bool writeOpen = false;
int r = archive_read_open_filename(a, source.c_str(), 10240);
if (r) {
major("Couldn't open file " + source.string());
result = false;
} else {
readOpen = true;
}
while (result) {
r = archive_read_next_header(a, &entry);
if (r == ARCHIVE_EOF)
break;
if (r < ARCHIVE_OK)
major(archive_error_string(a));
if (r < ARCHIVE_WARN)
break;
std::string fileName(archive_entry_pathname(entry));
std::filesystem::path filePath = destination/fileName;
debug("Extracting " + filePath.string());
if (std::filesystem::exists(filePath))
minor(filePath.string() + " exists, overwriting");
archive_entry_set_pathname_utf8(entry, filePath.c_str());
r = archive_write_header(ext, entry);
if (r < ARCHIVE_OK) {
major(archive_error_string(ext));
} else if (archive_entry_size(entry) > 0) {
writeOpen = true;
r = copy(a, ext);
if (r < ARCHIVE_OK)
major(archive_error_string(ext));
if (r < ARCHIVE_WARN)
break;
}
r = archive_write_finish_entry(ext);
if (r < ARCHIVE_OK)
major(archive_error_string(ext));
if (r < ARCHIVE_WARN)
break;
}
if (readOpen)
archive_read_close(a);
if (writeOpen)
archive_write_close(ext);
archive_read_free(a);
archive_write_free(ext);
return result;
}
int Download::copy(struct archive* ar, struct archive* aw) const {
int r;
const void *buff;
size_t size;
la_int64_t offset;
while (true) {
r = archive_read_data_block(ar, &buff, &size, &offset);
if (r == ARCHIVE_EOF)
return ARCHIVE_OK;
if (r < ARCHIVE_OK)
return r;
r = archive_write_data_block(aw, buff, size, offset);
if (r < ARCHIVE_OK) {
major(archive_error_string(aw));
return r;
}
}
}
/**
 * @brief libcurl debug callback: forwards informational text to the debug log.
 *
 * Only CURLINFO_TEXT records are logged; one trailing newline is stripped.
 *
 * @param handle  easy handle the record belongs to (unused)
 * @param type    kind of record
 * @param data    record bytes, not null-terminated
 * @param size    number of bytes in `data`
 * @param clientp the Download instance registered as debug data
 * @return always 0
 */
int Download::trace(CURL* handle, curl_infotype type, char* data, size_t size, void* clientp) {
    (void)(handle);
    if (type != CURLINFO_TEXT)
        return 0;

    std::string message(data, size);
    // Guard against an empty record before looking at the last character
    if (!message.empty() && message.back() == '\n')
        message.pop_back();
    static_cast<Download*>(clientp)->debug(message);
    return 0;
}
/**
 * @brief libcurl write callback that appends the received chunk to a std::string.
 * @param data  received bytes
 * @param size  size of one element
 * @param nmemb number of elements
 * @param mem   pointer to the std::string to append to
 * @return number of bytes consumed, always `size * nmemb`
 */
size_t Download::writeString(void* data, size_t size, size_t nmemb, void* mem) {
    const size_t bytes = size * nmemb;
    static_cast<std::string*>(mem)->append(static_cast<const char*>(data), bytes);
    return bytes;
}
Download::CurlError::CurlError(const std::string& message) :
std::runtime_error(message) {}
Download::FileError::FileError(const std::string& message) :
std::runtime_error(message) {}
/// Returns "<protocol>://<host>".
std::string Download::RepoInfo::origin() const {
    std::string result(protocol);
    result += "://";
    result += host;
    return result;
}
/// Returns "<owner>/<name>".
std::string Download::RepoInfo::project() const {
    std::string result(owner);
    result += "/";
    result += name;
    return result;
}
/// Returns "<name>.<extension>".
std::string Download::FileInfo::fileName() const {
    std::string result(name);
    result += ".";
    result += extension;
    return result;
}