Take a look at this simple web page, if you like: http://alvein.freevar.com It's just a plain gallery of 1999 small images.
Chrome and Edge both complete the full load in under 10 seconds (with an empty cache). An external testing tool such as https://tools.pingdom.com reports similar figures.
However, I haven't been able to code a downloader that comes anywhere near those speeds.
I'm not going to post my source code (yet), to keep this question simple.
For now, I'll just say that I have two versions, based on these libcurl samples:
https://curl.haxx.se/libcurl/c/10-at-a-time.html (single-threaded, concurrent downloads)
https://curl.haxx.se/libcurl/c/multithread.html (multithreaded)
I extended both samples, filling urls[] either by hand or by parsing the resource links of the sample gallery directly. Meh, that part isn't important.
I also added a "pool" to both methods, so the number of resources and threads (or "slots", for the first sample) can vary.
The rest is pretty much identical.
For the same web page and the same resources, my times are consistently above one minute, and I'm wondering why.
What does the browser do to make everything so fast? Or is libcurl perhaps just not well suited for this kind of task?
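From what I've read, a big part of what makes browsers fast here is that they keep a handful of persistent connections per host and reuse them for every request, instead of paying the TCP handshake 1999 times. If that's the differentiator, these multi-interface options look relevant (the option names are from the libcurl docs; the limit values below are illustrative guesses, not tuned settings):

CURLM *curlMulti = curl_multi_init();
// Allow up to 6 parallel connections to the same host, roughly what
// browsers use for HTTP/1.1
curl_multi_setopt(curlMulti, CURLMOPT_MAX_HOST_CONNECTIONS, 6L);
// Keep finished connections cached for reuse instead of closing them
curl_multi_setopt(curlMulti, CURLMOPT_MAXCONNECTS, 6L);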
Thanks in advance for your suggestions!
PS: These samples were built and tested with VS2017, x64, Release.
EDIT
I don't know whether my connection was crawling before (I doubt it), but in my latest runs I'm also getting times under 10 seconds.
As requested, here's my code (warning: it's long).
// Parallel downloads sample - based on https://curl.haxx.se/libcurl/c/10-at-a-time.html
#include <time.h>
#include <curl/curl.h>
// Available download slots. The smaller the download resources, the higher this value can be.
// Can't be too big, or failing fopen_s() calls will make some slots impossible to fill.
#define MAX_SIMULTANEOUS_DOWNLOADS 200
typedef struct {
int iIndex;
char *szURL;
char *szPath;
} ProgressHelper;
typedef struct {
unsigned __int64 ui64Size;
char *cData;
} DownloadHelper;
typedef struct {
int iTotalDownloads;
bool *bDownloaded, *bDownloading;
char **szURLs, **szPaths;
FILE **fDownloads;
CURL **curlDownloads;
ProgressHelper *phProgress;
DownloadHelper *dhDownload;
} MultiDownloadHelper;
CURLM *curlMultiHandle;
CURL *curlSharedHandles[MAX_SIMULTANEOUS_DOWNLOADS];
bool bBusyHandles[MAX_SIMULTANEOUS_DOWNLOADS];
const char *szSourceURL = "http://alvein.freevar.com";
const char *szDownloadFolder = "C:\\Users\\Alvein\\Avatars";
static size_t write_callback(char *data, size_t size, size_t nitems, void *userdata) {
// write_callback(): receives incoming download data and "saves" it in a DownloadHelper structure.
unsigned __int64 ui64DataSize = size * nitems;
DownloadHelper *dhCurrentDownload = (DownloadHelper *)userdata;
char *cDownloadedData = (char *)realloc(dhCurrentDownload->cData,
dhCurrentDownload->ui64Size + ui64DataSize);
if (NULL != cDownloadedData) {
// Saves the downloaded chunk (data) at the end of the downloaded data (cDownloadedData)
if (0 == memcpy_s(cDownloadedData + dhCurrentDownload->ui64Size,
dhCurrentDownload->ui64Size + ui64DataSize,
data,
ui64DataSize)) {
dhCurrentDownload->cData = cDownloadedData;
dhCurrentDownload->ui64Size += ui64DataSize;
return ui64DataSize;
}
}
return 0;
}
static int progress_callback(void *userdata, curl_off_t dltotal, curl_off_t dlnow, curl_off_t ultotal, curl_off_t ulnow) {
// progress_callback(): just a simple callback for future use.
ProgressHelper *phCurrentDownload = (ProgressHelper *)userdata;
if (dltotal)
fprintf(stderr,"%s: %lld of %lld\n", phCurrentDownload->szURL, dlnow, dltotal);
return CURL_PROGRESSFUNC_CONTINUE;
}
bool singleDownload(const char *szURL, char **cContentData, unsigned __int64 *ui64ContentLength) {
// singleDownload(): downloads the resource in szURL.
// cContentData: returned array of bytes (not a string). Must be released by caller.
// ui64ContentLength: the content length written in cContentData.
bool bResult = false;
CURL *curlHandle;
DownloadHelper dhSingle = { 0,NULL };
*cContentData = NULL;
*ui64ContentLength = 0;
curlHandle = curl_easy_init();
if (NULL != curlHandle) {
curl_easy_setopt(curlHandle, CURLOPT_URL, szURL);
curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, write_callback);
curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &dhSingle);
if (CURLE_OK == curl_easy_perform(curlHandle))
if (dhSingle.ui64Size) {
*cContentData = dhSingle.cData;
*ui64ContentLength = dhSingle.ui64Size;
bResult = true;
}
}
if (!bResult)
free(dhSingle.cData); // Releases any partially downloaded data on failure
curl_easy_cleanup(curlHandle);
return bResult;
}
bool multiDownload_StartOne(MultiDownloadHelper *mdhHelper, int iIndex) {
// multiDownload_StartOne(): adds a given download job to the multi interface
bool bResult = false;
int iK;
FILE *fHandle;
CURL *curlHandle;
if (0 == fopen_s(&fHandle, mdhHelper->szPaths[iIndex], "wb")) {
// Finds a free download slot
for (iK = 0; iK < MAX_SIMULTANEOUS_DOWNLOADS; iK++)
if (!bBusyHandles[iK])
break;
if (iK < MAX_SIMULTANEOUS_DOWNLOADS) {
curlHandle = curlSharedHandles[iK];
bBusyHandles[iK] = true; // Seizes the download slot
mdhHelper->fDownloads[iIndex] = fHandle;
mdhHelper->curlDownloads[iIndex] = curlHandle; // Assigns the shared handle to this job
mdhHelper->phProgress[iIndex] = { iIndex,mdhHelper->szURLs[iIndex],mdhHelper->szPaths[iIndex] };
mdhHelper->dhDownload[iIndex] = { 0,NULL }; // Resets the download progress
curl_easy_setopt(curlHandle, CURLOPT_URL, mdhHelper->szURLs[iIndex]);
curl_easy_setopt(curlHandle, CURLOPT_WRITEFUNCTION, write_callback);
curl_easy_setopt(curlHandle, CURLOPT_WRITEDATA, &mdhHelper->dhDownload[iIndex]);
#ifdef _DEBUG // Progress is disabled in Release - too much stuff on the console
curl_easy_setopt(curlHandle, CURLOPT_NOPROGRESS, 0L);
curl_easy_setopt(curlHandle, CURLOPT_XFERINFOFUNCTION, progress_callback);
curl_easy_setopt(curlHandle, CURLOPT_XFERINFODATA, &mdhHelper->phProgress[iIndex]);
fprintf(stderr, "multiDownload_StartOne(%d)...\n", iIndex);
#endif
curl_multi_add_handle(curlMultiHandle, curlHandle);
bResult = true;
}
}
return bResult;
}
void multiDownload(MultiDownloadHelper *mdhHelper) {
// multiDownload(): performs all the download jobs contained in mdhHelper.
int iK, iJ, iActiveDownloads, iTotalDownloaded, iActiveHandles, iPendingMessages;
CURLMsg *curlMessage;
// Finds every not-completed/not-busy download job...
iActiveDownloads = iTotalDownloaded = 0;
for (; iActiveDownloads < MAX_SIMULTANEOUS_DOWNLOADS; iActiveDownloads++) {
for (iK = 0; iK < mdhHelper->iTotalDownloads; iK++)
if (!mdhHelper->bDownloaded[iK])
if (!mdhHelper->bDownloading[iK])
break;
if (iK < mdhHelper->iTotalDownloads)
mdhHelper->bDownloading[iK] = multiDownload_StartOne(mdhHelper, iK); // ...and starts them...
else
break;
} // ...as long as there are no more than MAX_SIMULTANEOUS_DOWNLOADS active jobs
do {
curl_multi_perform(curlMultiHandle, &iActiveHandles);
do {
curlMessage = curl_multi_info_read(curlMultiHandle, &iPendingMessages);
if (NULL != curlMessage) {
// Finds the index of the download job the received message belongs to
for (iK = 0; iK < mdhHelper->iTotalDownloads; iK++)
if (curlMessage->easy_handle == mdhHelper->curlDownloads[iK])
break;
if (iK < mdhHelper->iTotalDownloads) {
if (CURLMSG_DONE == curlMessage->msg) {
if (CURLE_OK == curlMessage->data.result) {
long lResCode;
curl_easy_getinfo(mdhHelper->curlDownloads[iK], CURLINFO_RESPONSE_CODE, &lResCode);
// The response code is ignored in this sample (let's assume it's always HTTP 200 OK)
mdhHelper->bDownloaded[iK] = true;
mdhHelper->bDownloading[iK] = false;
iTotalDownloaded++;
fwrite(mdhHelper->dhDownload[iK].cData,
sizeof(char),
mdhHelper->dhDownload[iK].ui64Size,
mdhHelper->fDownloads[iK]); // Saves the downloaded file in a single shot
#ifdef _DEBUG
fprintf(stderr, "\nDownload is complete (%ld): %s\n", lResCode, mdhHelper->szPaths[iK]);
#endif
}
else {
fprintf(stderr, "\n**Download failed (%d): %s\n", curlMessage->data.result, mdhHelper->szPaths[iK]);
mdhHelper->bDownloading[iK] = false;
}
fclose(mdhHelper->fDownloads[iK]);
mdhHelper->fDownloads[iK] = NULL;
curl_multi_remove_handle(curlMultiHandle, mdhHelper->curlDownloads[iK]);
// Instead of calling curl_easy_cleanup(mdhHelper->curlDownloads[iK])...
for (iJ = 0; iJ < MAX_SIMULTANEOUS_DOWNLOADS; iJ++)
if (curlSharedHandles[iJ] == mdhHelper->curlDownloads[iK])
break;
if (iJ < MAX_SIMULTANEOUS_DOWNLOADS)
bBusyHandles[iJ] = false; // ...frees the download slot this job was occupying...
mdhHelper->curlDownloads[iK] = NULL; // ...and detaches the shared handle from the job
iActiveDownloads--;
if (iTotalDownloaded < mdhHelper->iTotalDownloads) {
// Finds all the pending download jobs, and starts them...
for (; iActiveDownloads < MAX_SIMULTANEOUS_DOWNLOADS; iActiveDownloads++) {
for (iK = 0; iK < mdhHelper->iTotalDownloads; iK++)
if (!mdhHelper->bDownloaded[iK])
if (!mdhHelper->bDownloading[iK])
break;
if (iK < mdhHelper->iTotalDownloads)
mdhHelper->bDownloading[iK] = multiDownload_StartOne(mdhHelper, iK);
else
break;
} // ...as long as there are no more than MAX_SIMULTANEOUS_DOWNLOADS active jobs
}
}
else // Unlikely to happen
fprintf(stderr, "\n!!Unknown message (%d): %s\n", curlMessage->msg, mdhHelper->szPaths[iK]);
}
else // Should never happen
fprintf(stderr, "\n!!Could not find the messaging handle in the downloads list\n");
}
} while (NULL != curlMessage);
if (iActiveHandles) // Waits up to one second for activity on the remaining downloads...
curl_multi_wait(curlMultiHandle, NULL, 0, 1000, NULL); // ...before polling the messages again
else
if (iTotalDownloaded == mdhHelper->iTotalDownloads)
break; // Exits if every download job has finished
} while (true);
}
void allocMultiDownloadHelper(MultiDownloadHelper *mdhHelper, int iHowMany) {
// allocMultiDownloadHelper(): allocates the required memory for every download job.
mdhHelper->iTotalDownloads = iHowMany;
mdhHelper->bDownloaded = (bool *)malloc(iHowMany * sizeof(bool));
mdhHelper->bDownloading = (bool *)malloc(iHowMany * sizeof(bool));
mdhHelper->szURLs = (char **)malloc(iHowMany * sizeof(char *));
mdhHelper->szPaths = (char **)malloc(iHowMany * sizeof(char *));
mdhHelper->fDownloads = (FILE **)malloc(iHowMany * sizeof(FILE *));
mdhHelper->curlDownloads = (CURL **)malloc(iHowMany * sizeof(CURL *));
mdhHelper->phProgress = (ProgressHelper *)malloc(iHowMany * sizeof(ProgressHelper));
mdhHelper->dhDownload = (DownloadHelper *)malloc(iHowMany * sizeof(DownloadHelper));
}
void freeMultiDownloadHelper(MultiDownloadHelper mdhHelper) {
// freeMultiDownloadHelper(): releases the memory allocated for every download job.
for (int iK = 0; iK < mdhHelper.iTotalDownloads; iK++) {
free(mdhHelper.szURLs[iK]);
free(mdhHelper.szPaths[iK]);
free(mdhHelper.dhDownload[iK].cData);
}
free(mdhHelper.bDownloaded);
free(mdhHelper.bDownloading);
free(mdhHelper.szURLs);
free(mdhHelper.szPaths);
free(mdhHelper.fDownloads);
free(mdhHelper.curlDownloads);
free(mdhHelper.phProgress);
free(mdhHelper.dhDownload);
}
void parseHTMLImgTags(char *szHTML, char ***szImgSources, int *iTotal) {
// parseHTMLImgTags(): shameless <img> tags parsing in the HTML content supplied in szHTML.
// Not to be taken seriously.
// szImgSources: returned array of URLs as NULL-terminated strings.
// iTotal: the number of image URLs found.
unsigned __int64 ui64ImgSrcLen;
char *szHTMLNdx, *szImgSrc, **szRllSources,
*szImgTagStart, *szImgTagEnd, *szSrcAttStart, *szSrcAttEnd;
*iTotal = 0;
*szImgSources = NULL;
szHTMLNdx = szHTML;
do {
szImgTagStart = strstr(szHTMLNdx, "<img ");
if (NULL != szImgTagStart) {
szImgTagEnd = strstr(szImgTagStart + 5, ">");
if (NULL != szImgTagEnd) {
szSrcAttStart = strstr(szImgTagStart, "src=\"");
if (NULL != szSrcAttStart) {
szSrcAttEnd = strstr(szSrcAttStart + 5, "\"");
if (NULL != szSrcAttEnd) {
ui64ImgSrcLen = szSrcAttEnd - szSrcAttStart - 5;
szImgSrc = (char *)malloc(ui64ImgSrcLen + 1);
if (NULL != szImgSrc) {
if (0 == strncpy_s(szImgSrc, ui64ImgSrcLen + 1, szSrcAttStart + 5, ui64ImgSrcLen)) {
szImgSrc[ui64ImgSrcLen] = '\0';
szRllSources = (char **)realloc(*szImgSources, (*iTotal + 1) * sizeof(char *));
if (NULL != szRllSources) {
*szImgSources = szRllSources;
(*szImgSources)[(*iTotal)++] = _strdup(szImgSrc);
}
}
free(szImgSrc);
}
}
}
}
szHTMLNdx = szImgTagEnd + 1;
}
} while (NULL != szImgTagStart);
}
int main(void) {
int iResult = EXIT_FAILURE, iK, iTotalDownloads;
unsigned __int64 ui64HTMLSize;
char *cHTML, *szImgExt, **szURLs, szLocalFile[MAX_PATH];
double dblElapsed;
time_t tmTimer;
FILE *fHTML;
MultiDownloadHelper mdhDownloads;
curl_global_init(CURL_GLOBAL_ALL);
time(&tmTimer);
// Downloads the source web page
if (singleDownload(szSourceURL, &cHTML, &ui64HTMLSize)) {
dblElapsed = difftime(time(NULL), tmTimer);
iTotalDownloads = 0;
szURLs = NULL;
sprintf_s(szLocalFile, MAX_PATH, "%s\\source.html", szDownloadFolder);
(void)fopen_s(&fHTML, szLocalFile, "wb"); // fopen_s() sets fHTML to NULL on failure
if (ui64HTMLSize) {
// Saves the content in the download folder
if (NULL != fHTML)
fwrite(cHTML, sizeof(char), ui64HTMLSize, fHTML);
char *cRealloc = (char *)realloc(cHTML, ui64HTMLSize + 1); // Grows the buffer by one byte for the terminator
if (NULL != cRealloc) {
cHTML = cRealloc;
cHTML[ui64HTMLSize] = '\0'; // Assumes the content is HTML - handles it as an ASCIIZ string
parseHTMLImgTags(cHTML, &szURLs, &iTotalDownloads);
}
}
if (NULL != fHTML)
fclose(fHTML);
free(cHTML);
if (iTotalDownloads) {
// Initializes every handle in the download slots - sets them as "available"
for (iK = 0; iK < MAX_SIMULTANEOUS_DOWNLOADS; iK++) {
curlSharedHandles[iK] = curl_easy_init();
bBusyHandles[iK] = false;
}
allocMultiDownloadHelper(&mdhDownloads, iTotalDownloads);
// Initializes the download jobs (1 per image resource)
for (iK = 0; iK < iTotalDownloads; iK++) {
#ifdef _DEBUG
fprintf(stderr, "Image resource: %s\n", szURLs[iK]);
#endif
mdhDownloads.bDownloaded[iK] = mdhDownloads.bDownloading[iK] = false;
mdhDownloads.szURLs[iK] = szURLs[iK];
// Makes the local filename for each job - just a numeric sequence, for simplicity
mdhDownloads.szPaths[iK] = (char *)malloc(MAX_PATH * sizeof(char));
sprintf_s(mdhDownloads.szPaths[iK], MAX_PATH, "%s\\%05u", szDownloadFolder, iK);
// Adds a file extension, based on the image resource URL - rudimentary method
szImgExt = strrchr(szURLs[iK], '.');
if (NULL != szImgExt)
if (szImgExt == strstr(szImgExt, ".jpg"))
strcat_s(mdhDownloads.szPaths[iK], MAX_PATH, ".jpg");
else if (szImgExt == strstr(szImgExt, ".png"))
strcat_s(mdhDownloads.szPaths[iK], MAX_PATH, ".png");
else if (szImgExt == strstr(szImgExt, ".gif"))
strcat_s(mdhDownloads.szPaths[iK], MAX_PATH, ".gif");
else
strcat_s(mdhDownloads.szPaths[iK], MAX_PATH, ".tmp");
else
strcat_s(mdhDownloads.szPaths[iK], MAX_PATH, ".tmp");
}
curlMultiHandle = curl_multi_init();
curl_multi_setopt(curlMultiHandle, CURLMOPT_MAXCONNECTS, MAX_SIMULTANEOUS_DOWNLOADS);
fprintf(stderr, "Downloading %d images...\n", iTotalDownloads);
time(&tmTimer);
multiDownload(&mdhDownloads);
dblElapsed += difftime(time(NULL), tmTimer);
curl_multi_cleanup(curlMultiHandle);
freeMultiDownloadHelper(mdhDownloads);
for (iK = 0; iK < MAX_SIMULTANEOUS_DOWNLOADS; iK++)
curl_easy_cleanup(curlSharedHandles[iK]);
fprintf(stderr, "Load time: %0.2f\n", dblElapsed);
iResult = EXIT_SUCCESS;
}
else
fprintf(stderr, "Could not find a single image resource the source web page\n");
}
else
fprintf(stderr, "Could not download the source web page\n");
curl_global_cleanup();
return iResult;
}
I know it's pretty fast now, but any further suggestions are welcome.
Thanks a lot, guys, for the handle-reuse ideas.
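For anyone landing here later: as I understand it, the point of reusing easy handles is that a live handle keeps its connection in libcurl's connection cache, so the next request to the same host skips the TCP handshake entirely. A minimal sketch of the idea (the image paths are hypothetical, and error handling is omitted):

#include <stdlib.h>
#include <curl/curl.h>

int main(void) {
    curl_global_init(CURL_GLOBAL_ALL);
    CURL *curlHandle = curl_easy_init();
    if (NULL != curlHandle) {
        // The first transfer opens the connection to the host...
        curl_easy_setopt(curlHandle, CURLOPT_URL, "http://alvein.freevar.com/00001.jpg"); // Hypothetical path
        curl_easy_perform(curlHandle);
        // ...and the second one reuses it, because the handle (and the
        // connection cache it owns) stays alive between transfers
        curl_easy_setopt(curlHandle, CURLOPT_URL, "http://alvein.freevar.com/00002.jpg"); // Hypothetical path
        curl_easy_perform(curlHandle);
        curl_easy_cleanup(curlHandle); // Only now is the cached connection closed
    }
    curl_global_cleanup();
    return EXIT_SUCCESS;
}

With the multi interface, the connection cache belongs to the multi handle, so the same effect comes from keeping curlMultiHandle (and the curlSharedHandles[] pool above) alive for the whole batch, instead of creating and destroying a handle per image.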