vs2022 c++使用curl+libxml2库获取网站标题demo

您所在的位置:网站首页 计算机主机主要有哪些部件组成和功能和作用 vs2022 c++使用curl+libxml2库获取网站标题demo

vs2022 c++使用curl+libxml2库获取网站标题demo

#vs2022 c++使用curl+libxml2库获取网站标题demo| 来源: 网络整理| 查看: 265

开发环境 windows10 64位 首先需要安装vcpkg工具。 开始使用 vcpkg 创建项目 打开CMD进行安装curl和libxml2库 vcpkg install curl:x64-windows vcpkg install libxml2:x64-windows 安装完成后执行vcpkg integrate install,让vs2022可以识别该lib库。 先填一下坑,使用VS2022,打开X:\vcpkg\vcpkg\installed\x64-windows\include\iconv.h文件,然后Ctrl+A全选代码,菜单文件->iconv.h 另存为-> 选择编码保存->选择UNICODE 代码页 1200 确定保存。 编码保存 UNICODE 代码页 1200 Demo main.cpp代码,参考地址 https://curl.se/libcurl/c/htmltitle.html #include #include #include #include #include #include # pragma warning (disable:4819) // // Case-insensitive string comparison // #ifdef _MSC_VER #define COMPARE(a, b) (!_stricmp((a), (b))) #else #define COMPARE(a, b) (!strcasecmp((a), (b))) #endif // // libxml callback context structure // struct Context { Context() : addTitle(false) { } bool addTitle; std::string title; }; // // libcurl variables for error strings and returned data static char errorBuffer[CURL_ERROR_SIZE]; static std::string buffer; // // libcurl write callback function // static int writer(char* data, size_t size, size_t nmemb, std::string* writerData) { if (writerData == NULL) return 0; writerData->append(data, size * nmemb); return size * nmemb; } // // libcurl connection initialization // static bool init(CURL*& conn, char* url) { CURLcode code; conn = curl_easy_init(); if (conn == NULL) { fprintf(stderr, "Failed to create CURL connection\n"); exit(EXIT_FAILURE); } code = curl_easy_setopt(conn, CURLOPT_ERRORBUFFER, errorBuffer); if (code != CURLE_OK) { fprintf(stderr, "Failed to set error buffer [%d]\n", code); return false; } code = curl_easy_setopt(conn, CURLOPT_URL, url); if (code != CURLE_OK) { fprintf(stderr, "Failed to set URL [%s]\n", errorBuffer); return false; } code = curl_easy_setopt(conn, CURLOPT_FOLLOWLOCATION, 1L); if (code != CURLE_OK) { fprintf(stderr, "Failed to set redirect option [%s]\n", errorBuffer); return false; } code = curl_easy_setopt(conn, CURLOPT_WRITEFUNCTION, writer); if (code != CURLE_OK) { fprintf(stderr, "Failed to set writer [%s]\n", errorBuffer); return false; } code = curl_easy_setopt(conn, CURLOPT_WRITEDATA, &buffer); if (code != CURLE_OK) { fprintf(stderr, "Failed to set write data [%s]\n", errorBuffer); return false; } return true; } // // libxml start element callback function // static void StartElement(void* voidContext, const xmlChar* name, const xmlChar** attributes) { Context* context = static_cast(voidContext); if (COMPARE(reinterpret_cast(name), "TITLE")) { context->title = ""; context->addTitle = true; } (void)attributes; } // // libxml end element callback function // static void EndElement(void* voidContext, const xmlChar* name) { Context* context = static_cast(voidContext); if (COMPARE(reinterpret_cast(name), "TITLE")) context->addTitle = false; } // // Text handling helper function // static void handleCharacters(Context* context, const xmlChar* chars, int length) { if (context->addTitle) context->title.append(reinterpret_cast(chars), length); } // // libxml PCDATA callback function // static void Characters(void* voidContext, const xmlChar* chars, int length) { Context* context = static_cast(voidContext); handleCharacters(context, chars, length); } // // libxml CDATA callback function // static void cdata(void* voidContext, const xmlChar* chars, int length) { Context* context = static_cast(voidContext); handleCharacters(context, chars, length); } // // libxml SAX callback structure // static htmlSAXHandler saxHandler = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, StartElement, EndElement, NULL, Characters, NULL, NULL, NULL, NULL, NULL, NULL, NULL, cdata, NULL }; // // Parse given (assumed to be) HTML text and return the title // static void parseHtml(const std::string& html, std::string& title) { htmlParserCtxtPtr ctxt; Context context; ctxt = htmlCreatePushParserCtxt(&saxHandler, &context, "", 0, "", XML_CHAR_ENCODING_NONE); htmlParseChunk(ctxt, html.c_str(), html.size(), 0); htmlParseChunk(ctxt, "", 0, 1); htmlFreeParserCtxt(ctxt); title = context.title; } int main(int argc, char* argv[]) { CURL* conn = NULL; CURLcode code; std::string title; // Ensure one argument is given if (argc != 2) { fprintf(stderr, "Usage: %s \n", argv[0]); exit(EXIT_FAILURE); } curl_global_init(CURL_GLOBAL_DEFAULT); // Initialize CURL connection if (!init(conn, argv[1])) { fprintf(stderr, "Connection initializion failed\n"); exit(EXIT_FAILURE); } // Retrieve content for the URL code = curl_easy_perform(conn); curl_easy_cleanup(conn); if (code != CURLE_OK) { fprintf(stderr, "Failed to get '%s' [%s]\n", argv[1], errorBuffer); exit(EXIT_FAILURE); } // Parse the (assumed) HTML code parseHtml(buffer, title); // Display the extracted title printf("Title: %s\n", title.c_str()); return EXIT_SUCCESS; }

项目->属性->如下图设置。关闭所有警告

关闭所有警告

测试结果

测试结果


【本文地址】


今日新闻


推荐新闻


CopyRight 2018-2019 办公设备维修网 版权所有 豫ICP备15022753号-3