查看: 75|回复: 0

[转载图文] URL路径拆分文件下载

[复制链接]
发表于 2020-6-15 21:00:30 | 显示全部楼层 |阅读模式
  URL路径拆分: 例如传入 http://www.baidu.com/index.php,将其拆分为主机名 www.baidu.com 和路径 /index.php 两部分
#include <Windows.h>
#include <iostream>

// Splits a URL into its host part and its path part.
//
// A leading "http://" or "https://" (case-sensitive) is skipped when present;
// a URL without a scheme is parsed from its first character. The host is
// everything up to the first '/' (or the end of the string); the path is the
// remainder, or "/" when nothing follows the host.
//
// Parameters:
//   szUrl  - NUL-terminated input URL (not modified).
//   szHost - receives the NUL-terminated host.
//   szPath - receives the NUL-terminated path.
//
// The caller owns the output buffers; no sizes are passed in, so each must be
// large enough for its result -- strlen(szUrl) + 2 bytes is always sufficient.
//
// Returns 0 on success, -1 if any argument is a null pointer.
int ParseUrl(char szUrl[], char szHost[], char szPath[])
{
    if (szUrl == 0 || szHost == 0 || szPath == 0)
        return -1;

    // Offset of the first host character, past the optional scheme prefix.
    int iStart = 0;
    if (strncmp(szUrl, "http://", 7) == 0)
        iStart = 7;
    else if (strncmp(szUrl, "https://", 8) == 0)
        iStart = 8;

    // Length of the host: scan until the path separator or end of string.
    int iLen = 0;
    while (szUrl[iStart + iLen] != '\0' && szUrl[iStart + iLen] != '/')
        ++iLen;

    // Terminate explicitly so the result does not depend on the caller
    // having zero-filled the buffer beforehand.
    memcpy(szHost, szUrl + iStart, iLen);
    szHost[iLen] = '\0';

    const char *pszRest = szUrl + iStart + iLen;
    const size_t nPathLen = strlen(pszRest);
    if (nPathLen == 0)
    {
        // No path in the URL: default to the root path.
        szPath[0] = '/';
        szPath[1] = '\0';
    }
    else
    {
        // +1 copies the terminating NUL together with the path.
        memcpy(szPath, pszRest, nPathLen + 1);
    }
    return 0;
}

// Demo driver: splits a fixed sample URL with ParseUrl and prints the two
// parts, then waits for a key press via the shell "pause" command.
int main(int argc,char *argv [])
{
    // Output buffers are zero-filled before being handed to ParseUrl.
    char szPath[2048] = { 0 };
    char szHost[1024] = { 0 };
    char szUrl[] = "http://www.baidu.com/index.html";

    const int status = ParseUrl(szUrl, szHost, szPath);
    if (status == 0)
    {
        printf("主机: %s \n", szHost);
        printf("路径: %s \n", szPath);
    }

    system("pause");
    return 0;
}
  http 文件下载
#define _CRT_SECURE_NO_WARNINGS
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Spide(const char *pszUrl, const char *pszFile)
{
char szHost[256] = {0};
char *ptr = (char *)pszUrl;

// 判断开头是否为http://如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) != 0) { return -1; }

ptr = ptr + 7;
int index = 0;

while (index < 255 && *ptr && *ptr != '/')
{
szHost[index++] = *ptr++;
}
szHost[index] = '\0';

//printf("去掉http后的域名地址: %s \n", szHost);

char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1\r\n"
"Host: %s\r\n"
"User-Agent: IE or Chrome\r\n"
"Accept-Type: */*\r\n"
"Connection: Close\r\n\r\n",
ptr, szHost);

//printf("构建好的请求头:\n %s \n", buffer);

// ------------------------------------------------------------

SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);

SOCKADDR_IN addr;
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;

index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = ::gethostbyname(szHost);

if (p) {
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);
index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR) {
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
FILE *pf = fopen(pszFile, "wb");
do {
index = recv(fd, buffer, 8191, 0);
if (index <= 0) {
break;
}
buffer[index] = '\0';
fwrite(buffer, 1, index, pf);
printf("%s", buffer);
} while (TRUE);
fclose(pf);
}
}
closesocket(fd);
delete[] buffer;
return 0;
}


int main(int argc,char *argv[])
{
WSADATA wsaData;
WSAStartup(0x0202, &wsaData);

Spide("http://cn.bing.com/","index.html");

system("pause");
return 0;
}
  实现HTTP页面下载功能:
#include <Windows.h>
#include <iostream>
#include <winsock.h>

#pragma comment(lib,"ws2_32.lib")

int Curl_Get(const char *pszUrl)
{
char szHost[256] = { 0 };
char *ptr = (char *)pszUrl;

// 判断开头是否为http:// 或者 https:// 如果不是则返回-1
if (_strnicmp(ptr, "http://", 7) == 0)
ptr = ptr + 7;
else if (_strnicmp(ptr, "https://", 8) == 0)
ptr = ptr + 8;
else
return -1;

int index = 0;
while (index < 255 && *ptr && *ptr != '/')
szHost[index++] = *ptr++;
szHost[index] = '\0';

char *buffer = new char[1024 * 8];
index = sprintf(buffer,
"GET %s HTTP/1.1 \r\n"
"Host: %s \r\n"
"User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:74.0) Gecko/20100101 Firefox/74.0 \r\n"
"Accept-Type: */* \r\n"
"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8 \r\n"
"Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 \r\n"
"Connection: Close \r\n\r\n",
ptr, szHost);
printf("%s \n", buffer);

SOCKADDR_IN addr;
SOCKET fd = socket(AF_INET, SOCK_STREAM, 0);
addr.sin_addr.S_un.S_addr = 0;
addr.sin_port = htons(0);
addr.sin_family = AF_INET;
index = bind(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
hostent *p = gethostbyname(szHost);

if (p)
{
ULONG ai = *(ULONG*)p->h_addr_list[0];
addr.sin_addr.S_un.S_addr = ai;
addr.sin_port = htons(80);

index = connect(fd, (const sockaddr*)&addr, sizeof(SOCKADDR_IN));
if (index == NOERROR)
{
index = send(fd, (const char*)buffer, (int)strlen(buffer), 0);
do
{
index = recv(fd, buffer, 8191, 0);
if (index <= 0) { break; }
buffer[index] = '\0';
printf("%s \n", buffer);
} while (TRUE);
}
}
closesocket(fd);
return 0;
}

// Demo driver: initialises Winsock, prints a fixed page with Curl_Get,
// releases Winsock again and waits for a key press.
int main(int argc, char *argv[])
{
    // 0x0202 requests Winsock version 2.2; no socket call may be made
    // if the startup fails.
    WSADATA wsaData;
    if (WSAStartup(0x0202, &wsaData) != 0)
    {
        printf("WSAStartup failed\n");
        return 1;
    }

    if (Curl_Get("http://cn.bing.com/") != 0)
    {
        printf("request failed\n");
    }

    WSACleanup();

    system("pause");
    return 0;
}


温馨提示:
1.如果您喜欢这篇帖子,请给作者点赞评分,点赞会增加帖子的热度,评分会给作者加学币。(评分不会扣掉您的积分,系统每天都会重置您的评分额度)。
2.回复帖子不仅是对作者的最好奖励,还可以获得学币奖励,请尊重作者的劳动成果,拒绝做伸手党!
3.发广告、灌水回复等违规行为一经发现直接禁言,如果本帖内容涉嫌违规,请点击论坛底部的举报反馈按钮,也可以在【投诉建议】板块发帖举报。
论坛交流群:672619046
微信公众号
快速回复 返回顶部 返回列表