How to download a file over HTTP using C?
I spent the last few days trying to figure out how to download a file from a URL. This is my first challenge with sockets, and I'm using it to gain an understanding of protocols, so I would like to do it without the cURL library and only in C. I searched a lot, and now I'm able to printf the source code of a page, but I think it's different with a file: I don't just have to copy the received data from a buffer into a file, right? Any tips?
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
/*
 * First attempt from the question: resolve the host in domain, connect to
 * port 80, send one GET request, read a single chunk of the reply, print it,
 * and write whatever follows the first CRLF CRLF sequence to received_file.
 */
int main(void)
{
char domain[] = "www.sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png"; //example
int sock, bytes_received;
char send_data[1024],recv_data[9999], *p;
struct sockaddr_in server_addr;
struct hostent *he;
FILE *fp;
// Resolve the host name; on failure report it with herror() and quit.
he = gethostbyname(domain);
if (he == NULL){
herror("gethostbyname");
exit(1);
}
// Create an IPv4 stream (TCP) socket.
if ((sock = socket(AF_INET, SOCK_STREAM, 0))== -1){
perror("Socket");
exit(1);
}
// Fill in the server address: port 80 and the address stored in he->h_addr.
server_addr.sin_family = AF_INET;
server_addr.sin_port = htons(80);
server_addr.sin_addr = *((struct in_addr *)he->h_addr);
bzero(&(server_addr.sin_zero),8);
if (connect(sock, (struct sockaddr *)&server_addr,sizeof(struct sockaddr)) == -1){
perror("Connect");
exit(1);
}
// Build the request text.
// NOTE(review): the Host header value is formatted as /%s, i.e. the domain is
// sent with a leading slash.
snprintf(send_data, sizeof(send_data), "GET /%s HTTP/1.1\r\nHost: /%s\r\n\r\n", path, domain);
//printf("%s\n", send_data);
// NOTE(review): the return value of send() is not checked here.
send(sock, send_data, strlen(send_data), 0);
printf("Data sended.\n");
// NOTE(review): the result of fopen() is not checked before fp is used.
fp=fopen("received_file","wb");
// Only one recv() call is made, so at most 9999 bytes of the reply are seen.
bytes_received = recv(sock, recv_data, 9999, 0);
// NOTE(review): bytes_received may be -1 (error) or 9999 (full buffer); both
// values index outside the 9999-byte recv_data array.
recv_data[bytes_received] = '\0';
printf("Data receieved.\n");
printf("%s\n", recv_data);
// NOTE(review): strstr() returns NULL when the separator is not in this chunk,
// and the result is used without a check.
p = strstr(recv_data, "\r\n\r\n"); //to find "\r\n\r\n" sequence and put the pointer p after that
p=p+4;
// NOTE(review): strlen(p) stops at the first zero byte, so a payload that
// contains zero bytes is written only up to that point.
fwrite(p,strlen(p),1,fp);
close(sock);
fclose(fp);
return 0;
}
UPDATE 1: Thanks to milevyo for some improvements! It works well with a txt file, but it doesn't with other kinds of files (a PNG in this case).
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <string.h>
#include <errno.h>
#include <arpa/inet.h>
/*
 * Second attempt from the question (UPDATE 1): same connection set-up as
 * before, but the reply is now read in a loop and every received chunk is
 * both appended to received_file and printed to stdout.
 */
int main(void){
//char domain[] = "www.gnu.org", path[]="/licenses/gpl.txt"; //example
char domain[] = "sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png"; //example
int sock, bytes_received;
char send_data[1024],recv_data[9999];
struct sockaddr_in server_addr;
struct hostent *he;
FILE *fp;
// Resolve the host name; on failure report it with herror() and quit.
he = gethostbyname(domain);
if (he == NULL){
herror("gethostbyname");
exit(1);
}
// Create an IPv4 stream (TCP) socket.
if ((sock = socket(AF_INET, SOCK_STREAM, 0))== -1){
perror("Socket");
exit(1);
}
// Fill in the server address: port 80 and the address stored in he->h_addr.
server_addr.sin_family = AF_INET;
server_addr.sin_port = htons(80);
server_addr.sin_addr = *((struct in_addr *)he->h_addr);
bzero(&(server_addr.sin_zero),8);
printf("Connecting ...\n");
if (connect(sock, (struct sockaddr *)&server_addr,sizeof(struct sockaddr)) == -1){
perror("Connect");
exit(1);
}
printf("Sending data ...\n");
// NOTE(review): the Host header value is formatted as /%s, i.e. the domain is
// sent with a leading slash.
snprintf(send_data, sizeof(send_data), "GET /%s HTTP/1.1\r\nHost: /%s\r\n\r\n", path, domain);
if(send(sock, send_data, strlen(send_data), 0)==-1){
perror("send");
exit(2);
}
printf("Data sent.\n");
// NOTE(review): the result of fopen() is not checked before fp is used.
fp=fopen("received_file","wb");
printf("Recieving data...\n\n");
// Read until recv() returns 0 or a negative value.
while((bytes_received = recv(sock, recv_data, 9999, 0))>0){
// NOTE(review): the loop condition only admits values greater than 0, so this
// branch cannot be taken inside the loop body.
if(bytes_received==-1){
perror("recieve");
exit(3);
}
// NOTE(review): recv() may fill all 9999 bytes, in which case this index is
// one past the end of recv_data.
recv_data[bytes_received] = '\0';
// Every received byte is written to the file, starting with the first byte of
// the reply, so the file also contains the response header lines.
fwrite(recv_data,bytes_received,1,fp);
// Printing with %s stops at the first zero byte in the chunk.
printf("%s", recv_data);
}
close(sock);
fclose(fp);
printf("\n\nDone.\n\n");
return 0;
}
This code produces a 334-byte file (instead of the 12.4 kB of the original file) with this inside:
HTTP/1.1 400 Bad Request
Date: Sat, 28 Nov 2015 16:20:45 GMT
Content-Type: text/html
Content-Length: 177
Connection: close
Server: -nginx
CF-RAY: -
<html>
<head><title>400 Bad Request</title></head>
<body bgcolor="white">
<center><h1>400 Bad Request</h1></center>
<hr><center>cloudflare-nginx</center>
</body>
</html>
Does anybody know how to fix this "400 Bad Request"?
Solution 1:[1]
This is an update of the previously posted code. The HTTP protocol is far too large to be implemented fully in just a small example.
Reformatting the code or modifying it is more than welcome.
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
/*
 * Read the HTTP status line (everything up to the first CRLF) from sock.
 *
 * The line is read one byte at a time so that nothing after the CRLF is
 * consumed; the header fields stay in the socket for the next reader.
 *
 * Returns the numeric status code (for example 200), or 0 when
 *   - the peer closed the connection before the line was complete,
 *   - the line does not fit into the local buffer, or
 *   - no status code could be parsed from the line.
 * Terminates the process with exit(1) if recv() fails.
 */
int ReadHttpStatus(int sock){
    char line[1024];
    size_t len = 0;
    int complete = 0;
    int status = 0;

    printf("Begin Response ..\n");

    /* Stop one byte early so there is always room for the terminating NUL. */
    while (len < sizeof(line) - 1) {
        ssize_t got = recv(sock, &line[len], 1, 0);
        if (got == -1) {
            perror("ReadHttpStatus");
            exit(1);
        }
        if (got == 0) {
            break;                      /* connection closed mid-line */
        }
        len++;
        if (len >= 2 && line[len - 2] == '\r' && line[len - 1] == '\n') {
            len -= 2;                   /* drop the CRLF from the stored text */
            complete = 1;
            break;
        }
    }
    line[len] = '\0';

    /* "HTTP/1.1 200 OK": skip the version token, then read the code. */
    if (!complete || sscanf(line, "%*s %d", &status) != 1) {
        status = 0;
    }

    printf("%s\n", line);
    printf("status=%d\n", status);
    printf("End Response ..\n");
    return status;
}
/* Return 1 if line starts with prefix, comparing ASCII letters without
 * regard to case (HTTP field names are case-insensitive), else 0. */
static int starts_with_nocase(const char *line, const char *prefix){
    for (; *prefix != '\0'; line++, prefix++) {
        char a = *line;
        char b = *prefix;
        if (a >= 'A' && a <= 'Z') a = (char)(a - 'A' + 'a');
        if (b >= 'A' && b <= 'Z') b = (char)(b - 'A' + 'a');
        if (a != b) {
            return 0;
        }
    }
    return 1;
}

/*
 * Read the HTTP header fields from sock up to and including the empty line
 * that ends the header block. The only field that is parsed is
 * Content-Length.
 *
 * Header lines are read one byte at a time and handled one line at a time,
 * so the header block may be of any size and nothing of the body is
 * consumed. A single line longer than the local buffer is truncated.
 *
 * Returns
 *   the Content-Length value when the field is present and valid,
 *   -1 when the header block is complete but the size is unknown,
 *    0 when the peer closed the connection before the block was complete.
 * Terminates the process with exit(1) if recv() fails.
 */
int ParseHeader(int sock){
    char line[1024];
    int content_length = -1;            /* unknown size until proven otherwise */

    printf("Begin HEADER ..\n");

    for (;;) {
        size_t len = 0;
        char prev = '\0';

        /* Collect one line, up to and excluding its CRLF. */
        for (;;) {
            char ch;
            ssize_t got = recv(sock, &ch, 1, 0);
            if (got == -1) {
                perror("Parse Header");
                exit(1);
            }
            if (got == 0) {
                printf("End HEADER ..\n");
                return 0;               /* closed before the empty line */
            }
            if (prev == '\r' && ch == '\n') {
                break;
            }
            if (len < sizeof(line) - 1) {
                line[len++] = ch;       /* excess bytes are dropped, not stored */
            }
            prev = ch;
        }
        if (len > 0 && line[len - 1] == '\r') {
            len--;                      /* the CR of the line terminator */
        }
        line[len] = '\0';

        if (len == 0) {
            break;                      /* empty line: end of the header block */
        }

        if (starts_with_nocase(line, "Content-Length:")) {
            const char *value = line + strlen("Content-Length:");
            char *end;
            long parsed;

            errno = 0;
            parsed = strtol(value, &end, 10);   /* skips leading blanks */
            if (end != value && errno != ERANGE && parsed >= 0 && parsed <= INT_MAX) {
                content_length = (int)parsed;
            }
        }
    }

    printf("Content-Length: %d\n", content_length);
    printf("End HEADER ..\n");
    return content_length;
}
/*
 * Download http://<domain>/<path> over a plain TCP connection and store the
 * response body in test.png.
 *
 * The status line and header fields are consumed by ReadHttpStatus() and
 * ParseHeader(); the body is then copied to the file until Content-Length
 * bytes have arrived or, when the length is unknown, until the server closes
 * the connection (the request asks for that with "Connection: close").
 * Chunked transfer encoding is not decoded.
 *
 * Returns 0 on success and a non-zero value when the download failed.
 */
int main(void){
    char domain[] = "sstatic.net", path[]="stackexchange/img/logos/so/so-logo-med.png";
    char send_data[1024], recv_data[1024];
    struct sockaddr_in server_addr;
    struct hostent *he;
    int sock;
    int exit_code = 0;

    he = gethostbyname(domain);
    if (he == NULL){
        herror("gethostbyname");
        exit(1);
    }
    if (he->h_addrtype != AF_INET || he->h_addr_list[0] == NULL){
        fprintf(stderr, "gethostbyname: no IPv4 address for %s\n", domain);
        exit(1);
    }

    if ((sock = socket(AF_INET, SOCK_STREAM, 0)) == -1){
        perror("Socket");
        exit(1);
    }

    /* h_addr_list[0] is the portable spelling of the h_addr shortcut. */
    memset(&server_addr, 0, sizeof(server_addr));
    server_addr.sin_family = AF_INET;
    server_addr.sin_port = htons(80);
    memcpy(&server_addr.sin_addr, he->h_addr_list[0], sizeof(server_addr.sin_addr));

    printf("Connecting ...\n");
    if (connect(sock, (struct sockaddr *)&server_addr, sizeof(server_addr)) == -1){
        perror("Connect");
        exit(1);
    }

    printf("Sending data ...\n");
    int request_len = snprintf(send_data, sizeof(send_data),
                               "GET /%s HTTP/1.1\r\nHost: %s\r\nConnection: close\r\n\r\n",
                               path, domain);
    if (request_len < 0 || (size_t)request_len >= sizeof(send_data)){
        fprintf(stderr, "request does not fit into the send buffer\n");
        exit(2);
    }
    /* send() may accept only part of the buffer, so loop until all is out. */
    for (int sent = 0; sent < request_len; ){
        ssize_t n = send(sock, send_data + sent, (size_t)(request_len - sent), 0);
        if (n == -1){
            perror("send");
            exit(2);
        }
        sent += (int)n;
    }
    printf("Data sent.\n");

    printf("Recieving data...\n\n");
    int status = ReadHttpStatus(sock);
    if (status != 200){
        /* Do not store an error page or a redirect notice as the image. */
        fprintf(stderr, "Unexpected HTTP status %d; nothing saved.\n", status);
        exit_code = 4;
    } else {
        int content_length = ParseHeader(sock);   /* -1: unknown, 0: nothing to read */
        if (content_length != 0){
            long long bytes = 0;
            FILE *out = fopen("test.png", "wb");
            if (out == NULL){
                perror("fopen");
                close(sock);
                exit(3);
            }
            printf("Saving data...\n\n");
            while (content_length < 0 || bytes < content_length){
                size_t want = sizeof(recv_data);
                /* Never read past the announced end of the body. */
                if (content_length > 0 && (long long)want > content_length - bytes){
                    want = (size_t)(content_length - bytes);
                }
                ssize_t got = recv(sock, recv_data, want, 0);
                if (got == -1){
                    perror("recieve");
                    exit(3);
                }
                if (got == 0){
                    break;              /* server closed the connection */
                }
                if (fwrite(recv_data, 1, (size_t)got, out) != (size_t)got){
                    perror("fwrite");
                    exit(3);
                }
                bytes += got;
                printf("Bytes recieved: %lld from %d\n", bytes, content_length);
            }
            if (fclose(out) != 0){
                perror("fclose");
                exit_code = 3;
            }
            if (content_length > 0 && bytes != content_length){
                fprintf(stderr, "Connection closed after %lld of %d bytes\n",
                        bytes, content_length);
                exit_code = 3;
            }
        }
    }

    close(sock);
    printf("\n\nDone.\n\n");
    return exit_code;
}
Solution 2:[2]
Try something like the below:
#include <sys/socket.h>
#include <sys/errno.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <unistd.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
#include <stdio.h>
#include <openssl/ssl.h>
#include <openssl/err.h>
#define BUFLEN 4096
#define HOST "www.t.edu.pk"
#define PORT 443
int main()
{
int sock, iResult;
char *cmd, *ip;
char recvbuf[BUFLEN];
//
struct sockaddr_in sin;
struct hostent* hent;
//
hent = gethostbyname(HOST);
if(hent == NULL)
{
printf("gethostbyname failed: %d\n", errno);
return -1;
}
printf("gethostbyname succeeded\n");
ip = inet_ntoa(*((struct in_addr*)hent->h_addr_list[0]));
printf("Host IP: %s\n", ip);
//
sock = socket(AF_INET, SOCK_STREAM, 0);
if(sock == -1)
{
printf("socket failed: %d\n", errno);
return -1;
}
printf("socket created\n");
//
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = inet_addr(ip);
sin.sin_port = htons(PORT);
iResult = connect(sock, (struct sockaddr*)&sin, sizeof(sin));
if(iResult < 0)
{
printf("connect failed: %d\n", errno);
return -1;
}
printf("connect succeeded\n");
//
iResult = SSL_library_init();
if(iResult < 0)
{
printf("SSL failed\n");
return -1;
}
printf("SSL library initialised\n");
OpenSSL_add_all_algorithms();
ERR_load_crypto_strings();
SSL_load_error_strings();
SSL_CTX* ctx = SSL_CTX_new(TLSv1_2_client_method());
if(ctx == NULL)
{
printf("ctx failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("ctx loaded\n");
SSL* ssl = SSL_new(ctx);
if(ssl == NULL)
{
printf("ssl failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("ssl loaded\n");
SSL_set_fd(ssl, sock);
SSL_connect(ssl);
//
cmd = "GET / HTTP/1.1\r\nHost: www.t.edu.pk\r\n\r\n";
iResult = SSL_write(ssl, cmd, strlen(cmd));
if(iResult <= 0)
{
printf("SSL write failed\n");
ERR_print_errors_fp(stderr);
return -1;
}
printf("Byte(s) sent: %d\n", iResult);
bzero(recvbuf, BUFLEN);
do
{
iResult = SSL_read(ssl, recvbuf, BUFLEN - 1);
if(iResult < 0)
{
printf("error receiving data\n");
break;
}
if(iResult == 0)
{
printf("host closed connection\n");
break;
}
printf("%s\n", recvbuf);
}while(iResult > 0);
//
iResult = SSL_shutdown(ssl);
if(iResult == 0)
{
printf("SSL shutdown in progress...\n");
}
iResult = SSL_shutdown(ssl);
if(iResult == 1)
{
printf("SSL shutdown complete!\n");
}
if(iResult == -1)
{
printf("SSL shutdown unsuccessful!\n");
}
SSL_CTX_free(ctx);
//
iResult = shutdown(sock, SHUT_RDWR);
if(iResult == -1)
{
printf("Socket shutdown failed: %d\n", errno);
return -1;
}
printf("Socket shutdown succeeded\n");
iResult = close(sock);
if(iResult != 0)
{
printf("error closing socket: %d\n", errno);
return -1;
}
printf("Socket closed\n");
//
return 0;
}
Solution 3:[3]
This works for C/C++ in a Linux environment. You can build it with a makefile, or just add the -lcurl option to g++.
Note that you need the cURL library (libcurl) installed.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
/*
 * Output streams used by WriteMemoryCallback. They are opened in main():
 * a file-scope object cannot be initialised with a function call in C.
 */
FILE *fp = NULL;                                        /* per-chunk size log, file.txt */
char outfilename[FILENAME_MAX] = "file_downloaded.txt"; /* where the download is stored */
FILE *fp1 = NULL;                                       /* downloaded data, outfilename */

/* Growable in-memory copy of the downloaded data. */
struct MemoryStruct {
  char *memory;   /* heap buffer, always NUL-terminated after a callback */
  size_t size;    /* number of payload bytes in memory (terminator excluded) */
};

/*
 * libcurl write callback: appends the received chunk to the MemoryStruct
 * passed through userp, logs the chunk size to fp and writes the chunk to
 * fp1 (each only if the stream is open).
 *
 * Returns the number of bytes handled. Returning anything other than
 * size * nmemb makes libcurl abort the transfer, which is what happens when
 * memory runs out or the output file cannot be written.
 */
static size_t
WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
  size_t realsize = size * nmemb;
  struct MemoryStruct *mem = (struct MemoryStruct *)userp;

  /* The casts on realloc/malloc keep the file acceptable to a C++ compiler too. */
  char *ptr = (char*)realloc(mem->memory, mem->size + realsize + 1);
  if(!ptr) {
    /* out of memory! mem->memory is still valid and is freed by the caller */
    printf("not enough memory (realloc returned NULL)\n");
    return 0;
  }

  mem->memory = ptr;
  memcpy(&(mem->memory[mem->size]), contents, realsize);
  mem->size += realsize;
  mem->memory[mem->size] = 0;

  if(fp)
    fprintf(fp, "%zu - %zu - %zu\n", realsize, size, nmemb);

  if(fp1 && fwrite(contents, 1, realsize, fp1) != realsize) {
    fprintf(stderr, "short write to %s\n", outfilename);
    return 0;
  }

  return realsize;
}

/*
 * Download link_download with libcurl. The data is kept in memory and, via
 * the callback, also written to outfilename; chunk sizes are logged to
 * file.txt.
 *
 * Returns EXIT_SUCCESS when the transfer and all file writes succeeded,
 * EXIT_FAILURE otherwise.
 */
int main(void)
{
  char link_download[] = "https://www.example.com/";
  CURL *curl_handle = NULL;
  CURLcode res;
  struct MemoryStruct chunk;
  int curl_ready = 0;         /* curl_global_init succeeded */
  int rc = EXIT_FAILURE;

  chunk.memory = NULL;
  chunk.size = 0;    /* no data at this point */

  fp = fopen("file.txt", "w");
  fp1 = fopen(outfilename, "wb");
  if(!fp || !fp1) {
    perror("fopen");
    goto cleanup;
  }

  chunk.memory = (char*)malloc(1);  /* will be grown as needed by the realloc above */
  if(!chunk.memory) {
    fprintf(stderr, "not enough memory (malloc returned NULL)\n");
    goto cleanup;
  }

  if(curl_global_init(CURL_GLOBAL_ALL) != CURLE_OK) {
    fprintf(stderr, "curl_global_init() failed\n");
    goto cleanup;
  }
  curl_ready = 1;

  /* init the curl session */
  curl_handle = curl_easy_init();
  if(!curl_handle) {
    fprintf(stderr, "curl_easy_init() failed\n");
    goto cleanup;
  }

  /* specify URL to get */
  curl_easy_setopt(curl_handle, CURLOPT_URL, link_download);

  /* send all data to this function */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

  /* we pass our 'chunk' struct to the callback function */
  curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);

  /* some servers do not like requests that are made without a user-agent
     field, so we provide one */
  curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

  /* get it! */
  res = curl_easy_perform(curl_handle);

  /* check for errors */
  if(res != CURLE_OK) {
    fprintf(stderr, "curl_easy_perform() failed: %s\n",
            curl_easy_strerror(res));
  }
  else {
    /*
     * Now, our chunk.memory points to a memory block that is chunk.size
     * bytes big and contains the remote file.
     *
     * Do something nice with it!
     */
    printf("%lu bytes retrieved\n", (unsigned long)chunk.size);
    rc = EXIT_SUCCESS;
  }

cleanup:
  /* cleanup curl stuff */
  if(curl_handle)
    curl_easy_cleanup(curl_handle);

  free(chunk.memory);

  /* we are done with libcurl, so clean it up */
  if(curl_ready)
    curl_global_cleanup();

  if(fp && fclose(fp) != 0) {
    perror("fclose");
    rc = EXIT_FAILURE;
  }
  /* fclose flushes buffered data, so a failure here means a damaged download */
  if(fp1 && fclose(fp1) != 0) {
    perror("fclose");
    rc = EXIT_FAILURE;
  }
  fp = NULL;
  fp1 = NULL;

  return rc;
}
Sources
This article follows the attribution requirements of Stack Overflow and is licensed under CC BY-SA 3.0.
Source: Stack Overflow
Solution | Source |
---|---|
Solution 1 | |
Solution 2 | Awais Mushtaq |
Solution 3 | ??ng Công T?o |