/* HTTP/1.0 client program * Copyright (C) 2000, HIRATA Yasuyuki * (Computer Languages Laboratory, NAIST) * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR (HIRATA YASUYUKI) ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include #include #include #include #include #include #include #include #include #include #include #define strnncpy(dst, src, maxlen, len) \ strncpy((dst), (src), (((maxlen) < (len))? (maxlen): (len))) #define usage() \ errx(EXIT_FAILURE, "usage: %s [-O outfile] URI", __progname); #define CRLF "\r\n" #define DEFAULT_INDEX_FILE "index.html" #define HTTP_USER_AGENT_NAME "CreamyMami" #define HTTP_USER_AGENT_VERSION "0.0" #define HTTP_FROM "mail@address.example" #define HTTP_VERSION "HTTP/1.0" #define URI_REGEX /* subset regex of URI defined in RFC 2396 */ \ /* scheme */ \ "^([a-z][a-z0-9+.-]*):" \ /* userinfo (opt) */ \ "//(([a-z0-9_.!~*\'();:&=+$,-]*)@)?" \ /* hostname */ \ "(((([a-z0-9]|[a-z0-9][a-z0-9-]*[a-z0-9])\\.)*" \ "([a-z]|[a-z][a-z0-9-]*[a-z]))\\.?|" \ /* IPv4address */ \ "([0-9]+\\.[0-9]+\\.[0-9]+\\.[0-9]+))" \ /* port (opt) */ \ "(:([0-9]+))?" \ /* path (opt) */ \ "/(([a-z0-9_.!~*\'():@&=+$,-]|%[0-9a-f][0-9a-f])*" \ "(;([a-z0-9_.!~*\'():@&=+$,-]|%[0-9a-f][0-9a-f])*)*" \ "(/([a-z0-9_.!~*\'():@&=+$,-]|%[0-9a-f][0-9a-f])*" \ "(;([a-z0-9_.!~*\'():@&=+$,-]|%[0-9a-f][0-9a-f])*)*" ")*" ")" \ /* query (opt) */ \ "(\\?([a-z0-9_.!~*\'():@&=+$,;/?:@&=+$,-]|%[0-9a-f][0-9a-f])*)?$" #define URI_RE_NMATCH 32 #define URI_RE_SCHEME_NUM 1 #define URI_RE_USERINFO_NUM 3 #define URI_RE_HOST_NUM 4 #define URI_RE_HOSTNAME_NUM 5 #define URI_RE_IPV4ADDR_NUM 9 #define URI_RE_PORT_NUM 11 #define URI_RE_PATH_NUM 12 #define URI_RE_QUERY_NUM 20 #define URI_SCHEME_MAXLEN 16 #define URI_USERINFO_MAXLEN 32 #define URI_HOST_MAXLEN 128 #define URI_PORT_MAXLEN 5 #define URI_PATH_MAXLEN 128 #define URI_QUERY_MAXLEN 128 #define URI_MAXLEN (URI_SCHEME_MAXLEN + URI_USERINFO_MAXLEN + \ URI_HOST_MAXLEN + URI_PATH_MAXLEN + URI_QUERY_MAXLEN + 7) struct uri_t { char scheme[URI_SCHEME_MAXLEN + 1]; char userinfo[URI_USERINFO_MAXLEN + 1]; char host[URI_HOST_MAXLEN + 1]; char port[URI_PORT_MAXLEN + 1]; char path[URI_PATH_MAXLEN + 1]; char query[URI_QUERY_MAXLEN + 1]; }; int parse_uri(const char *uri, struct uri_t *urip); FILE *http_open(const struct uri_t *urip); extern char *__progname; int main(int argc, char **argv) { int c, last; char *outfile; FILE *outfp; FILE *rfp; struct uri_t uri; outfile = NULL; while((c = getopt(argc, argv, "O:")) != -1) switch(c) { case 'O': outfile = optarg; case '?': default: usage(); } argc -= optind; argv += optind; if(!argc) usage(); if(!parse_uri(*argv, &uri)) errx(EXIT_FAILURE, "URI format error."); if(!outfile) { outfile = rindex(uri.path, '/'); if(!outfile || !outfile[1]) outfile = DEFAULT_INDEX_FILE; else outfile++; } if(strcmp(outfile, "-") != 0) { if((outfp = fopen(outfile, "w")) == NULL) errx(EXIT_FAILURE, "Could not open outfile %s.", outfile); } else outfp = stdout; printf("%s =>\n", *argv); printf("scheme: [%s]\nuserinfo: [%s]\nhost: [%s]\n" "port: [%s]\npath: [%s]\nquery: [%s]\n\n", uri.scheme, uri.userinfo, uri.host, uri.port, uri.path, uri.query); if(strcmp(uri.scheme, "http") != 0) errx(EXIT_FAILURE, "Scheme %s not supported.", uri.scheme); if((rfp = http_open(&uri)) == NULL) { fprintf(stderr, "Could not open http."); return EXIT_FAILURE; } c = last = 0; for(;;) { c = fgetc(rfp); if(c == '\r') continue; else if(c == EOF || (c == '\n' && last == '\n')) break; putchar(c); last = c; } while((c = fgetc(rfp)) != EOF) fputc(c, outfp); return EXIT_SUCCESS; } int parse_uri(const char *uri, struct uri_t *urip) { regex_t reg; regmatch_t match[URI_RE_NMATCH]; if(!uri || regcomp(®, URI_REGEX, REG_EXTENDED | REG_ICASE) != 0 || regexec(®, uri, URI_RE_NMATCH, match, 0) != 0) return 0; memset(urip, 0, sizeof(struct uri_t)); if(-1 < match[URI_RE_SCHEME_NUM].rm_so) strnncpy(urip->scheme, uri + match[URI_RE_SCHEME_NUM].rm_so, URI_SCHEME_MAXLEN, match[URI_RE_SCHEME_NUM].rm_eo - match[URI_RE_SCHEME_NUM].rm_so); else urip->scheme[0] = 0; if(-1 < match[URI_RE_USERINFO_NUM].rm_so) strnncpy(urip->userinfo, uri + match[URI_RE_USERINFO_NUM].rm_so, URI_USERINFO_MAXLEN, match[URI_RE_USERINFO_NUM].rm_eo - match[URI_RE_USERINFO_NUM].rm_so); else urip->userinfo[0] = 0; if(-1 < match[URI_RE_HOST_NUM].rm_so) strnncpy(urip->host, uri + match[URI_RE_HOST_NUM].rm_so, URI_HOST_MAXLEN, match[URI_RE_HOST_NUM].rm_eo - match[URI_RE_HOST_NUM].rm_so); else urip->host[0] = 0; if(-1 < match[URI_RE_PORT_NUM].rm_so) strnncpy(urip->port, uri + match[URI_RE_PORT_NUM].rm_so, URI_PORT_MAXLEN, match[URI_RE_PORT_NUM].rm_eo - match[URI_RE_PORT_NUM].rm_so); else urip->port[0] = 0; if(-1 < match[URI_RE_PATH_NUM].rm_so) strnncpy(urip->path, uri + match[URI_RE_PATH_NUM].rm_so, URI_PATH_MAXLEN, match[URI_RE_PATH_NUM].rm_eo - match[URI_RE_PATH_NUM].rm_so); else urip->path[0] = 0; if(-1 < match[URI_RE_QUERY_NUM].rm_so) strnncpy(urip->query, uri + match[URI_RE_QUERY_NUM].rm_so, URI_QUERY_MAXLEN, match[URI_RE_QUERY_NUM].rm_eo - match[URI_RE_QUERY_NUM].rm_so); else urip->query[0] = 0; return 1; } FILE *http_open(const struct uri_t *urip) { FILE *sfp, *rfp; int sock; struct sockaddr_in saddr; struct hostent *hp; struct servent *sp; if((sock = socket(PF_INET, SOCK_STREAM, 0)) < 0) return NULL; memset(&saddr, 0, sizeof saddr); saddr.sin_len = sizeof saddr; saddr.sin_family = AF_INET; if(inet_aton(urip->host, &saddr.sin_addr) == 0) { if((hp = gethostbyname2(urip->host, AF_INET)) == NULL) return NULL; saddr.sin_addr = *(struct in_addr *)hp->h_addr_list[0]; } sp = getservbyname("http", "tcp"); if(*(urip->port)) saddr.sin_port = atoi(urip->port); else saddr.sin_port = sp->s_port; if (connect(sock, (struct sockaddr *)&saddr, sizeof(saddr)) < 0) return NULL; if((sfp = fdopen(sock, "w")) == NULL || (rfp = fdopen(sock, "r")) == NULL) return NULL; fprintf(sfp, "GET /%s%s%s %s" CRLF, urip->path, (*(urip->query)? "?": ""), urip->query, HTTP_VERSION); fprintf(sfp, "Host: %s" CRLF, urip->host); fprintf(sfp, "User-Agent: %s/%s" CRLF, HTTP_USER_AGENT_NAME, HTTP_USER_AGENT_VERSION); fprintf(sfp, CRLF); fflush(sfp); return rfp; }