Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

wvhttpstream.cc

Go to the documentation of this file.
00001 /* 00002 * Worldvisions Weaver Software: 00003 * Copyright (C) 1997-2002 Net Integration Technologies, Inc. 00004 * 00005 * A fast, easy-to-use, parallelizing, pipelining HTTP/1.1 file retriever. 00006 * 00007 * See wvhttppool.h. 00008 */ 00009 #include "wvhttppool.h" 00010 #include "wvtcp.h" 00011 #include "wvsslstream.h" 00012 #include "wvbuf.h" 00013 #include "wvbase64.h" 00014 #include "strutils.h" 00015 00016 #ifdef _WIN32 00017 #define ETIMEDOUT WSAETIMEDOUT 00018 #endif 00019 00020 WvHttpStream::WvHttpStream(const WvIPPortAddr &_remaddr, WvStringParm _username, 00021 bool _ssl, WvIPPortAddrTable &_pipeline_incompatible) 00022 : WvUrlStream(_remaddr, _username, WvString("HTTP %s", _remaddr)), 00023 pipeline_incompatible(_pipeline_incompatible) 00024 { 00025 log("Opening server connection.\n"); 00026 http_response = ""; 00027 encoding = Unknown; 00028 bytes_remaining = 0; 00029 in_chunk_trailer = false; 00030 pipeline_test_count = 0; 00031 last_was_pipeline_test = false; 00032 00033 enable_pipelining = global_enable_pipelining 00034 && !pipeline_incompatible[target.remaddr]; 00035 ssl = _ssl; 00036 00037 if (ssl) 00038 cloned = new WvSSLStream(static_cast<WvFDStream*>(cloned)); 00039 00040 sent_url_request = false; 00041 00042 alarm(60000); // timeout if no connection, or something goes wrong 00043 } 00044 00045 00046 WvHttpStream::~WvHttpStream() 00047 { 00048 log(WvLog::Debug2, "Deleting.\n"); 00049 if (geterr()) 00050 log("Error was: %s\n", errstr()); 00051 close(); 00052 } 00053 00054 00055 void WvHttpStream::close() 00056 { 00057 // assume pipelining is broken if we're closing without doing at least 00058 // one successful pipelining test and a following non-test request. 00059 if (enable_pipelining && max_requests > 1 00060 && (pipeline_test_count < 1 00061 || (pipeline_test_count == 1 && last_was_pipeline_test))) 00062 pipelining_is_broken(2); 00063 00064 if (isok()) 00065 log("Closing.\n"); 00066 WvStreamClone::close(); 00067 00068 if (geterr()) 00069 { 00070 // if there was an error, count the first URL as done. This prevents 00071 // retrying indefinitely. 00072 WvUrlRequest *msgurl = curl; 00073 if (!msgurl && !urls.isempty()) 00074 msgurl = urls.first(); 00075 if (!msgurl && !waiting_urls.isempty()) 00076 msgurl = waiting_urls.first(); 00077 if (msgurl) 00078 log("URL '%s' is FAILED\n", msgurl->url); 00079 } 00080 waiting_urls.zap(); 00081 if (curl) 00082 doneurl(); 00083 } 00084 00085 00086 void WvHttpStream::doneurl() 00087 { 00088 assert(curl != NULL); 00089 log("Done URL: %s\n", curl->url); 00090 00091 http_response = ""; 00092 encoding = Unknown; 00093 in_chunk_trailer = false; 00094 bytes_remaining = 0; 00095 00096 last_was_pipeline_test = curl->pipeline_test; 00097 bool broken = false; 00098 if (last_was_pipeline_test) 00099 { 00100 pipeline_test_count++; 00101 if (pipeline_test_count == 1) 00102 start_pipeline_test(&curl->url); 00103 else if (pipeline_test_response != http_response) 00104 { 00105 // getting a bit late in the game to be detecting brokenness :( 00106 // However, if the response code isn't the same for both tests, 00107 // something's definitely screwy. 00108 pipelining_is_broken(4); 00109 broken = true; 00110 } 00111 pipeline_test_response = http_response; 00112 } 00113 00114 assert(curl == urls.first()); 00115 curl->done(); 00116 curl = NULL; 00117 sent_url_request = false; 00118 urls.unlink_first(); 00119 00120 if (broken) 00121 close(); 00122 00123 request_next(); 00124 } 00125 00126 00127 static WvString encode64(WvStringParm user, WvStringParm password) 00128 { 00129 WvBase64Encoder encoder; 00130 WvString ret; 00131 encoder.flushstrstr(WvString("%s:%s", user, password), ret); 00132 return ret; 00133 } 00134 00135 00136 static WvString fixnl(WvStringParm nonl) 00137 { 00138 WvDynBuf b; 00139 const char *cptr; 00140 00141 for (cptr = nonl; cptr && *cptr; cptr++) 00142 { 00143 if (*cptr == '\r') 00144 continue; 00145 else if (*cptr == '\n') 00146 b.put("\r", 1); // put BOTH \r and \n 00147 b.put(cptr, 1); 00148 } 00149 00150 return b.getstr(); 00151 } 00152 00153 00154 WvString WvHttpStream::request_str(WvUrlRequest *url, bool keepalive) 00155 { 00156 WvString request; 00157 WvString auth(""), content = putstream_data.getstr(); 00158 if(!!url->url.getuser() && !!url->url.getpassword()) 00159 auth = WvString("Authorization: Basic %s\n", 00160 encode64(url->url.getuser(), url->url.getpassword())); 00161 00162 request = fixnl(WvString("%s %s HTTP/1.1\n" 00163 "Host: %s:%s\n" 00164 "Connection: %s\n" 00165 "%s" 00166 "%s" 00167 "%s%s" 00168 "\n" 00169 "%s", 00170 url->method, 00171 url->url.getfile(), 00172 url->url.gethost(), url->url.getport(), 00173 keepalive ? "keep-alive" : "close", 00174 auth, 00175 (content.len() > 0 ? WvString("Content-Length: %s\n", content.len()).cstr() : ""), 00176 trim_string(url->headers.edit()), 00177 !!url->headers ? "\n" : "", 00178 (content.len() > 0 ? content.cstr() : "")) 00179 ); 00180 return request; 00181 } 00182 00183 00184 void WvHttpStream::send_request(WvUrlRequest *url) 00185 { 00186 request_count++; 00187 log("Request #%s: %s\n", request_count, url->url); 00188 write(request_str(url, url->pipeline_test 00189 || request_count < max_requests)); 00190 sent_url_request = true; 00191 alarm(60000); 00192 } 00193 00194 00195 void WvHttpStream::start_pipeline_test(WvUrl *url) 00196 { 00197 WvUrl location(WvString( 00198 "%s://%s:%s/wvhttp-pipeline-check-should-not-exist/", 00199 url->getproto(), url->gethost(), url->getport())); 00200 WvUrlRequest *testurl = new WvUrlRequest(location, "HEAD", "", NULL, 00201 false, true); 00202 testurl->instream = this; 00203 send_request(testurl); 00204 urls.append(testurl, true, "sent_running_url"); 00205 } 00206 00207 00208 void WvHttpStream::request_next() 00209 { 00210 // Clear the putstream buffer before we start any new requests 00211 putstream_data.zap(); 00212 00213 // don't do a request if we've done too many already or we have none 00214 // waiting. 00215 if (request_count >= max_requests || waiting_urls.isempty()) 00216 return; 00217 00218 // don't do more than one request at a time if we're not pipelining. 00219 if (!enable_pipelining && !urls.isempty()) 00220 return; 00221 00222 // okay then, we really do want to send a new request. 00223 WvUrlRequest *url = waiting_urls.first(); 00224 00225 waiting_urls.unlink_first(); 00226 if (!url->putstream) 00227 { 00228 if (enable_pipelining && !request_count && max_requests > 1) 00229 start_pipeline_test(&url->url); 00230 send_request(url); 00231 } 00232 urls.append(url, false, "sent_running_url"); 00233 } 00234 00235 00236 void WvHttpStream::pipelining_is_broken(int why) 00237 { 00238 if (!pipeline_incompatible[target.remaddr]) 00239 { 00240 pipeline_incompatible.add(new WvIPPortAddr(target.remaddr), true); 00241 log("Pipelining is broken on this server (%s)! Disabling.\n", why); 00242 } 00243 } 00244 00245 00246 bool WvHttpStream::pre_select(SelectInfo &si) 00247 { 00248 SelectRequest oldwant = si.wants; 00249 WvUrlRequest *url; 00250 00251 if (WvUrlStream::pre_select(si)) 00252 return true; 00253 00254 if (!urls.isempty()) 00255 { 00256 url = urls.first(); 00257 if(url && url->putstream && url->putstream->pre_select(si)) 00258 return true; 00259 } 00260 00261 si.wants = oldwant; 00262 return false; 00263 } 00264 00265 00266 bool WvHttpStream::post_select(SelectInfo &si) 00267 { 00268 SelectRequest oldwant = si.wants; 00269 WvUrlRequest *url; 00270 00271 if (WvUrlStream::post_select(si)) 00272 return true; 00273 00274 if (!urls.isempty()) 00275 { 00276 url = urls.first(); 00277 if(url && url->putstream && url->putstream->post_select(si)) 00278 return true; 00279 } 00280 00281 si.wants = oldwant; 00282 return false; 00283 } 00284 00285 00286 void WvHttpStream::execute() 00287 { 00288 char buf[1024], *line; 00289 size_t len; 00290 00291 WvStreamClone::execute(); 00292 00293 // make connections timeout after some idleness 00294 if (alarm_was_ticking) 00295 { 00296 log(WvLog::Debug4, "urls count: %s\n", urls.count()); 00297 if (!urls.isempty()) 00298 { 00299 seterr(ETIMEDOUT); 00300 00301 // Must check again here since seterr() 00302 // will close our stream and if we only 00303 // had one url then it'll be gone. 00304 if (!urls.isempty()) 00305 { 00306 WvUrlRequest *url = urls.first(); 00307 if (url->outstream) 00308 url->outstream->seterr(ETIMEDOUT); 00309 } 00310 } 00311 else 00312 close(); // timed out, but not really an error 00313 return; 00314 } 00315 00316 // Die if somebody closed our outstream. This is so that if we were 00317 // downloading a really big file, they can stop it in the middle and 00318 // our next url request can start downloading immediately. 00319 if (curl && !curl->outstream) 00320 { 00321 // don't complain about pipelining failures 00322 pipeline_test_count++; 00323 last_was_pipeline_test = false; 00324 00325 close(); 00326 if (curl) 00327 doneurl(); 00328 return; 00329 } 00330 else if (curl) 00331 curl->inuse = true; 00332 00333 if(!sent_url_request && !urls.isempty()) 00334 { 00335 WvUrlRequest *url = urls.first(); 00336 if(url) 00337 { 00338 if(url->putstream) 00339 { 00340 int len = 0; 00341 if(url->putstream->isok()) 00342 len = url->putstream->read(putstream_data, 1024); 00343 00344 if(!url->putstream->isok() || len == 0) 00345 { 00346 url->putstream = NULL; 00347 send_request(url); 00348 } 00349 } 00350 } 00351 } 00352 00353 if (!curl) 00354 { 00355 // in the header section 00356 line = getline(0); 00357 if (line) 00358 { 00359 line = trim_string(line); 00360 log(WvLog::Debug4, "Header: '%s'\n", line); 00361 if (!http_response) 00362 { 00363 http_response = line; 00364 00365 // there are never two pipeline test requests in a row, so 00366 // a second response string exactly like the pipeline test 00367 // response implies that everything between the first and 00368 // second test requests was lost: bad! 00369 if (last_was_pipeline_test 00370 && http_response == pipeline_test_response) 00371 { 00372 pipelining_is_broken(1); 00373 close(); 00374 return; 00375 } 00376 00377 // http response #400 is "invalid request", which we 00378 // shouldn't be sending. If we get one of these right after 00379 // a test, it probably means the stuff that came after it 00380 // was mangled in some way during transmission ...and we 00381 // should throw it away. 00382 if (last_was_pipeline_test && !!http_response) 00383 { 00384 const char *cptr = strchr(http_response, ' '); 00385 if (cptr && atoi(cptr+1) == 400) 00386 { 00387 pipelining_is_broken(3); 00388 close(); 00389 return; 00390 } 00391 } 00392 } 00393 00394 if (urls.isempty()) 00395 { 00396 log("got unsolicited data.\n"); 00397 seterr("unsolicited data from server!"); 00398 return; 00399 } 00400 00401 if (!strncasecmp(line, "Content-length: ", 16)) 00402 { 00403 bytes_remaining = atoi(line+16); 00404 encoding = ContentLength; 00405 } 00406 else if (!strncasecmp(line, "Transfer-Encoding: ", 19) 00407 && strstr(line+19, "chunked")) 00408 { 00409 encoding = Chunked; 00410 } 00411 00412 if (line[0]) 00413 { 00414 char *p; 00415 WvBufUrlStream *outstream = urls.first()->outstream; 00416 00417 if ((p = strchr(line, ':')) != NULL) 00418 { 00419 *p = 0; 00420 p = trim_string(p+1); 00421 struct WvHTTPHeader *h = new struct WvHTTPHeader(line, p); 00422 if (outstream) 00423 outstream->headers.add(h, true); 00424 } 00425 else if (strncasecmp(line, "HTTP/", 5) == 0) 00426 { 00427 char *p = strchr(line, ' '); 00428 if (p) 00429 { 00430 *p = 0; 00431 if (outstream) 00432 { 00433 outstream->version = line+5; 00434 outstream->status = atoi(p+1); 00435 } 00436 } 00437 } 00438 } 00439 else 00440 { 00441 // blank line is the beginning of data section 00442 curl = urls.first(); 00443 in_chunk_trailer = false; 00444 log(WvLog::Debug4, 00445 "Starting data: %s (enc=%s)\n", bytes_remaining, encoding); 00446 00447 if (encoding == Unknown) 00448 encoding = Infinity; // go until connection closes itself 00449 00450 if (curl->method == "HEAD") 00451 { 00452 log("Got all headers.\n"); 00453 // getline(0); 00454 doneurl(); 00455 } 00456 } 00457 } 00458 } 00459 else if (encoding == Chunked && !bytes_remaining) 00460 { 00461 line = getline(0); 00462 if (line) 00463 { 00464 line = trim_string(line); 00465 00466 if (in_chunk_trailer) 00467 { 00468 // in the trailer section of a chunked encoding 00469 log(WvLog::Debug4, "Trailer: '%s'\n", line); 00470 00471 // a blank line means we're finally done! 00472 if (!line[0]) 00473 doneurl(); 00474 } 00475 else 00476 { 00477 // in the "length line" section of a chunked encoding 00478 if (line[0]) 00479 { 00480 bytes_remaining = (size_t)strtoul(line, NULL, 16); 00481 if (!bytes_remaining) 00482 in_chunk_trailer = true; 00483 log(WvLog::Debug4, "Chunk length is %s ('%s').\n", 00484 bytes_remaining, line); 00485 } 00486 } 00487 } 00488 } 00489 else if (encoding == Infinity) 00490 { 00491 // just read data until the connection closes, and assume all was 00492 // well. It sucks, but there's no way to tell if all the data arrived 00493 // okay... that's why Chunked or ContentLength encoding is better. 00494 len = read(buf, sizeof(buf)); 00495 if (len) 00496 log(WvLog::Debug5, "Infinity: read %s bytes.\n", len); 00497 if (curl->outstream) 00498 curl->outstream->write(buf, len); 00499 00500 if (!isok()) 00501 doneurl(); 00502 } 00503 else // not chunked or currently in a chunk - read 'bytes_remaining' bytes. 00504 { 00505 // in the data section of a chunked or content-length encoding, 00506 // with 'bytes_remaining' bytes of data left. 00507 00508 if (bytes_remaining > sizeof(buf)) 00509 len = read(buf, sizeof(buf)); 00510 else 00511 len = read(buf, bytes_remaining); 00512 bytes_remaining -= len; 00513 if (len) 00514 log(WvLog::Debug5, 00515 "Read %s bytes (%s bytes left).\n", len, bytes_remaining); 00516 if (curl->outstream) 00517 curl->outstream->write(buf, len); 00518 00519 if (!bytes_remaining && encoding == ContentLength) 00520 doneurl(); 00521 00522 if (bytes_remaining && !isok()) 00523 seterr("connection interrupted"); 00524 } 00525 00526 if (urls.isempty()) 00527 alarm(5000); // just wait a few seconds before closing connection 00528 else 00529 alarm(60000); // give the server a minute to respond, if we're waiting 00530 }

Generated on Tue Oct 5 01:09:20 2004 for WvStreams by doxygen 1.3.7