#line 2 "http.c"
/*-
 * C-SaCzech
 * Copyright (c) 1996-2002 Jaromir Dolecek <dolecek@ics.muni.cz>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by Jaromir Dolecek
 *	for the CSacek project.
 * 4. The name of Jaromir Dolecek may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY JAROMIR DOLECEK ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL JAROMIR DOLECEK BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* $Id: http.c,v 1.201 2002/02/03 11:13:41 dolecek Exp $ */

#include "csacek.h"
#include "csa_version.h"

/*
 * Forward declarations of the file-local helpers. The parameter lists are
 * wrapped in __P(()), a macro that is not defined in this file
 * (presumably it hides prototypes from pre-ANSI compilers -- confirm in
 * csacek.h).
 */
static int x_is_member_of __P((const char * const array[], const char *item,
	int checkprefix));
static int x_fgets_line __P((csa_params_t *, csa_String_b *, int));

/*
 * Request headers that must not be passed from the client on to the
 * server. The table is NULL-terminated because x_is_member_of() stops at
 * the first NULL entry. The cache validators are listed only when
 * CSA_DO_NOT_CACHE is defined.
 */
static const char * const unsafe_headersin[] = {
	"Content-MD5",
#ifdef CSA_DO_NOT_CACHE
	"If-Modified-Since",	"If-Unmodified-Since",
	"If-Match",		"If-None-Match",
	"Unless-Modified-Since", /* MSIE specific */
#endif
	NULL
};

/*
 * Response headers that must not be passed from the server's reply back
 * to the client; NULL-terminated like the table above. Last-Modified is
 * suppressed only when CSA_DO_NOT_CACHE is defined.
 */
static const char * const unsafe_headersout[] = {
	"Content-MD5",
#ifdef CSA_DO_NOT_CACHE
	"Last-Modified",
#endif
	NULL
};

/*
 * Used in csa_decodequery() to map a 4-bit value (0..15) to its upper-case
 * hexadecimal digit. The array holds exactly the 16 digits, so there is
 * no terminating NUL -- never treat it as a C string.
 */
static const char csa_hexdigits[16] = "0123456789ABCDEF";

/*
 * Looks ``item'' up in ``array'', ignoring case. The last entry of
 * ``array'' has to be NULL.
 *
 * With non-zero ``checkprefix'' an entry matches as soon as ``item''
 * starts with it; otherwise the two strings have to be equal as a whole.
 *
 * Returns 1 when a matching entry exists, 0 otherwise.
 */
static int
x_is_member_of(array, item, checkprefix)
  const char * const array[];
  const char *item;
  int checkprefix;
{
	const char * const *entry;
	int differs;

	for (entry = array; *entry != NULL; entry++) {
		if (checkprefix)
			differs = strncasecmp(*entry, item, strlen(*entry));
		else
			differs = strcasecmp(*entry, item);

		if (differs == 0)
			return 1;
	}

	return 0;
}

/*
 * Reads the response one byte at a time through csa_md_read_response()
 * until a '\n' has been stored or the reader returns zero. The buffer is
 * enlarged (doubled) as needed; all storage comes from p->pool_tmp.
 *
 * A fresh 32 byte buffer is set up when buf->value is NULL or
 * buf->maxlen is 0. Any previous content of the buffer is discarded.
 *
 * When ``raw'' is zero and the line ended with '\n', the newline and all
 * '\r' characters directly in front of it are removed. The result is
 * always NUL-terminated and buf->len holds its length.
 *
 * Returns 1 if at least one byte has been read, 0 otherwise.
 */
static int
x_fgets_line(p, buf, raw)
  csa_params_t *p;
  csa_String_b *buf;
  int raw;
{
	char *temp, input='\0';
	int some_data_read=0;

	if (buf->maxlen == 0 || buf->value == NULL) {
		/*
		 * Buffer has to be initialized. The size must be set
		 * BEFORE allocating - allocating with the old maxlen
		 * (0 here) would make the stores below overrun the block.
		 */
		buf->maxlen = 32;
		buf->value = (char *) ap_palloc(p->pool_tmp, (int) buf->maxlen);
	}

	buf->len	= 0;
	buf->value[0]	= '\0';

	while( input != '\n' && csa_md_read_response(p, &input, 1) )
	{
		some_data_read = 1;

		/* keep room for this byte and the terminating NUL */
		if (buf->len + 2 >= buf->maxlen)
		{
			buf->maxlen *= 2;
			temp = (char *) ap_palloc(p->pool_tmp,(int)buf->maxlen);
			memcpy(temp, buf->value, buf->len);
			buf->value = temp;
		}

		buf->value[buf->len++] = input;
	}

	/* input == '\n' implies that at least that byte has been stored */
	if (!raw && input == '\n') {
		buf->len--; /* skip the \n */
		while(buf->len>0 && buf->value[buf->len-1] == '\r')
			buf->len--;
	}

	buf->value[buf->len] = '\0';

	return some_data_read;
}

/*
 * Splits a header line of the form "Name: value; parameters" in place.
 *
 *   *header   - the name (``line'' itself, cut at the first colon)
 *   *value    - everything behind the colon, without leading blanks
 *               (space, tab, CR, LF) and without trailing white space
 *   *val      - the value up to the first semicolon, white space in
 *               front of the semicolon removed; a copy allocated from
 *               ``wpool'' when a semicolon exists, otherwise the same
 *               pointer as *value
 *   *options  - text behind the first semicolon with leading blanks
 *               skipped, or NULL when there is no semicolon
 *
 * Note: ``line'' gets modified (NUL bytes are written into it) and
 *   *header, *value and *options point into it.
 *
 * Returns 0 on success, 1 when the line contains no colon (in that case
 * none of the output pointers is written).
 */
int
csa_split_header(wpool, line, header, value, val, options)
  struct pool *wpool;
  char *line;
  char **header, **value, **val, **options;
{
	char *colon, *tail, *semi, *params, *last;

	colon = strchr(line, ':');
	if (colon == NULL)
		return 1;

	/* terminate the name; the value starts behind the following blanks */
	*colon = '\0';
	colon++;
	colon += strspn(colon, " \t\r\n");

	*header = line;
	*value = colon;

	/* chop trailing white space off the value */
	tail = strchr(*value, '\0');
	if (tail > *value) {
		for (tail--;
		     tail >= *value && isspace((unsigned char)*tail);
		     tail--)
			;
		tail[1] = 0;
	}

	semi = strchr(*value, ';');
	if (semi == NULL) {
		/* no parameters at all */
		*val = *value;
		*options = NULL;
		return 0;
	}

	/* parameters start behind the semicolon and any blanks after it */
	params = semi + 1;
	params += strspn(params, " \t\r\n");
	*options = params;

	/*
	 * Walk back from the semicolon over white space to find the last
	 * character of the bare value; the walk ends at the latest on the
	 * first character of the value or on a NUL byte.
	 */
	last = semi;
	for (;;) {
		last--;
		if (*last == '\0' || last <= *value
		    || !isspace((unsigned char)*last))
			break;
	}
	*val = ap_pstrndup(wpool, *value, last - *value + 1);

	return 0;
}


/* 
 * makes list of "safe" headers from p->headersin
 * returns pointer to the first member of list created
 */
csa_item_t * 
csa_make_headersin(p)
  csa_params_t *p;
{
	csa_item_t *item, *hh=NULL;
	const csa_String *al;
	const char *clientenc, *cvalue, *new_al;
	int itemflag;
#ifndef CSA_DO_NOT_CACHE
	const char *opt;
#endif

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "make_headersin: called");
#endif /* CSA_DEBUG */

	if (p == NULL) return NULL;

	item = p->headersin;
	for(;item;item = item->prev)
	{
		itemflag  = CSA_I_HEADERCAT;

		if (x_is_member_of(unsafe_headersin, item->key.value, 0))
			continue;

		/* following does inverse of ETag transformation (see */
		/* csa_process_headers() to see how ETag is modified) */
		if (strcasecmp(item->key.value, "If-Match") == 0
			|| strcasecmp(item->key.value, "If-None-Match") == 0
			|| strcasecmp(item->key.value, "If-Range") == 0)
		{
			char *temp, *pivot, *last, *value;
			value = ap_pstrdup(p->pool_tmp, item->value.value);
			last = temp = value;
			while((temp = strchr(temp, ',')) || *last) {
				if (!temp) temp = strchr(last, '\0');
				pivot = temp - 1;
				while(pivot > last && (isspace((unsigned char)*pivot)
					|| (*pivot >= '0' && *pivot <= '9')))
						pivot--;
				if (*pivot == '-' &&
				    (cstools_t)atoi(pivot+1) == p->outcharset)
				{
					strcpy(pivot, temp);
				}
				last = ++temp;
			}

			/* do not forward If-Range header */
			if (strcasecmp(item->key.value, "If-Range") == 0) {
				p->if_range = ap_pstrdup(p->pool_req, value);
				continue;
			}

			cvalue = value;
		}
#ifndef CSA_DO_NOT_CACHE
		/* MS is once again breaking standard: MSIE sends parameter
		 * length with If-[Un]Modified-Since header and IIS checks the
		 * length of document as well as the date of creation against
		 * it - as "physical" size of the document can be different
		 * from amount of data CSacek actually sent to client, so the
		 * document is unnecessarily sent to client again */
		else if ((strcasecmp(item->key.value, "If-Modified-Since") == 0
		    || strcasecmp(item->key.value, "If-UnModified-Since") == 0)
			&& (opt = strchr(item->value.value, ';')) != NULL)
		{
			cvalue = ap_pstrndup(p->pool_tmp, item->value.value,
						opt - item->value.value);
		}
#endif /* !CSA_DO_NOT_CACHE */
		else {
			if (strcasecmp(item->key.value, "Cookie") == 0)
				itemflag = 0; /* don't cat cookies */
			cvalue = item->value.value;
		}

		csa_setitem(p, &hh, item->key.value, cvalue,CSA_I_TMP|itemflag);
	}		

	/* set Accept-Charset to charset preferred by CSacek */
	csa_setitem(p, &hh, "Accept-Charset",
		"iso-8859-2, utf-8;q=0.5, us-ascii;q=0.001",
		CSA_I_TMP|CSA_I_OVERWRITE);

	/* if first token of Accept-Language is not equal to partname,
	 * prepend partname to it */
	al = csa_getheaderin(p, "Accept-Language");
	new_al = NULL;
	if (al) {
		size_t tokenlen = strcspn(al->value, " \t,");

		if (p->part.len > 0 && tokenlen == (p->part.len - 1)
		    && strncasecmp(al->value, p->part.value+1, tokenlen) == 0)
		{
			char *buf = (char *) ap_palloc(p->pool_tmp,
					(int)(al->len + 2 + p->part.len + 1));
			sprintf(buf, "%s, %s", p->part.value + 1, al->value);
			new_al = buf;
		}
	}
	csa_setitem(p, &hh, "Accept-Language",
		(new_al) ? (new_al) : (p->part.value + 1),
		CSA_I_TMP|CSA_I_OVERWRITE);
		
	/* don't forward Accept-Encoding header */
	if (csa_getitem(hh, "Accept-Encoding"))
		csa_unsetitem(&hh, "Accept-Encoding");

	/* don't forward Range header */
	al = csa_getitem(hh, "Range");
	if (al) {
		p->range = csa_range_compile(p->pool_req, al->value);
		csa_unsetitem(&hh, "Range");
	}

	/* do not forward Transfer-Encoding header */
	if (csa_getitem(hh, "Transfer-Encoding"))
		csa_unsetitem(&hh, "Transfer-Encoding");

	/* pass the information about encoding in which the data
	 * are sent to client */
	clientenc = cstools_name(p->outcharset, CSTOOLS_MIMENAME);
	if (clientenc && *clientenc) {
		/* only set the header if mime name is not null */
		csa_setitem(p, &hh, "X-Client-Charset", clientenc, CSA_I_TMP);
	}
	return hh;
}

/*
 * Processes the status line and the headers returned by the sub-request
 * and fills the headers for CSacek's own reply via csa_setheaderout().
 *
 * The function can be re-entered: while CSA_FL_HEADERS_READING is set in
 * p->flags, the status line is not parsed again and reading goes on with
 * the next header lines.
 *
 * Returns
 *   CSA_OK         headers are complete (also returned for a response
 *                  not starting with "HTTP/", which is passed on as is)
 *   CSA_NEED_MORE  input ran out before the empty line ending the headers
 *   CSA_CONTINUE   the response was "100 Continue"
 *   CSA_FATAL      no data, invalid status line or unsupported
 *                  Transfer-Encoding; csa_http_error() has been called
 */
int
csa_process_headers(p)
  csa_params_t *p;
{
   csa_String_b buffer, *buf = &buffer;
   int status_no, itemflag, something_read;
   int st_read;
   char *header, *value, *val, *options;
   const char *value_c;
   char *status_str, *temp;

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "process_headers: called");
#endif

   /* initialize buffer */
   buf->value = (char *) ap_palloc(p->pool_tmp, 100);
   buf->maxlen = 100;
   buf->len = 0;

   /* if we have been here already, jump directly to the reading of next
    * available headers */
   if (CSA_ISSET(p->flags, CSA_FL_HEADERS_READING)) {
	   status_no = p->status_no;
	   goto process_data;
   }

   CSA_SET(p->flags, CSA_FL_HEADERS_READING);

   /* read first line of output - should be Status information */
   something_read = x_fgets_line(p, buf, 1);

   /* first line (Status) should be something like: "HTTP/1.X 200 OK"	*/
   /* extract code of response and comment behind it - it will		*/
   /* be returned by CSacek as it's Status: */
   status_str = buf->value;

   if (strncasecmp(status_str, "HTTP/", 5) != 0) {
	/* handle HTTP/0.9 */
	if (something_read) {
		/* first line of HTTP/1.0+ response is HTTP/, so we got */
		/* HTTP/0.9  response from server - try to recover */
		csa_setheaderout(p, "Status", "200 OK", CSA_I_OVERWRITE);
		csa_add_output(p, buf->value, buf->len, 0);
#ifdef CSA_DEBUG
		csa_debug(p->dbg,
			"process_headers: HTTP/0.9 response detected, exiting");
#endif
	
		/* we don't know if the data are binary or HTML, so don't */
		/* do anything with them - leave p->convert unset */
		return CSA_OK;
	}
	else {
		/* nothing read --> connection just hung up */
		csa_http_error(p, "Internal Server Error", "Sub-request returned no data");
		return CSA_FATAL;
	}
   }

   /* string contains raw data, we have to cut \r*\n on end manually */
   val = strchr(status_str, '\0');
   while(--val >= status_str && (*val == '\n' || *val == '\r'));
   status_str[val - status_str + 1] = '\0'; /* cut the end */

   /* store raw Status line for MD code needs */
   p->status_raw = ap_pstrdup(p->pool_req, status_str);

   /* get status code: skip the "HTTP/x.y" word and blanks behind it */
   while( *status_str && !isspace((unsigned char)*status_str)) status_str++; 
   while( *status_str && isspace((unsigned char)*status_str)) status_str++; 
   csa_setheaderout(p, "Status", status_str, CSA_I_COPYVALUE);

   status_no = atoi(status_str);
   if (status_no == 0) { /* invalid number */
	csa_http_error(p,"Subrequest returned invalid status line",buf->value);
	return CSA_FATAL;
   }
   p->status_no = status_no;

  process_data:

   /* an empty line (buf->len == 0) ends the headers */
   while ((st_read = x_fgets_line(p, buf, 0)) && buf->len)
   {
	itemflag = CSA_I_HEADERCAT;

	/* process & send to client only "safe" headers */
	if (x_is_member_of(unsafe_headersout, buf->value, 1))
		continue;

	value_c = NULL;

	/*
	 * A line without ':' is not a header; csa_split_header() leaves
	 * the output pointers untouched in that case, so the line has to
	 * be skipped instead of using them.
	 */
	if (csa_split_header(p->pool_tmp, buf->value, &header, &value, &val,
		&options) != 0)
		continue;

	/* handle Content-Type only when returned answer is "valid", i.e. */
	/* we want to process returned data */
	if (strcasecmp(header, "Content-Type") == 0)
	{   
	  if (strncasecmp(val, "text/", 5) == 0) {
		CSA_SET(p->flags, CSA_FL_CONVERT);

		/*
		 * Be sure to compress text/plain and text/html only - some
		 * clients (most notably Netscape 4.6) get seriously confused
		 * when they get e.g. cascade style sheet files (.css)
		 * compressed.
		 */
		if (strcasecmp(val + 5, "html") == 0)
			CSA_SET(p->flags, CSA_FL_ISHTML|CSA_FL_COMPRESSABLE);
		else if (strcasecmp(val + 5, "plain") == 0)
			CSA_SET(p->flags, CSA_FL_COMPRESSABLE);
	  }


	  /* take care of (optional) parameter "charset" of Content-Type */
	  /* header; it marks encoding of body */
	  if (options && CSA_ISSET(p->flags, CSA_FL_CONVERT))
	  {
		const char *pomch;
		size_t len;

		pomch = csa_strcasestr(options, "charset=");
		if (pomch) {
			pomch += strlen("charset=");
			/*
			 * The name ends at the first delimiter, hence
			 * strcspn() - strspn() would count leading
			 * delimiters and give 0 for any real name.
			 */
			if (*pomch == '\"') {
				pomch++;
				len = strcspn(pomch, " \t,;\"");	
			}
			else {
				len = strcspn(pomch, " \t,;");	
			}

			(void)csa_switch_incharset(p, 
					cstools_whichcode(pomch, len));
		}
	  } /* if options && p->convert */

	  /* if we will recode the document, update Content-Type header */
	  /* so that it would include correct charset parameter */
	  if (CSA_ISSET(p->flags, CSA_FL_CONVERT))
	  	value_c = csa_get_ct(p->pool_tmp, p->outcharset, val);
 	}

	else if (strcasecmp(header, "Connection") == 0
		|| strcasecmp(header, "Keep-Alive") == 0) {
		/* don't forward the header - the value sent to CSacek
		 * need not to be valid for CSacek's response
		 */
		continue;
	}

	else if (strcasecmp(header, "ETag") == 0) {
   		/*
		 * Change ETag header (if any) to be unique for same document
   		 * in various encodings
		 */
		char *newetag;
		newetag = ap_palloc(p->pool_req, (int) strlen(value) + 1
				+ CSA_GETMAXNUMCOUNT(int) + 1);
		sprintf(newetag, "%s-%d", value, p->outcharset);

		/*
		 * test ETag against If-Range header, if sent by client; if
		 * it doesn't match, unset p->range, since no range processing
		 * should be done
		 */
		if (p->if_range && strcmp(value, p->if_range) != 0)
			p->range = NULL;

		value = newetag;
	}

	else if (strcasecmp(header, "Location") == 0) {
		/* Location should be rewritten to go through CSacek */
		/* to preserve information about charset & part used */
		/* by client */

		csa_url_t *urlt;

		/* substitute __FOO__ strings first */
		value = csa_subs_string(p, value);

		/* parse the URI and rewrite it appropriately if it leads
		 * to CSacek server */
		urlt = csa_parse_url(p, value);
		if (urlt->can_rewrite && CSA_IS_CSACEK_SERVER(p, urlt))
			value = csa_unparse_url(p, urlt, p->csacek->value);
	}

	else if (strcasecmp(header, "Content-Length") == 0) {
		p->available_in = atoi(value);
		continue; /* do not forward this directly to client */
	}

	else if (strcasecmp(header, "Set-Cookie") == 0) {
		char *setcookie;
		const csa_String *sn = p->csacek;

		/* put p->csacek in front of the cookie path, unless the
		 * path starts with it already or is just "/" */
		if (options && sn->len > 0
		    && ( temp = strstr(options, "path=" ) )
		    && strncasecmp(temp+5, sn->value, sn->len) != 0
		    && strcmp(temp+5, "/") != 0)
		{
			const char *prefix, *suffix;
			int prefix_len, temp_len;
	
			/* everything in front of "path=" */
			prefix = options;
			prefix_len = temp - prefix;

			temp += 5;
	
			suffix = strchr(temp, ';');
			if (suffix) {
				temp_len = suffix - temp;
				suffix++; /* everything after the path */
			} else
				temp_len = strlen(temp);

			setcookie =(char *)csa_alloca(
					strlen(val) + 2 + prefix_len + 5
					+ sn->len + temp_len
					+ ((suffix) ? 1 + strlen(suffix) : 0)
					+ 1, p->pool_tmp);
			sprintf(setcookie, "%s; %.*spath=%s%.*s%s%s",
				val, prefix_len, prefix, sn->value,
				temp_len, temp,
				(suffix) ? ";" : "",
				(suffix) ? suffix : "");

			value = setcookie;
		}
		/* The HTTP specification says that it is legal to merge
		 * duplicate headers into one.  Some browsers that support
		 * Cookies don't like merged headers and prefer that each
		 * Set-Cookie header is sent separately. So don't merge
		 * duplicate Set-Cookie headers.
		 */
		itemflag = 0;
	}

	else if (strcasecmp(header, "Transfer-Encoding") == 0) {
		if (strncasecmp(value, "chunked", 7) != 0) {
			csa_http_error(p,
				"subrequest returned unknown Transfer-Encoding",
				value);
			return CSA_FATAL;
		}
		CSA_SET(p->flags, CSA_FL_CHUNKED_RESP);
		continue; /* client will get dechunked response */
	}

	else if (strcasecmp(header, "WWW-Authenticate") == 0) {
		/* MSIE really doesn't like if several WWW-Authenticate
		 * headers are merged into one. Other browsers might
		 * behave similarly. So send each separately.
		 * Note that even if CSacek sends the headers separately,
		 * server under which CSacek runs can still merge
		 * them together - but that's beyond CSacek's control.
		 */
		itemflag = 0;
	}
			
	csa_setheaderout(p, header, (value_c) ? value_c : value,
			CSA_I_COPYKEY|CSA_I_COPYVALUE|itemflag);

   } /* while */

   /* input ran out in the middle of the headers */
   if (!st_read) return CSA_NEED_MORE;

   CSA_UNSET(p->flags, CSA_FL_HEADERS_READING);

   /*
    * If the reply was 100 Continue, send the same to client
    * if it's HTTP/1.1 client and return indication that more data
    * is needed.
    */
   if (p->status_no == 100) {
	if (p->protocol >= 11) {
		const char *str = "HTTP/1.1 100 Continue\r\n\r\n";
		csa_md_send_output(p, str, strlen(str));
	}
	p->headersout = NULL;
	return CSA_CONTINUE;
   }
		
   if (CSA_ISSET(p->flags, CSA_FL_CHUNKED_RESP)) {
   	CSA_SET(p->flags, CSA_FL_CHUNK_FINISHED);
	CSA_UNSET(p->flags, CSA_FL_CHUNK_ALL);
	p->chunk_remaining = 0;
   }

   /*
    * If we are going to process ranges or the reply is not 200 OK, don't
    * send output to client in chunked form nor continually.
    */
   if (p->range || p->status_no != HTTP_OK)
	CSA_UNSET(p->flags, CSA_FL_OUT_CONT|CSA_FL_OUT_CHUNKED);
#ifdef CSA_WANT_COMPRESSION
   else if (CSA_ISSET(p->flags, CSA_FL_CONVERT|CSA_FL_COMPRESSABLE)) {
   	/*
	 * Only compress output if we would be sending whole document (not just
	 * part) and we are going to process it's contents. Compression
	 * engine would be initialized in csa_add_output() as needed.
	 * 
	 * Also be sure to compress text/plain and text/html only - see comment
	 * at the Content-Type header handling for the reason why.
	 */

	p->compress = p->ua_compress;
   }
#endif

   CSA_SET(p->flags, CSA_FL_HEADERS_READ);

   return CSA_OK;
}

/*
 * Generates an error message to the client.
 *
 * A one-line description, including SCRIPT_NAME and PATH_INFO of the
 * request, is first handed to csa_md_log_error(). Only when that call
 * returns 0, the Status and Content-Type output headers are overwritten
 * and a small HTML page naming ``title'' and ``body'' is added to the
 * output. NULL ``title'' or ``body'' is treated as an empty string.
 */
void 
csa_http_error( p, title, body )
  csa_params_t *p;
  const char *title, *body;
{
	char *temp;
	size_t len;
	const csa_String *pi = csa_getvar(p, "PATH_INFO");
	const csa_String *sn = csa_getvar(p, "SCRIPT_NAME");
	const char *pi_str = pi ? pi->value : "(unknown)";
	const char *sn_str = sn ? sn->value : "(unknown)";

	if (!title) title = "";
	if (!body) body = "";

	/*
	 * The buffer is used for both messages below. Every variable part
	 * has to be counted - the request path is under client's control
	 * and may be arbitrarily long; the extra 200 bytes cover the fixed
	 * text of either format string.
	 */
	len = strlen(CSA_VERSION) + strlen(title) + strlen(body)
		+ strlen(sn_str) + strlen(pi_str);
	temp = (char *) csa_alloca(len + 200, p->pool_tmp);

	sprintf(temp, "C-SaCzech/%s failed for %s%s, reason: %s - %s",
		CSA_VERSION,
		sn_str,
		pi_str,
		title, body );

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_http_error: %s", temp);
#endif

	/* call MD error logger -- do nothing else if it doesn't */
	/* return 0 */
	if (csa_md_log_error(p, temp) != 0) return;

	csa_setheaderout(p, "Status", "500 Internal C-SaCzech error", CSA_I_OVERWRITE);
	csa_setheaderout(p, "Content-Type", "text/html", CSA_I_OVERWRITE);

	sprintf(temp,
"<HTML><HEAD><title>Internal server error - C-SaCzech %s</title></HEAD>\n\
<BODY><H3>Internal server error - C-SaCzech: %s</H3> %s</BODY></HTML>\n",
		CSA_VERSION, 
		title,
		body );
	csa_add_output(p, temp, 0, CSA_OUT_STR);
}

/*
 * Recodes given text (``len'' bytes at ``_text'') from the code set used
 * by the client to p->incharset, which the comments below call the
 * server's code set. Bytes are replaced in place one for one, so the
 * result has exactly the same length as the original. Content of _text
 * is overwritten only when we actually get to recode some characters.
 *
 * Both raw bytes with the top bit set and their urlencoded form (%XY)
 * are recoded; a %XY escape stays a %XY escape, written with upper-case
 * hexadecimal digits.
 *
 * On every path, ``str'' is finally filled by
 * csa_fillstring(str, _text, (int)len, -1).
 */
void
csa_decodequery(str, p, _text, len)
  csa_String *str;
  csa_params_t *p;
  char *_text;
  size_t len;
{
	cstools_t outcodeset;
	cstools_cstocs_t mp;
	char pomc, tobuf, *text = _text;
	size_t remains;
	int is_urlenc=0;

	/*
	 * We don't support recoding to Unicode.
	 */
	if (CSTOOLS_ISUNICODE(p->incharset) || p->incharset == CSTOOLS_UNKNOWN)
		goto out;

	outcodeset = p->outcharset;

	/*
	 * We normally assume that user sent input in the same code set
	 * (s)he is using for viewing pages. If the code set is too
	 * general, we have to try to guess what code set was used for
	 * sent data. Such guessing is of course inherently prone to errors
	 * (some values are used in several code sets for different characters),
	 * but it turned out to be good enough if the text is sufficiently
	 * large.
	 */
	if (outcodeset == CSTOOLS_ASCII || outcodeset == CSTOOLS_ISOLatin2
	    || outcodeset == CSTOOLS_UNKNOWN || CSTOOLS_ISUNICODE(outcodeset))
	{
		/* code set is too general, better try to guess it */
		outcodeset = cstools_guess_charset(text, len);

		/*
		 * Don't do anything if the guessed encoding is unknown,
		 * Unicode or the text contains binary codes.
		 */
		if (outcodeset == CSTOOLS_BINARY
		    || outcodeset == CSTOOLS_UNKNOWN
		    || CSTOOLS_ISUNICODE(outcodeset))
			goto out;
	}

	/*
	 * If user code set is same as server's, don't need to do
	 * anything else.
	 */
	if (outcodeset == p->incharset)
		goto out;

	/* set up the recoding from the user's code set to p->incharset */
	cstools_init(&mp, outcodeset, p->incharset);

	/*
	 * Scan the text; if there is any code greater than or equal to 128,
	 * recode it to server code set; %XY escaped characters are handled
	 * properly, too.
	 *
	 * ``text'' is the cursor and ``remains'' the number of bytes left
	 * from the cursor on; the loop header advances both by one, the
	 * branches below add the extra steps for multi-byte sequences.
	 */
	for(remains = len; remains > 0; remains--, text++) {
		if (remains > 2 && text[0] == '%' && CSA_ISHEXA(text[1])
		    && CSA_ISHEXA(text[2]))
		{
			/* urlencoded character (%XY), have to find out
			 * its real value so that we can decide whether
			 * we should mess with it */
			tobuf = CSA_UPPER(text[1]);
			tobuf = CSA_HEX2DEC(tobuf) << 4;
			/* don't bother doing anything with ASCII; the high
			 * digit alone tells, so step over all three bytes
			 * of the escape (two here, one in the loop header) */
			if ((tobuf & 0x80) == 0) {
				remains -= 2; text += 2;
				continue;
			}
			pomc = CSA_UPPER(text[2]);
			tobuf += CSA_HEX2DEC(pomc);
			is_urlenc = 1;
		} else if (text[0] & 0x80) {
			/* national character, process it */
			tobuf = text[0];
		} else if (remains > 1 && text[0] == '%' && text[1] == '%') {
			/* skip next character - we encountered escape
			 * sequence %% */
			remains--, text++;
			continue;
		} else
			continue;
		
		/* recode the single byte in place */
		if (cstools_recode(&mp, &tobuf, &tobuf, 1) != 1) {
			/* we encountered an error, better to leave
			 * immediately */
			goto out;
		}

		if (is_urlenc) {
			/* write the recoded byte back as two hexadecimal
			 * digits, the '%' at text[0] stays; then step over
			 * the rest of the escape */
			text[1] = csa_hexdigits[((unsigned int)tobuf>>4)&0x0F];
			text[2] = csa_hexdigits[tobuf & 0x0F];
			is_urlenc = 0;
			remains -= 2; text += 2;
		} else {
			text[0] = tobuf;
		}
	}

    out:
	csa_fillstring(str, _text, (int)len, -1);
}

/*
 * Creates URL of the form
 *   method://server[:port]<script_name><extra_path>[?query]
 * preserving method, server name and port of the request to CSacek.
 *
 *   script_name  script part of the path; p->csacek is used when NULL
 *   extra_path   path behind the script; PATH_INFO of the request is
 *                used when NULL (nothing if the request has none)
 *
 * The port is printed only if SERVER_PORT differs from the default port
 * of the method; QUERY_STRING is appended when the variable exists.
 *
 * Returns the URL, allocated from p->pool_tmp.
 */
char *
csa_construct_url(p, script_name, extra_path)
  csa_params_t	*p;
  const char	*script_name, *extra_path;
{
	const csa_String *strp, *server_name, *server_port, *qs;
	int sn_len, ep_len, len, printport;
	char *ret;
	const char *method;

	if (!script_name) {
		strp = p->csacek;
		script_name = strp->value;
		sn_len = strp->len;
	}
	else
		sn_len = strlen(script_name);

	if (!extra_path) {
		/* the variable need not exist (csa_http_error() checks
		 * the result of the same lookup as well) */
		strp = csa_getvar(p, "PATH_INFO");
		if (strp) {
			extra_path = strp->value;
			ep_len = strp->len;
		}
		else {
			extra_path = "";
			ep_len = 0;
		}
	}
	else
		ep_len = strlen(extra_path);

	qs = csa_getvar(p, "QUERY_STRING");

	server_name = csa_getvar(p, "SERVER_NAME");
	server_port = csa_getvar(p, "SERVER_PORT");
	method = CSA_METHOD(p);
	printport = (atoi(server_port->value) != csa_getmethodport(method));

	len = strlen(method) + 3 /* "://" */ + server_name->len
		+ (printport ? 1 + server_port->len : 0)
		+ sn_len + ep_len + (qs ? qs->len + 1 : 0);
	ret = (char *) ap_palloc(p->pool_tmp, len + 1);
	sprintf(ret, "%s://%s%s%s%s%s%s%s",
		method,
		server_name->value,
		(printport) ? ":" : "",
		(printport) ? server_port->value : "",
		script_name,
		extra_path,
		(qs) ? "?" : "", (qs ? qs->value : "")
	);
	
	return ret;
}

/*
 * Finds a name for an IPv4 address given in dotted notation.
 *
 * Returns copy of the host name allocated from ``wpool'', or NULL if
 * ``addr'' can't be parsed by inet_addr() or the lookup fails.
 */
const char *
csa_gethostbyaddr(wpool, addr)
  struct pool *wpool;
  const char *addr;
{
	struct hostent *he;
	struct in_addr in;
	const char *retval=NULL;

	/*
	 * Keep the address in struct in_addr, not in unsigned long: where
	 * long is wider than the address, gethostbyaddr() would be given
	 * just the first bytes of the long, which do not hold the address
	 * on big-endian machines.
	 */
	in.s_addr = inet_addr(addr);
	if (in.s_addr != INADDR_NONE) {
		he = gethostbyaddr((void *) &in, sizeof(in), AF_INET);
		if (he) retval = ap_pstrdup(wpool, he->h_name);
	}

	return retval;
}

/*
 * Parses the first component of the path ``location'' for the presence
 * of charset name and part. Leading slashes are skipped, then either
 * the prefix "to" or a single leading dot; the charset name is what
 * follows up to the next '.' or '/'.
 *
 * The name is accepted if it is empty (and no leading dot has been
 * skipped), known to cstools_whichcode(), or if it starts with
 * "whichcode", "GUESS" (both compared case-insensitively) or
 * "__CHARSET__".
 *
 * On success the following is stored through every non-NULL pointer
 * (all strings are allocated from ``poo''):
 *   *charsetp      the charset name
 *   *partp         text between the name and the next '/', including
 *                  its leading dot; NULL if '/' follows the name directly
 *   *behindp       the rest of location, starting with that '/'
 *   *csacekp       everything in front of *behindp
 *   *can_rewritep  1 if the name is empty, "GUESS" or "__CHARSET__",
 *                  0 otherwise
 *
 * Returns 1 if the location has been successfully parsed, 0 otherwise.
 */
int
csa_parse_sn(poo, location, charsetp, partp, behindp, csacekp,
			 can_rewritep)
  struct pool *poo;
  const char *location;
  char **charsetp, **partp, **behindp, **csacekp;
  int *can_rewritep;
{
	const char *start = location;
	const char *name, *name_end, *rest;
	size_t name_len;
	int dot_stripped = 0, rewritable = 0;

	name = location;
	while (*name == '/')
		name++;
	if (*name == '\0')
		return 0;

	if (name[0] == 't' && name[1] == 'o') {
		name += 2;
	}
	else if (name[0] == '.') {
		dot_stripped = 1;
		name++;
	}

	name_len = strcspn(name, "./");
	name_end = name + name_len;

	if (name_len == 0) {
		/*
		 * "Empty" charset name is supported (such as when
		 * SCRIPT_NAME is something like to.en), so that language
		 * variant can be chosen without choosing charset as well.
		 * A dot immediately followed by another dot or a slash
		 * can't be anything we support, though.
		 */
		if (dot_stripped)
			return 0;
		rewritable = 1;
	}
	else if (cstools_whichcode(name, name_len) == CSTOOLS_UNKNOWN) {
		/* not a real code set, only few special names are allowed */
		if (strncasecmp(name, "whichcode", 9) != 0) {
			if (strncasecmp(name, "GUESS", 5) != 0
			    && strncmp(name, "__CHARSET__", 11) != 0)
				return 0;
			rewritable = 1;
		}
	}

	/* rest of the path starts with the next slash (or is empty) */
	rest = strchr(name_end, '/');
	if (rest == NULL)
		rest = strchr(name_end, '\0');

	if (charsetp)
		*charsetp = ap_pstrndup(poo, name, (int)name_len);
	if (partp) {
		if (*name_end == '/')
			*partp = NULL;
		else
			*partp = ap_pstrndup(poo, name_end, rest - name_end);
	}
	if (behindp)
		*behindp = ap_pstrdup(poo, rest);
	if (csacekp)
		*csacekp = ap_pstrndup(poo, start, rest - start);
	if (can_rewritep)
		*can_rewritep = rewritable;

	return 1;
}

/*
 * Maps method name (compared case-insensitively) to the port assigned
 * to it: 80 for "http", 443 for "https". Returns 0 for any other method.
 */
int
csa_getmethodport(method)
  const char *method;
{
	if (strcasecmp(method, "http") == 0)
		return 80;

	if (strcasecmp(method, "https") == 0)
		return 443;

	/* unknown method */
	return 0;
}

	

/*
 * Parses URL and splits it into structure csa_url_t allocated (zeroed)
 * from p->pool_tmp:
 *
 *   method, server, port  taken from "method://server[:port]" in front
 *                         of the path, if the URL has it; port defaults
 *                         to the port of the method
 *   csa_dir               p->csacek_dir or p->ignoreprefix, when the
 *                         path starts with it and CSacek component
 *                         follows
 *   csacek, csa_part      CSacek component of the path as recognized by
 *                         csa_parse_sn(), and the part contained in it
 *   can_rewrite           as reported by csa_parse_sn(); 1 when the path
 *                         has no CSacek component
 *   uri, qs               rest of the path and the query string behind
 *                         the first '?'
 */
csa_url_t *
csa_parse_url(p, url)
  csa_params_t *p;
  const char *url;
{
	csa_url_t *urlt;
	const char *scheme_end, *host, *host_end, *sn_start, *sn, *query;
	char *behind, *csacek, *part;
	int can_rewrite=0;
	const csa_String *csa_dir=NULL;

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_parse_url: called for '%s'", url);
#endif

	urlt = (csa_url_t *)ap_pcalloc(p->pool_tmp, sizeof(csa_url_t));

	scheme_end = NULL;
	if (url[0] != '/')
		scheme_end = strstr(url, ":/");

	if (scheme_end != NULL && scheme_end != url) {
		urlt->method = ap_pstrndup(p->pool_tmp, url, scheme_end - url);

		if (scheme_end[2] == '/') {
			/* 'traditional' URL - 'http://server/path/' */
			host = scheme_end + 3;
			host_end = host + strcspn(host, ":/");
			urlt->server = ap_pstrndup(p->pool_tmp, host,
						host_end - host);
			if (*host_end == ':')
				urlt->port = atoi(host_end + 1);

			/* path starts at the first slash behind the server */
			url = host_end + strcspn(host_end, "/");
		}
		else {
			/* support for URL 'http:/path/' */
			url = scheme_end + 1;
		}

		/* set port accordingly to method, if port is still */
		/* uninitialized */
		if (urlt->port == 0)
			urlt->port = csa_getmethodport(urlt->method);
	}

	/* step over directory prefix in front of CSacek component, if any */
	sn_start = url;
	if (p->csacek_dir && p->csacek_dir->len > 0
	    && strncmp(sn_start, p->csacek_dir->value,
			p->csacek_dir->len) == 0)
	{
		csa_dir = p->csacek_dir;
		sn_start += csa_dir->len;
	}
	else if (p->ignoreprefix && p->ignoreprefix->len > 0
	    && strncmp(sn_start, p->ignoreprefix->value,
			p->ignoreprefix->len) == 0)
	{
		csa_dir = p->ignoreprefix;
		sn_start += csa_dir->len;
	}

	if (csa_parse_sn(p->pool_tmp, sn_start, NULL, &part, &behind,
		&csacek, &can_rewrite))
	{
		/* the path goes through CSacek, uri is what follows */
		sn = (*csacek) ? csacek : NULL;
		url = behind;
		urlt->csa_dir = csa_dir;
	}
	else {
		/* plain path, it is kept as a whole */
		sn = NULL;
		part = NULL;
		can_rewrite = 1;
	}

	urlt->can_rewrite = can_rewrite;
	urlt->csacek = sn;

	query = strchr(url, '?');
	if (query == NULL)
		urlt->uri = url;
	else {
		urlt->uri = ap_pstrndup(p->pool_tmp, url, query - url);
		urlt->qs  = query + 1;
	}

	urlt->csa_part = (part && *part) ? part : NULL;

	return urlt;
}

/*
 * Constructs URL out of csa_url_t previously split by csa_parse_url().
 *
 *   urlt    the parsed URL
 *   csacek  CSacek path to put into the URL; it is used only if
 *           urlt->can_rewrite is set and the string is not empty,
 *           otherwise the one from the original URL (urlt->csacek)
 *           is kept
 *
 * The CSacek path is printed only in front of uri starting with '/'.
 *
 * Returns the new URL, allocated from p->pool_tmp.
 */
char *
csa_unparse_url(p, urlt, csacek)
  csa_params_t *p;
  const csa_url_t *urlt;
  const char *csacek;
{
	char *newurl, port[CSA_GETMAXNUMCOUNT(urlt->port)+1];
	const char *lastdot, *suffix=NULL, *csa_part=urlt->csa_part;
	const char *csa_dir = (urlt->csa_dir) ? urlt->csa_dir->value : NULL;
	short printport, printcsacek, printpart;
	size_t csa_len=0;	/* how much of csacek to print; 0 = whole */
	size_t len = strlen(urlt->uri);

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_unparse_url: called");
#endif

	/* if part has been specified in original url, keep it in new URL
	 * as well */
	if (urlt->can_rewrite && csacek && *csacek) {
		/* print new csacek only up to the dot starting its part
		 * (or its suffix, if there is no part) */
		suffix = csa_has_suffix(csacek, CSA_IGNORE_SUFFIXES, CSA_SEP);
		if (suffix) {
		    lastdot = suffix;
		    while(lastdot>csacek && *(--lastdot)!='.');
		    if (lastdot == csacek && *lastdot != '.')
			lastdot = suffix;
		}
		else {
		    lastdot = strrchr(csacek, '.');
		}
		if (lastdot && !strchr(lastdot, '/'))
			csa_len = lastdot - csacek;
		csa_dir = NULL;
	}
	else {
		/* if new CSacek is empty, use the value from original URL */
		csacek = urlt->csacek;
		/* don't duplicate part - if it has been in original
		 * CSacek URL, it's contained in urlt->csacek already */
		if (csa_part && csacek)
			csa_len = strlen(csacek) - strlen(csa_part);
	}

	/* if URL didn't have csa_dir part or correct suffix (.cgi or such),
	 * change it to include one; p->csacek_dir need not be set (see the
	 * check in csa_parse_url()) */
	if (p->csacek_dir && p->csacek_dir->len && !csa_dir && csacek) {
		csa_dir = p->csacek_dir->value;
		/* if csacek starts with csa_dir already, unset
		 * csa_dir */
		if (strncmp(csacek, csa_dir,strlen(csa_dir)) == 0)
			csa_dir = NULL;
	}
	if (p->csa_suffix && !suffix) suffix = p->csa_suffix;

	/* if the original CSacek path had not part specified, do not specify
	 * it in "output" URL as well; otherwise, if the CSacek part is same
	 * as one of CSA_IGNORE_SUFFIXES, remember it as suffix and don't use
	 * it for specifying "part" */
	if (csacek && *csacek) {
		const char *suff;
		if (csa_part && (suff =
		    	csa_has_suffix(csa_part, CSA_IGNORE_SUFFIXES, CSA_SEP)))
		{
		    if (csa_part == suff) {
			suffix = csa_part;
			csa_part = NULL;
		    }
		    else {
			/* no need to duplicate suffix - it's part of csa_part*/
			suffix = NULL;
		    }
		}
		if (!csa_part && !CSA_ISSET(p->flags, CSA_FL_PART_IS_EMPTY))
			csa_part = p->part.value;
	}

	printport = (urlt->server && urlt->method && urlt->port
		&& urlt->port != csa_getmethodport(urlt->method));
	printcsacek = (csacek && *csacek && urlt->uri[0] == '/');
	printpart = (printcsacek && csa_part);

	/* count length of all the pieces printed below; ``len'' starts
	 * with length of uri */
	if (urlt->method) len += strlen(urlt->method) + 1;
	if (urlt->server) len += 2 + strlen(urlt->server);
	if (printport) len += 1 + sprintf(port, "%u", (unsigned int)urlt->port);
	if (printcsacek) {
		if (!csa_len) csa_len = strlen(csacek);
		len += csa_len;
		if (csa_dir) len += strlen(csa_dir);
		if (suffix) len += strlen(suffix);
	}
	if (printpart) len += strlen(csa_part);
	if (urlt->qs) len += 1 + strlen(urlt->qs);

	newurl = (char *)ap_palloc(p->pool_tmp, (int) len + 10);
	sprintf(newurl, "%s%s%s%s%s%s%s%.*s%s%s%s%s%s",
		(urlt->method ? urlt->method : ""), (urlt->method ? ":" : ""),
		(urlt->server ? "//" : ""),
		(urlt->server ? urlt->server : ""),
		(printport ? ":" : ""), (printport ? port : ""),
		((printcsacek && csa_dir) ? csa_dir : ""),
		(int) csa_len, (printcsacek ? csacek : ""),
		((printpart) ? csa_part : ""),
		(printcsacek && suffix ? suffix : ""),
		urlt->uri,
		(urlt->qs ? "?" : ""), (urlt->qs ? urlt->qs : ""));

#ifdef CSA_DEBUG
	csa_debug(p->dbg, "csa_unparse_url: result is '%s'", newurl);
#endif
	return newurl;
}

/*
 * parse a run of decimal digits starting at *strp and move *strp past it
 *
 * Returns -1 (and leaves *strp alone) if *strp doesn't point to a digit.
 * The value saturates at 2147483647 rather than overflowing;
 * csa_range_fixup() handles the bounds as int and clamps or drops anything
 * beyond the content length, so bigger numbers would carry no extra meaning.
 */
static long
x_range_number(strp)
  const char **strp;
{
	const long limit = 2147483647L;
	const char *s = *strp;
	long value;
	int digit;

	if (*s < '0' || *s > '9')
		return -1;

	value = 0;
	do {
		digit = *s++ - '0';
		if (value > (limit - digit) / 10)
			value = limit; /* would overflow, saturate */
		else
			value = value * 10 + digit;
	} while (*s >= '0' && *s <= '9');

	*strp = s;
	return value;
}

/*
 * this parses Range header and creates an array of chunks
 *
 *  pp    - pool the array and its items are allocated from
 *  range - value of the Range header; only "bytes=" unit is understood
 *
 * Returns NULL-terminated array of ranges, or NULL if range is NULL,
 * doesn't start with "bytes=" or holds no usable spec. The comma separated
 * specs are stored as follows:
 *	"N-M"	from = N,  to = M
 *	"N-"	from = N,  to = -1 (last-byte-pos absent)
 *	"-N"	from = -N, to = 0  (last N bytes of the document)
 * Spaces and tabs around a spec are ignored. A spec is skipped if it is
 * malformed (no number, missing '-', trailing garbage), if its
 * last-byte-pos is lower than first-byte-pos, or if it is "-0" (suffix
 * of zero length can't be satisfied).
 */
csa_range_t **
csa_range_compile(pp, range)
  struct pool *pp;
  const char *range;
{
	int count, sindex;
	signed long from, to;
	csa_range_t **newrange, *rangestrip;
	const char *tmp, *next;

	if (range == NULL || strncmp(range, "bytes=", 6) != 0)
		return NULL;

	range += 6;

	/* number of commas + 1 is the upper bound of number of specs */
	tmp = range;
	count = 1;
	while ((tmp = strchr(tmp, ',')) != NULL) tmp++, count++;

	newrange = (csa_range_t **) ap_palloc(pp,
		(int) ((count + 1)* sizeof(csa_range_t *)));

	next = range;
	sindex = 0;
	do {
		/* tmp walks current spec, next points behind its comma
		 * (or to the terminating zero for the last spec) */
		tmp = next;
		next = strchr(tmp, ',');
		if (!next) next = strchr(tmp, '\0');
		else next++;

		/* optional whitespace is allowed between list items */
		while (*tmp == ' ' || *tmp == '\t') tmp++;

		if (*tmp == '-') {
			/* suffix-byte-range-spec: "-N" */
			tmp++;
			from = x_range_number(&tmp);
			if (from <= 0) continue; /* no number or "-0" */
			from = -from;
			to = 0;
		}
		else {
			/* byte-range-spec: "N-" or "N-M" */
			from = x_range_number(&tmp);
			if (from < 0) continue; /* not a number, go next spec */
			if (*tmp != '-') continue; /* '-' is mandatory */
			tmp++;

			if (*tmp >= '0' && *tmp <= '9') {
				to = x_range_number(&tmp);
				/* invalid specification, go next */
				if (to < from) continue;
			}
			else {
				/* 'last-byte-pos' absent, mark it as such */
				to = -1;
			}
		}

		/* nothing but whitespace may follow within the spec */
		while (*tmp == ' ' || *tmp == '\t') tmp++;
		if (*tmp != ',' && *tmp != '\0') continue;

		rangestrip = (csa_range_t *) ap_palloc(pp, sizeof(csa_range_t));
		rangestrip->from = from;
		rangestrip->to   = to;
		newrange[sindex++] = rangestrip;
	} while (*next != '\0');

	newrange[sindex] = NULL; /* mark end */

	return ((sindex == 0) ? NULL : newrange);
}

/*
 * this adjusts all bounds so that they fall within specified final Content
 * Length and forgets the ranges which end up empty; in future, this should
 * maybe collapse overlapping ranges and sort them too
 *
 *  range                - NULL-terminated array of ranges, changed in place
 *  final_content_length - length of the body the ranges are applied to
 */
void
csa_range_fixup(range, final_content_length)
  csa_range_t **range;
  size_t final_content_length;
{
	int first, last, drop;
	size_t pos;

	if (final_content_length == 0) {
		/* empty body, no range can be used */
		range[0] = NULL;
		return;
	}

	pos = 0;
	while (range[pos]) {
		first = range[pos]->from;
		last  = range[pos]->to;
		drop  = 0;

		if (first >= 0) {
			/* to == -1 means the last position was not given */
			if (last == -1 || last > (int)final_content_length - 1)
				last = final_content_length - 1;
			if (first > last) drop = 1;
		}
		else {
			/* negative start is counted from the end of body */
			first += final_content_length;
			if (first < 0) first = 0;
			last = final_content_length - 1;
		}

		if (!drop) {
			range[pos]->from = first;
			range[pos]->to   = last;
			pos++;
			continue;
		}

		/*
		 * record is invalid: copy values of the following records
		 * one slot down and cut the array by one; pos is not
		 * advanced, so the record moved to this slot is checked
		 * in next iteration
		 */
		{
			int idx = pos;

			while (range[idx + 1]) {
				range[idx]->from = range[idx + 1]->from;
				range[idx]->to   = range[idx + 1]->to;
				idx++;
			}
			range[idx] = NULL;
		}
	}
}

/*
 * read response data; if data were originally chunked and we
 * finished reading headers already, dechunk it too
 *
 *  p   - request state; flags, available_in and chunk_remaining are
 *        read and updated here
 *  buf - where to store the data
 *  len - maximum number of bytes to store
 *
 * Returns number of bytes stored in buf; 0 means no data were read
 * (nothing available, end of input, or last chunk already processed).
 */
int
csa_read_response(p, buf, len)
  csa_params_t *p;
  char *buf;
  size_t len;
{
	size_t chlen, read_size, readlen;
	char onechar;

	if (!CSA_ISSET(p->flags, CSA_FL_HEADERS_READ|CSA_FL_CHUNKED_RESP)) {
		size_t howmuch=0;
		/* ensure amount of data read is not bigger than amount */
		/* available, could have some bad side-effects */
		if (p->available_in == 0) return 0;
		/* negative available_in puts no limit on the read */
		/* NOTE(review): presumably "length unknown" - confirm */
		if (p->available_in < 0) howmuch = len;
		else {
			howmuch = ((size_t)p->available_in > len)
					? len : (size_t)p->available_in;
		}
		read_size = csa_md_read_response(p, buf, howmuch);
		p->available_in -= read_size;
		return read_size;
	}

	if (CSA_ISSET(p->flags, CSA_FL_CHUNK_ALL))
		return 0; /* no more data available */

	read_size = 0;
	while (len > 0) {
		if (p->chunk_remaining == 0) {
			if (!CSA_ISSET(p->flags, CSA_FL_CHUNK_FINISHED)) {
				/* eat the \r*\n on the end of chunk */
				if (!csa_md_read_response(p, &onechar, 1))
					return read_size;
				if (onechar == '\r')
					csa_md_read_response(p, &onechar, 1);
				CSA_SET(p->flags, CSA_FL_CHUNK_FINISHED);
			}

			/* read hexadecimal chunk size; the loop ends with
			 * first non-hexa character stored in onechar */
			chlen = 0;
			/* XXX bad things happen when we encounter end of
			 * input after part of chunk length has been read
			 */
			for(;;) {
				if (!csa_md_read_response(p, &onechar, 1))
					return read_size;
				if (!CSA_ISHEXA(onechar)) break;
				onechar = CSA_UPPER(onechar);
				/* 'A'-55 == 10, '0'-48 == 0 */
				onechar -= ((unsigned char) onechar > '9')
						? 55 : 48;
				chlen *= 16;
				chlen += onechar;
			}

			/*
			 * skip chunk-ext up to the end of line; the character
			 * which terminated the size has to be checked first,
			 * otherwise line ended with bare \n would make us
			 * eat the beginning of chunk data
			 */
			while (onechar != '\r' && onechar != '\n') {
				if (!csa_md_read_response(p, &onechar, 1))
					break;
			}
			/* skip \n following the \r */
			if (onechar == '\r')
				csa_md_read_response(p, &onechar, 1);

			if (chlen == 0) {
				int linestart = 1;
				/*
				 * no more data available, skip remaining
				 * entity headers (trailer): read whole lines
				 * until an empty one or end of input
				 */
				while (csa_md_read_response(p, &onechar, 1)) {
					if (onechar == '\n') {
						if (linestart) break;
						linestart = 1;
					}
					else if (onechar != '\r')
						linestart = 0;
				}
				p->flags |= CSA_FL_CHUNK_ALL;
				break;
			}

			p->chunk_remaining = chlen;
			CSA_UNSET(p->flags, CSA_FL_CHUNK_FINISHED);
		}

		/* never read past the end of current chunk */
		readlen = (len > p->chunk_remaining) ? p->chunk_remaining : len;
		readlen = csa_md_read_response(p, &buf[read_size], readlen);
		if (!readlen) break;

		p->chunk_remaining -= readlen;
		read_size += readlen;
		len -= readlen;
	}

	return read_size;
}

/*
 * return Content-Type value extended by "; charset=" parameter holding
 * the MIME name cstools_name() gives for outcodeset; when there is no
 * such name, ct itself is returned
 *
 *  wpool      - pool the new string is allocated from
 *  outcodeset - code set to look the MIME name up for
 *  ct         - Content-Type of the document
 */
const char *
csa_get_ct(wpool, outcodeset, ct)
  struct pool *wpool;
  cstools_t outcodeset;
  const char *ct; /* Content-Type of the document */
{
	const char *csname;
	char *header;
	size_t need;

	csname = cstools_name(outcodeset, CSTOOLS_MIMENAME);
	if (csname == NULL || *csname == '\0') {
		/* nothing to append */
		return ct;
	}

	/* sizeof() counts the terminating zero of the literal already */
	need = strlen(ct) + sizeof("; charset=") + strlen(csname) + 1;

	header = (char *) ap_palloc(wpool, (int) need);
	sprintf(header, "%s; charset=%s", ct, csname);

	return header;
}
