/* $Id: unicode.c,v 1.4 2003/02/04 17:56:31 short Exp $
 * Unicode add-ons to reactos ntoskrnl/rtl/unicode.c for libcaptive
 * Copyright (C) 2002 Jan Kratochvil <project-captive@jankratochvil.net>
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; exactly version 2 of June 1991 is required
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */


#include "config.h"

#include "captive/unicode.h"	/* self */
#include "captive/unicode_reactos.h"	/* for captive_ucs2 */
#include <glib/gtypes.h>
#include <glib/gmessages.h>
#include <glib/gunicode.h>
#include <glib/gmem.h>
#include "reactos/napi/types.h"  /* for PUNICODE_STRING etc. */
#include "reactos/unicode.h"
#include "captive/macros.h"
#include <glib/gstrfuncs.h>
#include <wchar.h>	/* for wcslen() */
#include <glib/ghash.h>
#include <string.h>


/* CONFIG: */
/* Use simplified g_malloc() functions as wrappers around g_alloca() ones.
 */
#define FUNCMALLOC_FROM_ALLOCA 1


/* Compiler sanity: verify at runtime the character type widths assumed by
 * the functions of this file. Any mismatch is reported through
 * g_return_val_if_fail() and FALSE is returned; TRUE means all widths match.
 */
static gboolean captive_validate_unicode_types(void)
{
	g_return_val_if_fail(4==sizeof(gunichar),FALSE);	/* 32-bit characters */
	g_return_val_if_fail(2==sizeof(WCHAR),FALSE);	/* 16-bit characters */
	g_return_val_if_fail(1==sizeof(CHAR),FALSE);	/* 8-bit characters */

	return TRUE;
}


/**
 * captive_validate_ucs4:
 * @string_ucs4: 0-terminated #const #gunichar * string to check.
 * Invalid string input is forbidden.
 *
 * Walks @string_ucs4 up to its 0 terminator and requires every 32-bit
 * character on the way to be accepted by g_unichar_validate().
 *
 * Returns: %TRUE if the whole string passed the checks.
 */
gboolean captive_validate_ucs4(const gunichar *string_ucs4)
{
const gunichar *cs_ucs4;

	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_ucs4!=NULL,FALSE);

	cs_ucs4=string_ucs4;
	while (*cs_ucs4) {
		/* the check text below shows up in the glib diagnostics; keep it as is */
		g_return_val_if_fail(g_unichar_validate(*cs_ucs4),FALSE);
		cs_ucs4++;
		}

	return TRUE;
}


/**
 * captive_validate_ucs2_fixlen:
 * @string_ucs2: #const #captive_ucs2 * type string to check.
 * Invalid string input is forbidden.
 * UTF-16 encoded strings are forbidden.
 * @string_ucs2_fixlen: Number of leading characters of @string_ucs2 to check.
 * captive_ucs2_strlen(@string_ucs2)>=@string_ucs2_fixlen is required.
 * Negative value is forbidden.
 *
 * Inspects exactly the first @string_ucs2_fixlen 16-bit characters of
 * @string_ucs2. Each of them must be non-zero (the string must not end
 * early) and must be accepted by g_unichar_validate().
 *
 * Returns: %TRUE if all the inspected characters are valid.
 */
gboolean captive_validate_ucs2_fixlen(const captive_ucs2 *string_ucs2,glong string_ucs2_fixlen)
{
const captive_ucs2 *cs_ucs2,*end_ucs2;

	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_ucs2!=NULL,FALSE);
	g_return_val_if_fail(string_ucs2_fixlen>=0,FALSE);

	/* one past the last character to inspect */
	end_ucs2=string_ucs2+string_ucs2_fixlen;

	/* g_unichar_validate() will reject surrogates (G_UNICODE_SURROGATE) */
	cs_ucs2=string_ucs2;
	while (cs_ucs2<end_ucs2) {
		g_return_val_if_fail(*cs_ucs2!=0,FALSE);
		g_return_val_if_fail(g_unichar_validate(*cs_ucs2),FALSE);
		cs_ucs2++;
		}

	return TRUE;
}


/**
 * captive_validate_ucs2:
 * @string_ucs2: #const #captive_ucs2 * type string to validate.
 * Invalid string input is forbidden.
 * UTF-16 encoded strings are forbidden.
 *
 * Checks the validity of all 16-bit unicharacters of 0-terminated string.
 * It is required to have characters complying to g_unichar_validate().
 *
 * Returns: %TRUE if the string is valid.
 */ 
gboolean captive_validate_ucs2(const captive_ucs2 *string_ucs2)
{
	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_ucs2!=NULL,FALSE);

	return captive_validate_ucs2_fixlen(string_ucs2,captive_ucs2_strlen(string_ucs2));
}


/**
 * captive_validate_utf8:
 * @string_utf8: 0-terminated #const #gchar * utf8 string to validate.
 * Invalid string input is forbidden.
 *
 * Checks the whole 0-terminated @string_utf8 by g_utf8_validate().
 * %NULL @string_utf8 is reported as invalid.
 *
 * Returns: %TRUE if the string is valid.
 */
gboolean captive_validate_utf8(const gchar *string_utf8)
{
	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_utf8!=NULL,FALSE);

	g_return_val_if_fail(g_utf8_validate(
					string_utf8,	/* str */
					-1,	/* max_len; -1 means '\0'-terminated */
					NULL),	/* end; location of the first invalid byte is not requested */
			FALSE);

	return TRUE;
}


/**
 * captive_ucs2_strlen:
 * @string_ucs2: String of type #const #gunichar2 * in pure UCS-2
 * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 *
 * Counts the number of characters (=2bytes) in @string_ucs2 preceding
 * its 0 terminator.
 *
 * Returns: @string_ucs2 length in UCS-2 characters; 0 if the input checks fail.
 */
glong captive_ucs2_strlen(const captive_ucs2 *string_ucs2)
{
const captive_ucs2 *end_ucs2;

	/* Do not call captive_validate_ucs2(string_ucs2) as we would be looping! */
	g_return_val_if_fail(captive_validate_unicode_types(),0);
	g_return_val_if_fail(string_ucs2!=NULL,0);

	/* advance to the terminator; the distance travelled is the length */
	end_ucs2=string_ucs2;
	while (*end_ucs2)
		end_ucs2++;

	return end_ucs2-string_ucs2;
}


/**
 * captive_validate_UnicodeString:
 * @string_UnicodeString: #PUNICODE_STRING type string to validate.
 * Invalid string input is forbidden.
 *
 * Checks the internal consistency of the given @string_UnicodeString:
 * ->Buffer must be present, ->Length must be a multiple of the character
 * size, ->MaximumLength must have room for ->Length bytes plus the terminator
 * and ->Buffer must contain exactly ->Length/sizeof(WCHAR) non-zero
 * characters followed by the 0 terminator.
 * It is required to have characters complying to g_unichar_validate().
 * @string_UnicodeString MUST be zero-terminated.
 *
 * The terminator is looked for only in the area already proven to lie
 * inside ->MaximumLength; an unterminated string therefore gets rejected
 * without reading the memory behind its buffer.
 *
 * Returns: %TRUE if the string is valid.
 */
gboolean captive_validate_UnicodeString(const UNICODE_STRING *string_UnicodeString)
{
size_t length,ui;

	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_UnicodeString!=NULL,FALSE);
	g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString->Buffer),FALSE);
	g_return_val_if_fail(string_UnicodeString->Buffer!=NULL,FALSE);
	g_return_val_if_fail(string_UnicodeString->Length%sizeof(*string_UnicodeString->Buffer)==0,FALSE);
	g_return_val_if_fail(string_UnicodeString->MaximumLength
			>=string_UnicodeString->Length+sizeof(*string_UnicodeString->Buffer),FALSE);

	/* Bounded equivalent of (Length==sizeof(WCHAR)*captive_ucs2_strlen(Buffer)):
	 * no terminator among the first 'length' characters and the terminator
	 * right behind them. Buffer[length] is inside MaximumLength as checked above.
	 */
	length=string_UnicodeString->Length/sizeof(*string_UnicodeString->Buffer);
	for (ui=0;ui<length;ui++) {
		g_return_val_if_fail(string_UnicodeString->Buffer[ui]!=0,FALSE);
		}
	g_return_val_if_fail(string_UnicodeString->Buffer[length]==0,FALSE);

	/* the string is known to be terminated now; the unbounded scan is safe */
	g_return_val_if_fail(captive_validate_ucs2(string_UnicodeString->Buffer),FALSE);

	return TRUE;
}


/**
 * captive_validate_UnicodeString_noterm:
 * @string_UnicodeString_noterm: #PUNICODE_STRING type string to validate.
 * Invalid string input is forbidden.
 *
 * Checks the internal consistency of the given @string_UnicodeString_noterm:
 * ->Buffer must be present, ->Length must be a multiple of the character
 * size, ->MaximumLength must not be smaller than ->Length and the first
 * ->Length/sizeof(WCHAR) characters of ->Buffer must be non-zero.
 * It is required to have characters complying to g_unichar_validate().
 * @string_UnicodeString_noterm does not need to be zero-terminated.
 *
 * Returns: %TRUE if the string is valid.
 */
gboolean captive_validate_UnicodeString_noterm(const UNICODE_STRING *string_UnicodeString_noterm)
{
	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_UnicodeString_noterm!=NULL,FALSE);
	g_return_val_if_fail(sizeof(WCHAR)==sizeof(*string_UnicodeString_noterm->Buffer),FALSE);
	/* reject missing buffer before anything gets read from it */
	g_return_val_if_fail(string_UnicodeString_noterm->Buffer!=NULL,FALSE);
	g_return_val_if_fail(string_UnicodeString_noterm->Length%sizeof(*string_UnicodeString_noterm->Buffer)==0,FALSE);
	g_return_val_if_fail(string_UnicodeString_noterm->MaximumLength>=string_UnicodeString_noterm->Length,FALSE);

	/* captive_validate_ucs2_fixlen() rejects both the embedded zero characters
	 * and the invalid characters; no separate scan for zeroes is needed.
	 */
	g_return_val_if_fail(captive_validate_ucs2_fixlen(string_UnicodeString_noterm->Buffer,
					string_UnicodeString_noterm->Length/sizeof(*string_UnicodeString_noterm->Buffer)),
			FALSE);

	return TRUE;
}


/**
 * captive_validate_AnsiString:
 * @string_AnsiString: #PANSI_STRING type string to validate.
 * Invalid string input is forbidden.
 *
 * Checks the internal consistency of the given @string_AnsiString:
 * ->Buffer must be present, ->MaximumLength must have room for ->Length
 * bytes plus the terminator and ->Buffer must contain exactly ->Length
 * non-zero bytes followed by the '\0' terminator.
 *
 * Only the first ->Length+1 bytes of ->Buffer are read; an unterminated
 * string gets rejected without reading the memory behind its buffer.
 *
 * Returns: %TRUE if the string is valid.
 */
gboolean captive_validate_AnsiString(const ANSI_STRING *string_AnsiString)
{
	g_return_val_if_fail(captive_validate_unicode_types(),FALSE);
	g_return_val_if_fail(string_AnsiString!=NULL,FALSE);
	g_return_val_if_fail(sizeof(CHAR)==sizeof(*string_AnsiString->Buffer),FALSE);
	g_return_val_if_fail(string_AnsiString->Buffer!=NULL,FALSE);
	g_return_val_if_fail(string_AnsiString->MaximumLength>=string_AnsiString->Length+1,FALSE);

	/* Bounded equivalent of (Length==strlen(Buffer)):
	 * no terminator inside the first 'Length' bytes and the terminator right
	 * behind them. Buffer[Length] is inside MaximumLength as checked above.
	 */
	g_return_val_if_fail(memchr(string_AnsiString->Buffer,'\0',string_AnsiString->Length)==NULL,FALSE);
	g_return_val_if_fail(string_AnsiString->Buffer[string_AnsiString->Length]=='\0',FALSE);

	return TRUE;
}


/**
 * captive_ucs2_compare:
 * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
 * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
 * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 *
 * Case-sensitive equality test of @string_a_ucs2 and @string_b_ucs2;
 * the terminators take part in the comparison so the lengths must match, too.
 *
 * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
 */
gboolean captive_ucs2_compare(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
{
	g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
	g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);

	/* walk both the strings in parallel */
	for (;;string_a_ucs2++,string_b_ucs2++) {
		if (*string_a_ucs2!=*string_b_ucs2)
			return FALSE;
		/* equal characters: a terminator here terminates both the strings */
		if (!*string_a_ucs2)
			return TRUE;
		}
}


/**
 * captive_UnicodeString_compare:
 * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
 * Invalid string input is forbidden.
 * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
 * Invalid string input is forbidden.
 *
 * Case-sensitive equality test of @string_a_UnicodeString and
 * @string_b_UnicodeString. Strings of different ->Length are never the same;
 * otherwise the buffers get compared by captive_ucs2_compare().
 *
 * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
 */
gboolean captive_UnicodeString_compare
		(const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
{
	g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
	g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);

	/* cheap length test first; the buffers are read only for equal lengths */
	return (string_a_UnicodeString->Length==string_b_UnicodeString->Length
			&& captive_ucs2_compare(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer));
}


/**
 * captive_ucs2_compare_insensitive:
 * @string_a_ucs2: First string of type #const #gunichar2 * in pure UCS-2.
 * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 * @string_b_ucs2: Second string of type #const #gunichar2 * in pure UCS-2.
 * Invalid string input is forbidden. UTF-16 encoded pairs are forbidden.
 *
 * Case-insensitive equality test of @string_a_ucs2 and @string_b_ucs2;
 * the characters get compared after their conversion by g_unichar_toupper().
 *
 * Returns: %TRUE if @string_a_ucs2 and @string_b_ucs2 are the same.
 */
gboolean captive_ucs2_compare_insensitive(const captive_ucs2 *string_a_ucs2,const captive_ucs2 *string_b_ucs2)
{
	g_return_val_if_fail(captive_validate_ucs2(string_a_ucs2),FALSE);
	g_return_val_if_fail(captive_validate_ucs2(string_b_ucs2),FALSE);

	/* walk both the strings in parallel */
	for (;;string_a_ucs2++,string_b_ucs2++) {
		if (g_unichar_toupper(*string_a_ucs2)!=g_unichar_toupper(*string_b_ucs2))
			return FALSE;
		/* matching position: the loop ends at the terminator of the first string */
		if (!*string_a_ucs2)
			return TRUE;
		}
}


/**
 * captive_UnicodeString_compare_insensitive:
 * @string_a_UnicodeString: First string of type #PUNICODE_STRING.
 * Invalid string input is forbidden.
 * @string_b_UnicodeString: Second string of type #PUNICODE_STRING.
 * Invalid string input is forbidden.
 *
 * Case-insensitive equality test of @string_a_UnicodeString and
 * @string_b_UnicodeString. Strings of different ->Length are never the same;
 * otherwise the buffers get compared by captive_ucs2_compare_insensitive().
 *
 * Returns: %TRUE if @string_a_UnicodeString and @string_b_UnicodeString are the same.
 */
gboolean captive_UnicodeString_compare_insensitive
		(const UNICODE_STRING *string_a_UnicodeString,const UNICODE_STRING *string_b_UnicodeString)
{
	g_return_val_if_fail(captive_validate_UnicodeString(string_a_UnicodeString),FALSE);
	g_return_val_if_fail(captive_validate_UnicodeString(string_b_UnicodeString),FALSE);

	/* cheap length test first; the buffers are read only for equal lengths */
	return (string_a_UnicodeString->Length==string_b_UnicodeString->Length
			&& captive_ucs2_compare_insensitive(string_a_UnicodeString->Buffer,string_b_UnicodeString->Buffer));
}


/* Detect the memory size required for the utf8 form of 'string_UnicodeString'
 * including its '\0' terminator; intended for g_alloca().
 * Invalid 'string_UnicodeString' results in size 1.
 */
size_t _captive_UnicodeString_to_utf8_alloca_internal_sizeof(const UNICODE_STRING *string_UnicodeString)
{
const WCHAR *cwcharp,*cwcharp_end;
size_t r;

	g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),1);

	cwcharp=string_UnicodeString->Buffer;
	cwcharp_end=cwcharp+string_UnicodeString->Length/sizeof(*string_UnicodeString->Buffer);

	/* start with the '\0'-termination and add utf8 length of each character */
	r=1;
	while (cwcharp<cwcharp_end) {
		gint utf8len=g_unichar_to_utf8(
				*cwcharp++,	/* c */
				NULL);	/* outbuf=NULL => just the length will be computed */

		g_assert(utf8len>=0);
		r+=utf8len;
		}
	/* we must have stopped exactly at the terminator */
	g_assert(*cwcharp==0);

	/* utf8 byte-size */
	return r;
}

/* Store 'string_UnicodeString' as '\0'-terminated utf8 to the caller supplied
 * 'mem' w/o any further allocations.
 * Invalid 'string_UnicodeString' results in empty string stored to 'mem'.
 */
void _captive_UnicodeString_to_utf8_alloca_internal_fill(gchar *mem,const UNICODE_STRING *string_UnicodeString)
{
const WCHAR *cwcharp;
#ifndef G_DISABLE_ASSERT
gchar *mem_orig=mem;	/* 'mem' gets advanced below; needed by the final checks */
#endif /* G_DISABLE_ASSERT */

	g_return_if_fail(mem!=NULL);
	if (!captive_validate_UnicodeString(string_UnicodeString)) {
		*mem='\0';
		g_return_if_reached();
		}

	/* No glib string conversion is usable here:
	 * - UNICODE_STRING uses ucs2
	 * - glib always g_malloc()s its result while we have to write
	 *   to the supplied memory storage
	 */
	cwcharp=string_UnicodeString->Buffer;
	while (*cwcharp) {
		gint utf8len=g_unichar_to_utf8(
				(gunichar)*cwcharp++,	/* c */
				mem);	/* outbuf */

		g_assert(utf8len>=0);
		mem+=utf8len;
		}
	*mem='\0';

	g_assert((size_t)((mem+1)-mem_orig) == _captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
	g_assert(captive_validate_utf8(mem_orig));
}


/**
 * captive_UnicodeString_to_utf8_malloc:
 * @string_UnicodeString: #PUNICODE_STRING type of string to convert.
 *
 * g_malloc()-based conversion from #PUNICODE_STRING to plain #utf8 string.
 * You must free the result with g_free() function.
 *
 * Returns: #const #gchar * g_malloc()ed converted string @string_UnicodeString.
 */
gchar *captive_UnicodeString_to_utf8_malloc(const UNICODE_STRING *string_UnicodeString)
{
gchar *r;
#ifndef FUNCMALLOC_FROM_ALLOCA
glong utf16_read,utf8_written;
GError *err;
#endif /* !FUNCMALLOC_FROM_ALLOCA */

	g_return_val_if_fail(captive_validate_UnicodeString(string_UnicodeString),g_strdup(""));

#ifdef FUNCMALLOC_FROM_ALLOCA

	r=g_malloc(_captive_UnicodeString_to_utf8_alloca_internal_sizeof(string_UnicodeString));
	_captive_UnicodeString_to_utf8_alloca_internal_fill(r,string_UnicodeString);

#else

	err=NULL;	/* not precleared by g_utf8_to_utf16()! */
	r=g_utf16_to_utf8(
			(const gunichar2 *)string_UnicodeString->Buffer,	/* str */
			-1,	/* len=>'\0'-terminated */
			&utf16_read,	/* items_read; counted in unichar2 (NOT UTF-16 characters or bytes!) */
			&utf8_written,	/* items_written; counted in bytes (NOT UTF-8 characters!) */
			&err);
	if (err) {
		g_warning("%s: utf16_read=%ld,utf8_written=%ld: %s",G_STRLOC,
				(long)utf16_read,(long)utf8_written,err->message);
		g_error_free(err);
		g_assert(r==NULL);
		g_return_val_if_reached(g_strdup(""));
		}
	g_assert(r!=NULL);

	g_assert(utf16_read==(glong)(string_UnicodeString->length/sizeof(*string_UnicodeString->Buffer)));
	g_assert(utf6_written==strlen(r));

#endif /* !FUNCMALLOC_FROM_ALLOCA */

	g_assert(captive_validate_utf8(r));

	return r;
}


/* Detect the memory size required for the UNICODE_STRING form of
 * 'string_utf8'; intended for g_alloca(). The size covers the UNICODE_STRING
 * header followed by the characters and their 0 terminator.
 * Invalid 'string_utf8' results in the size of an empty string as
 * _captive_utf8_to_UnicodeString_alloca_internal_fill() stores the header
 * and the terminator even for the invalid input.
 */
size_t _captive_utf8_to_UnicodeString_alloca_internal_sizeof(const gchar *string_utf8)
{
	g_return_val_if_fail(captive_validate_utf8(string_utf8),
			sizeof(UNICODE_STRING)+sizeof(WCHAR));	/* header + '\0'-termination */

	/* find the value for PUNICODE_STRING->MaximumLength */
	return 0
			+sizeof(UNICODE_STRING)
			+sizeof(WCHAR)*(g_utf8_strlen(string_utf8,
					-1	/* max; -1 means '\0'-terminated */
					)+1);	/* '\0'-termination */
}

/* Finish 'string_UnicodeString' holding 'length' characters in its already
 * assigned ->Buffer: store the 0 terminator and set up both ->Length
 * and ->MaximumLength (the latter covers exactly the terminator in addition).
 */
static void terminate_static_UnicodeString(UNICODE_STRING *string_UnicodeString,glong length)
{
	/* 'string_UnicodeString' is not yet valid in this point! */
	g_return_if_fail(string_UnicodeString!=NULL);
	g_return_if_fail(length>=0);

	string_UnicodeString->Buffer[length]=0;
	string_UnicodeString->Length=sizeof(WCHAR)*length;
	string_UnicodeString->MaximumLength=sizeof(WCHAR)*(length+1);

	/* from now on it must be a regular terminated string */
	g_assert(captive_validate_UnicodeString(string_UnicodeString));
}

/* Transfer 'string_utf8' to the caller supplied memory 'mem' as UNICODE_STRING;
 * the characters get stored right behind the UNICODE_STRING header.
 * 'mem' must offer _captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8)
 * bytes. The temporary conversion buffer is released before the return.
 * Empty string is stored to 'mem' (and a glib warning is produced) if:
 * - 'string_utf8' is not valid utf8
 * - glib conversion to UTF-16 fails
 * - the converted string is not pure UCS-2 or would not fit the size
 *   reported by _captive_utf8_to_UnicodeString_alloca_internal_sizeof()
 */
void _captive_utf8_to_UnicodeString_alloca_internal_fill(UNICODE_STRING *mem,const gchar *string_utf8)
{
gunichar2 *utf16;
glong utf8_read,utf16_written;
GError *err;

	g_return_if_fail(mem!=NULL);
	mem->Buffer=(PWSTR)(((char *)mem)+sizeof(*mem));	/* for terminate_static_UnicodeString() below */
	if (!captive_validate_utf8(string_utf8)) {
		terminate_static_UnicodeString(mem,0);
		g_return_if_reached();
		}

	err=NULL;	/* not precleared by g_utf8_to_utf16()! */
	utf16=g_utf8_to_utf16(
			string_utf8,	/* str */
			-1,	/* len=>'\0'-terminated */
			&utf8_read,	/* items_read; counted in bytes (NOT chars!) */
			&utf16_written,	/* items_written; counted in gunichar2 w/o the terminator (NOT chars or bytes!) */
			&err);
	if (err) {
		g_warning("%s: utf8_read=%ld,utf16_written=%ld: %s",G_STRLOC,
				(long)utf8_read,(long)utf16_written,err->message);
		g_error_free(err);
		g_assert(utf16==NULL);
		terminate_static_UnicodeString(mem,0);
		g_return_if_reached();
		}
	g_assert(utf16!=NULL);

	/* Check for UCS-2 compliance (reject if surrogates inside) and check
	 * the validity of _captive_utf8_to_UnicodeString_alloca_internal_sizeof() result.
	 * These are runtime checks as memcpy() below would overflow 'mem' otherwise.
	 */
	if (!captive_validate_ucs2((const captive_ucs2 *)utf16)
			|| sizeof(*mem)+sizeof(WCHAR)*(utf16_written+1)	/* +1 => '\0'-termination */
					!=_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8)) {
		g_free(utf16);
		terminate_static_UnicodeString(mem,0);
		g_return_if_reached();
		}
	/* valid UCS-2 of the expected size */

	g_assert(utf8_read==(glong)strlen(string_utf8));
	g_assert(utf16_written==captive_ucs2_strlen((const captive_ucs2 *)utf16));

	memcpy(mem->Buffer,utf16,sizeof(WCHAR)*(utf16_written+1));
	g_free(utf16);
	terminate_static_UnicodeString(mem,utf16_written);

	g_assert(captive_validate_UnicodeString(mem));
}


/**
 * captive_utf8_to_UnicodeString_malloc:
 * @string_utf8: #const #gchar * string in #utf8 to convert.
 *
 * g_malloc()-based conversion from plain #utf8 string to #PUNICODE_STRING.
 * The header and the characters share one memory block;
 * you must free the result with g_free() function.
 * Invalid @string_utf8 results in the conversion of empty string.
 *
 * Returns: #PUNICODE_STRING g_malloc()ed converted string @string_utf8.
 */
PUNICODE_STRING captive_utf8_to_UnicodeString_malloc(const gchar *string_utf8)
{
UNICODE_STRING *r;
#ifdef FUNCMALLOC_FROM_ALLOCA
size_t r_size;
#else /* !FUNCMALLOC_FROM_ALLOCA */
gunichar *ucs4;
glong utf8_read,ucs4_written;
GError *err;
#endif /* !FUNCMALLOC_FROM_ALLOCA */

	g_return_val_if_fail(captive_validate_utf8(string_utf8),captive_utf8_to_UnicodeString_malloc(""));

#ifndef FUNCMALLOC_FROM_ALLOCA

#error "FIXME: NOT IMPLEMENTED"

#else /* FUNCMALLOC_FROM_ALLOCA */

	/* reuse the g_alloca() helpers: measure, allocate, fill */
	r_size=_captive_utf8_to_UnicodeString_alloca_internal_sizeof(string_utf8);
	r=g_malloc(r_size);
	_captive_utf8_to_UnicodeString_alloca_internal_fill(r,string_utf8);

#endif /* FUNCMALLOC_FROM_ALLOCA */

	g_assert(captive_validate_UnicodeString(r));

	return r;
}


/* Conversions already done by captive_ucs4_to_utf16_const().
 * map: (const gunichar *) -> (const gunichar2 *); UCS-4 -> UTF-16
 * The keys are the input string addresses (g_direct_hash/g_direct_equal).
 */
static GHashTable *captive_ucs4_to_utf16_hash;

/* Create 'captive_ucs4_to_utf16_hash' if it does not exist yet. */
static void captive_ucs4_to_utf16_hash_init(void)
{
	if (!captive_ucs4_to_utf16_hash) {
		captive_ucs4_to_utf16_hash=g_hash_table_new_full(
				g_direct_hash,	/* hash_func */
				g_direct_equal,	/* key_equal_func */
				(GDestroyNotify)NULL,	/* key_destroy_func; we require persistent strings as input */
				(GDestroyNotify)g_free);	/* value_destroy_func; result of g_ucs4_to_utf16() */
		}
}

/**
 * captive_ucs4_to_utf16_const:
 * @string_ucs4: #const #gunichar * type of persistent string to convert.
 * This string MUST remain readable with the same content forever.
 *
 * Constant string conversion from 32-bit #wchar_t to 16-bit (possible pairs of) UTF-16.
 * You may not modify the result in any way.
 * The result is remembered for the address of @string_ucs4; repeated calls
 * with the same address return the same result without another conversion.
 *
 * It is guaranteed to get two different string addresses for two different
 * input addresses even if the input strings content is the same.
 * Otherwise we would behave as #GCC option %-fmerge-constants which
 * results in %C non-conforming behaviour.
 *
 * Invalid @string_ucs4 or a failed conversion results in the conversion
 * of empty string.
 *
 * FIXME: UTF-16 encoding IS NOT IMPLEMENTED.
 *
 * See also captive_ucs4_to_ucs2_const().
 *
 * Returns: #const #gunichar2 * converted string @string_ucs4.
 */
const gunichar2 *captive_ucs4_to_utf16_const(const gunichar *string_ucs4)
{
gunichar2 *r;
glong ucs4_read,utf16_written;
GError *err=NULL;	/* not precleared by g_ucs4_to_utf16()! */

	g_return_val_if_fail(captive_validate_ucs4(string_ucs4),captive_ucs4_to_utf16_const((const gunichar *)L""));

	captive_ucs4_to_utf16_hash_init();

	/* already converted before? */
	r=g_hash_table_lookup(captive_ucs4_to_utf16_hash,
			string_ucs4);	/* key */
	if (r!=NULL)
		return r;

	/* Prepare 'r' as UTF-16 */
	r=g_ucs4_to_utf16(
			string_ucs4,	/* str */
			-1,	/* len; -1 means '\0'-termination */
			&ucs4_read,	/* items_read; counted in chars (==unichars; NOT bytes!) */
			&utf16_written,	/* items_written; counted in gunichar2 (NOT chars or bytes!) */
			&err);
	if (err!=NULL) {
		g_warning("%s: ucs4_read=%ld,utf16_written=%ld: %s",G_STRLOC,
				(long)ucs4_read,(long)utf16_written,err->message);
		g_error_free(err);
		g_assert(r==NULL);
		g_return_val_if_reached(captive_ucs4_to_utf16_const((const gunichar *)L""));
		}
	g_assert(r!=NULL);
	g_assert(ucs4_read==(glong)wcslen((const wchar_t *)string_ucs4));
	/* FIXME: We don't have captive_utf16_strlen() */
	g_assert(utf16_written==(glong)captive_ucs2_strlen((const gunichar2 *)r));
	/* (ucs4_read==utf16_written) check would discard any double-pair UTF-16 encodings
	 * but this function is designed as UTF-16 compliant.
	 */

	/* remember the result for the next call with the same address */
	g_hash_table_insert(captive_ucs4_to_utf16_hash,
			(gpointer)string_ucs4,	/* key; de-const */
			r);	/* value */

	/* 'r' is not validated here: We don't have captive_validate_utf16() */

	return r;
}
