[sgt/charset] / xenc.c

/*
 * xenc.c - translate our internal character set codes to and from
 * X11 character encoding names.
 * 
 */

#include <ctype.h>
#include "charset.h"
#include "internal.h"

/*
 * Table pairing X11 font encoding names with our internal charset
 * ids. Both translation functions in this file scan it linearly
 * from the top, so the order of entries is significant: when
 * several names share one charset id, the name listed first is the
 * one handed back for that id.
 */
static const struct {
    const char *name;		       /* X11 encoding name */
    int charset;		       /* internal charset id (CS_*) */
} xencs[] = {
    /*
     * Officially registered encoding names. This list is derived
     * from the font encodings section of
     *
     *   http://ftp.x.org/pub/DOCS/registry
     *
     * Where multiple encoding names map to the same encoding id
     * (such as iso8859-15 and fcd8859-15), the first is considered
     * canonical and will be returned when translating the id to a
     * string.
     */
    { "iso646.1991-irv", CS_ASCII },
    { "iso8859-1", CS_ISO8859_1 },
    { "iso8859-2", CS_ISO8859_2 },
    { "iso8859-3", CS_ISO8859_3 },
    { "iso8859-4", CS_ISO8859_4 },
    { "iso8859-5", CS_ISO8859_5 },
    { "iso8859-6", CS_ISO8859_6 },
    { "iso8859-7", CS_ISO8859_7 },
    { "iso8859-8", CS_ISO8859_8 },
    { "iso8859-9", CS_ISO8859_9 },
    { "iso8859-10", CS_ISO8859_10 },
    { "iso8859-13", CS_ISO8859_13 },
    { "iso8859-14", CS_ISO8859_14 },
    { "iso8859-15", CS_ISO8859_15 },
    { "fcd8859-15", CS_ISO8859_15 },
    { "hp-roman8", CS_HP_ROMAN8 },
    { "koi8-r", CS_KOI8_R },
    { "jisx0201.1976-0", CS_JISX0201 },
    /*
     * Unofficial encoding names found in the wild. The same
     * first-entry-wins rule applies here: both viscii names map to
     * CS_VISCII, and viscii1.1-1 is the one returned for that id.
     */
    { "iso8859-16", CS_ISO8859_16 },
    { "koi8-u", CS_KOI8_U },
    { "ibm-cp437", CS_CP437 },
    { "ibm-cp850", CS_CP850 },
    { "ibm-cp852", CS_CP852 },
    { "ibm-cp866", CS_CP866 },
    { "microsoft-cp1250", CS_CP1250 },
    { "microsoft-cp1251", CS_CP1251 },
    { "microsoft-cp1252", CS_CP1252 },
    { "microsoft-cp1253", CS_CP1253 },
    { "microsoft-cp1254", CS_CP1254 },
    { "microsoft-cp1255", CS_CP1255 },
    { "microsoft-cp1256", CS_CP1256 },
    { "microsoft-cp1257", CS_CP1257 },
    { "microsoft-cp1258", CS_CP1258 },
    { "mac-roman", CS_MAC_ROMAN },
    { "viscii1.1-1", CS_VISCII },
    { "viscii1-1", CS_VISCII },
};

/*
 * Translate an internal charset id into an X11 encoding name.
 *
 * xencs[] is searched from its first entry onwards and the name of
 * the first entry whose charset field equals `charset` is returned,
 * so an id with several aliases yields its earliest-listed name.
 * Returns NULL if no entry carries that id.
 */
const char *charset_to_xenc(int charset)
{
    const char *found = NULL;	       /* stays NULL if no match */
    int idx = 0;

    while (idx < (int)lenof(xencs)) {
        if (xencs[idx].charset == charset) {
            found = xencs[idx].name;
            break;		       /* first match is canonical */
        }
        idx++;
    }

    return found;
}

/*
 * Translate an X11 character encoding name into our internal
 * charset id.
 *
 * `name` is compared against the name of each xencs[] entry in
 * table order. The comparison ignores case (via tolower) and must
 * cover both strings in full; matching only a prefix of either one
 * does not count. Returns the charset field of the first matching
 * entry, or CS_NONE if no entry matches or if `name` is a null
 * pointer.
 */
int charset_from_xenc(const char *name)
{
    int i;

    /* A null name cannot match anything; don't dereference it. */
    if (!name)
        return CS_NONE;

    for (i = 0; i < (int)lenof(xencs); i++) {
        const char *p = name;
        const char *q = xencs[i].name;

        /*
         * Advance through both strings while their characters agree
         * case-insensitively. Testing *p alone is enough to stop at
         * the end: if q ends first, its NUL cannot equal the
         * non-NUL character at *p. The casts to unsigned char keep
         * the tolower() arguments in the range it is defined for.
         */
        while (*p &&
               tolower((unsigned char)*p) == tolower((unsigned char)*q)) {
            p++;
            q++;
        }

        /* Only a match if both strings ran out at the same point. */
        if (!*p && !*q)
            return xencs[i].charset;
    }

    return CS_NONE;		       /* not found */
}
Commit	Line	Data
c6d25d8d	1	/*
	2	* xenc.c - translate our internal character set codes to and from
	3	* X11 character encoding names.
	4	*
	5	*/
	6
	7	#include <ctype.h>
	8	#include "charset.h"
	9	#include "internal.h"
	10
	11	static const struct {
	12	const char *name;
	13	int charset;
	14	} xencs[] = {
	15	/*
	16	* Officially registered encoding names. This list is derived
	17	* from the font encodings section of
	18	*
	19	* http://ftp.x.org/pub/DOCS/registry
	20	*
	21	* Where multiple encoding names map to the same encoding id
	22	* (such as iso8859-15 and fcd8859-15), the first is considered
	23	* canonical and will be returned when translating the id to a
	24	* string.
	25	*/
3f2ff6ae	26	{ "iso646.1991-irv", CS_ASCII },
c6d25d8d	27	{ "iso8859-1", CS_ISO8859_1 },
	28	{ "iso8859-2", CS_ISO8859_2 },
	29	{ "iso8859-3", CS_ISO8859_3 },
	30	{ "iso8859-4", CS_ISO8859_4 },
	31	{ "iso8859-5", CS_ISO8859_5 },
	32	{ "iso8859-6", CS_ISO8859_6 },
	33	{ "iso8859-7", CS_ISO8859_7 },
	34	{ "iso8859-8", CS_ISO8859_8 },
	35	{ "iso8859-9", CS_ISO8859_9 },
	36	{ "iso8859-10", CS_ISO8859_10 },
	37	{ "iso8859-13", CS_ISO8859_13 },
	38	{ "iso8859-14", CS_ISO8859_14 },
	39	{ "iso8859-15", CS_ISO8859_15 },
	40	{ "fcd8859-15", CS_ISO8859_15 },
	41	{ "hp-roman8", CS_HP_ROMAN8 },
	42	{ "koi8-r", CS_KOI8_R },
3c80ed0c	43	{ "jisx0201.1976-0", CS_JISX0201 },
c6d25d8d	44	/*
	45	* Unofficial encoding names found in the wild.
	46	*/
	47	{ "iso8859-16", CS_ISO8859_16 },
	48	{ "koi8-u", CS_KOI8_U },
	49	{ "ibm-cp437", CS_CP437 },
	50	{ "ibm-cp850", CS_CP850 },
5930e9ef	51	{ "ibm-cp852", CS_CP852 },
9b7e7a92	52	{ "ibm-cp866", CS_CP866 },
c6d25d8d	53	{ "microsoft-cp1250", CS_CP1250 },
	54	{ "microsoft-cp1251", CS_CP1251 },
	55	{ "microsoft-cp1252", CS_CP1252 },
	56	{ "microsoft-cp1253", CS_CP1253 },
	57	{ "microsoft-cp1254", CS_CP1254 },
	58	{ "microsoft-cp1255", CS_CP1255 },
	59	{ "microsoft-cp1256", CS_CP1256 },
	60	{ "microsoft-cp1257", CS_CP1257 },
	61	{ "microsoft-cp1258", CS_CP1258 },
	62	{ "mac-roman", CS_MAC_ROMAN },
	63	{ "viscii1.1-1", CS_VISCII },
	64	{ "viscii1-1", CS_VISCII },
	65	};
	66
	67	const char *charset_to_xenc(int charset)
	68	{
	69	int i;
	70
	71	for (i = 0; i < (int)lenof(xencs); i++)
	72	if (charset == xencs[i].charset)
	73	return xencs[i].name;
	74
	75	return NULL; /* not found */
	76	}
	77
	78	int charset_from_xenc(const char *name)
	79	{
	80	int i;
	81
	82	for (i = 0; i < (int)lenof(xencs); i++) {
	83	const char *p, *q;
	84	p = name;
	85	q = xencs[i].name;
	86	while (*p || *q) {
273ef1a1	87	if (tolower((unsigned char)*p) != tolower((unsigned char)*q))
c6d25d8d	88	break;
	89	p++; q++;
	90	}
	91	if (!*p && !*q)
	92	return xencs[i].charset;
	93	}
	94
	95	return CS_NONE; /* not found */
	96	}