mdw@git.distorted.org.uk Git - sgt/charset/blame_incremental

... / ...

Commit	Line	Data
	1	/*
	2	* internal.h - internal header stuff for the charset library.
	3	*/
	4
	5	#ifndef charset_internal_h
	6	#define charset_internal_h
	7
	/*
	 * lenof(x) yields the number of elements in the array `x'. It is
	 * only meaningful when `x' is a real array rather than a pointer.
	 */
	#define lenof(x) (sizeof(x) / sizeof((x)[0]))
	10
	/*
	 * ERROR stands in for a Unicode character whenever a conversion
	 * fails. It is an invalid Unicode value, chosen so that it can be
	 * told apart from genuine output.
	 */
	#define ERROR 0xFFFFL

	/*
	 * Boolean constants. Whatever definitions may already be in force
	 * are dropped first, so the values below always win.
	 */
	#undef TRUE
	#undef FALSE
	#define TRUE 1
	#define FALSE 0
	18
	/* Short names for the structures defined further down this header. */
	typedef struct sbcs_data sbcs_data;
	typedef struct charset_spec charset_spec;
	21
	22	struct charset_spec {
	23	int charset; /* numeric identifier */
	24
	25	/*
	26	* A function to read the character set and output Unicode
	27	* characters. The `emit' function expects to get Unicode chars
	28	* passed to it; it should be sent ERROR for any encoding error
	29	* on the input.
	30	*/
	31	void (read)(charset_spec const charset, long int input_chr,
	32	charset_state *state,
	33	void (emit)(void ctx, long int output), void *emitctx);
	34	/*
	35	* A function to read Unicode characters and output in this
	36	* character set. The `emit' function expects to get byte
	37	* values passed to it.
	38	*
	39	* A non-representable input character should cause a FALSE
	40	* return, _before_ `emit' is called. Successful conversion
	41	* causes a TRUE return.
	42	*
	43	* If `input_chr' is -1, this function must revert the encoding
	44	* state to any default required at the end of a piece of
	45	* encoded text.
	46	*/
	47	int (write)(charset_spec const charset, long int input_chr,
	48	charset_state *state,
	49	void (emit)(void ctx, long int output), void *emitctx);
	50	void const *data;
	51	};
	52
	/*
	 * Layout of the `data' member as the SBCS read and write functions
	 * see it, and therefore the layout every SBCS definition provides.
	 */
	struct sbcs_data {
	    /*
	     * Forward table: one entry per SBCS position, giving the
	     * Unicode code point it converts to. An entry of ERROR marks a
	     * byte value the SBCS does not define; meeting that byte in
	     * input is an error.
	     */
	    unsigned long sbcs2ucs[256];

	    /*
	     * Reverse table, for going from Unicode back to the SBCS. It
	     * holds the valid byte values of the SBCS, sorted by the Unicode
	     * value each one translates to. To look up a Unicode value U,
	     * binary-search this table through the forward table: at
	     * position X, branch on whether sbcs2ucs[ucs2sbcs[X]] is less
	     * than, greater than, or equal to U.
	     *
	     * An SBCS may have fewer than 256 valid byte values, so the
	     * length of this table is supplied alongside it in `nvalid'.
	     */
	    unsigned char ucs2sbcs[256];
	    int nvalid;
	};
	83
	84	/*
	85	* Prototypes for internal library functions.
	86	*/
	87	charset_spec const *charset_find_spec(int charset);
	88	void read_sbcs(charset_spec const *charset, long int input_chr,
	89	charset_state *state,
	90	void (emit)(void ctx, long int output), void *emitctx);
	91	int write_sbcs(charset_spec const *charset, long int input_chr,
	92	charset_state *state,
	93	void (emit)(void ctx, long int output), void *emitctx);
	94	long int sbcs_to_unicode(const struct sbcs_data *sd, long int input_chr);
	95	long int sbcs_from_unicode(const struct sbcs_data *sd, long int input_chr);
	96
	97	void read_utf8(charset_spec const *charset, long int input_chr,
	98	charset_state *state,
	99	void (emit)(void ctx, long int output), void *emitctx);
	100	int write_utf8(charset_spec const *charset, long int input_chr,
	101	charset_state *state,
	102	void (emit)(void ctx, long int output),
	103	void *emitctx);
	104
	/*
	 * Lookups for the multibyte character set tables.
	 *
	 * Each xxx_to_unicode() takes a position in the table (`r' and `c',
	 * plus `p' for CNS 11643 -- presumably row, column and plane) and
	 * returns a long int.
	 *
	 * Each unicode_to_xxx() goes the other way: it takes a Unicode value
	 * and writes the position through the pointers supplied.
	 * NOTE(review): the int result is presumably a success flag --
	 * confirm against the definitions.
	 */
	long int big5_to_unicode(int r, int c);
	int unicode_to_big5(long int unicode, int *r, int *c);

	long int cns11643_to_unicode(int p, int r, int c);
	int unicode_to_cns11643(long int unicode, int *p, int *r, int *c);

	long int cp949_to_unicode(int r, int c);
	int unicode_to_cp949(long int unicode, int *r, int *c);

	long int ksx1001_to_unicode(int r, int c);
	int unicode_to_ksx1001(long int unicode, int *r, int *c);

	long int gb2312_to_unicode(int r, int c);
	int unicode_to_gb2312(long int unicode, int *r, int *c);

	long int jisx0208_to_unicode(int r, int c);
	int unicode_to_jisx0208(long int unicode, int *r, int *c);

	long int jisx0212_to_unicode(int r, int c);
	int unicode_to_jisx0212(long int unicode, int *r, int *c);
	119
	/*
	 * Placate compiler warnings about unused parameters, of which we
	 * expect to have some in this library.
	 *
	 * The argument is cast to void rather than assigned to itself, so
	 * the macro also works on const-qualified parameters and does not
	 * provoke a self-assignment warning of its own.
	 */
	#define UNUSEDARG(x) ( (void)(x) )
	125
	126	#endif /* charset_internal_h */