#! @PYTHON@ ### ### Generate multiprecision integer representations ### ### (c) 2013 Straylight/Edgeware ### ###----- Licensing notice --------------------------------------------------- ### ### This file is part of Catacomb. ### ### Catacomb is free software; you can redistribute it and/or modify ### it under the terms of the GNU Library General Public License as ### published by the Free Software Foundation; either version 2 of the ### License, or (at your option) any later version. ### ### Catacomb is distributed in the hope that it will be useful, ### but WITHOUT ANY WARRANTY; without even the implied warranty of ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ### GNU Library General Public License for more details. ### ### You should have received a copy of the GNU Library General Public ### License along with Catacomb; if not, write to the Free ### Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, ### MA 02111-1307, USA. from __future__ import with_statement import re as RX import optparse as OP import sys as SYS; stdout = SYS.stdout import types as TY ###-------------------------------------------------------------------------- ### Random utilities. def write_header(mode, name): """ Write a C-language file header. The header comment identifies the processing MODE, and the NAME of the output file. """ stdout.write("""\ /* -*-c-*- GENERATED by mpgen (%s) * * %s */ """ % (mode, name)) def write_banner(text): """Write a separator banner to the output, with header TEXT.""" stdout.write("/*----- %s %s*/\n" % (text, '-' * (66 - len(text)))) class struct (object): """ A struct object exists so that you can set attributes on it. """ pass R_IDBAD = RX.compile('[^0-9A-Za-z]') def fix_name(name): """Replace non-alphanumeric characters in NAME with underscores.""" return R_IDBAD.sub('_', name) if SYS.version_info >= (3,): def func_name(func): return func.__name__ xrange = range long = int else: def func_name(func): return func.func_name def with_metaclass(meta, *supers): return meta('#' % meta.__name__, supers or (object,), dict()) def load(file, mod): with open(file) as f: text = f.read() code = compile(text, file, 'exec') exec(code, mod) ###-------------------------------------------------------------------------- ### Determining the appropriate types. ## A dictionary mapping type tags to subclasses of BasicIntType. TYPEMAP = {} class IntClass (type): """ The IntClass is a metaclass for integer-type classes. It associates the type class with its tag in the `TYPEMAP' dictionary. """ def __new__(cls, name, supers, dict): c = type.__new__(cls, name, supers, dict) try: TYPEMAP[c.tag] = c except AttributeError: pass return c class BasicIntType (with_metaclass(IntClass)): """ A base class for integer-type classes, providing defaults and protocol. Integer-type classes have the following attributes and methods. preamble Some code to be emitted to a header file to make use of the type. Defaults to the empty string. typedef_prefix A prefix to be written before the type's name in a `typedef' declaration, possibly to muffle warnings. Defaults to the empty string. literalfmt A Python `%' format string for generating literal values of the type; used by the default `literal' method (so if you override it then you don't need to set this). Defaults to `%su'. literal(VALUE, [FMT]) Emit a literal value of the type, encoding VALUE. The default FMT has the form `0x%0Nx', so as to emit in hex with the appropriate number of leading zeros. Instances also carry additional attributes. bits The width of the integer type, in bits. rank An integer giving the conversion rank of the type. Higher values generally denote wider types. litwd The width of a literal of the type, in characters. """ __metaclass__ = IntClass preamble = '' typedef_prefix = '' literalfmt = '%su' def __init__(me, bits, rank): me.bits = bits me.rank = rank me.litwd = len(me.literal(0)) def literal(me, value, fmt = None): if fmt is None: fmt = '0x%0' + str((me.bits + 3)//4) + 'x' return me.literalfmt % (fmt % value) class UnsignedCharType (BasicIntType): tag = 'uchar' name = 'unsigned char' class UnsignedShortType (BasicIntType): tag = 'ushort' name = 'unsigned short' class UnsignedIntType (BasicIntType): tag = 'uint' name = 'unsigned int' class UnsignedLongType (BasicIntType): tag = 'ulong' name = 'unsigned long' literalfmt = '%sul' class UnsignedLongLongType (BasicIntType): tag = 'ullong' name = 'unsigned long long' preamble = """ #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 91) # define CATACOMB_GCC_EXTENSION __extension__ #else # define CATACOMB_GCC_EXTENSION #endif """ typedef_prefix = 'CATACOMB_GCC_EXTENSION ' literalfmt = 'CATACOMB_GCC_EXTENSION %sull' class UIntMaxType (BasicIntType): tag = 'uintmax' name = 'uintmax_t' preamble = "\n#include \n" class TypeChoice (object): """ A TypeChoice object selects C integer types for multiprecision integers. It exports its decisions as attributes: mpwbits The width of an `mpw', in bits. mpw The integer type for single-precision values. mpd The integer type for double-precision values. ti An object bearing raw information about the available integer types, as follows... TYPEINFO A list of items (TAG, BITS) describing the widths of the available types suitable for multiprecision use, in ascending order of rank. LIMITS A list of items (TAG, LO, HI) describing the ranges of integer types, for constructing the `mplimits' files. """ def __init__(me, tifile): """ Read the definitions from TIFILE, and select types appropriately. The TIFILE is a tiny fragment of Python code which should set `TYPEINFO' and `LIMITS' in its global namespace. """ ## Load the captured type information. me.ti = TY.ModuleType('typeinfo') load(opts.typeinfo, me.ti.__dict__) ## Build a map of the available types. tymap = {} byrank = [] for tag, bits in me.ti.TYPEINFO: rank = len(byrank) tymap[tag] = rank byrank.append(TYPEMAP[tag](bits, rank)) ## First pass: determine a suitable word size. The criteria are (a) ## there exists another type at least twice as long (so that we can do a ## single x single -> double multiplication), and (b) operations on a ## word are efficient (so we'd prefer a plain machine word). We'll start ## at `int' and work down. Maybe this won't work: there's a plan B. mpwbits = 0 i = tymap['uint'] while not mpwbits and i >= 0: ibits = byrank[i].bits for j in xrange(i + 1, len(byrank)): if byrank[j].bits >= 2*ibits: mpwbits = ibits break ## If that didn't work, then we'll start with the largest type available ## and go with half its size. if not mpwbits: mpwbits = byrank[-1].bits//2 ## Make sure we've not ended up somewhere really silly. if mpwbits < 16: raise Exception("`mpw' type is too small: your C environment is weird") ## Now figure out suitable types for `mpw' and `mpd'. def find_type(bits, what): for ty in byrank: if ty.bits >= bits: return ty raise Exception \ ("failed to find suitable %d-bit type, for %s" % (bits, what)) ## Store our decisions. me.mpwbits = mpwbits me.mpw = find_type(mpwbits, 'mpw') me.mpd = find_type(mpwbits*2, 'mpd') ###-------------------------------------------------------------------------- ### Outputting constant multiprecision integers. ## The margin for outputting MP literals. MARGIN = 72 def write_preamble(): """ Write a preamble for files containing multiprecision literals. We define a number of macros for use by the rest of the code: ZERO_MP An `mp' initializer denoting the value zero. POS_MP(NAME) Constructs an `mp' initializer denoting a positive integer whose limbs were emitted with the given NAME. NEG_MP(NAME) Constructs an `mp' initializer denoting a negative integer whose limbs were emitted with the given NAME. """ stdout.write(""" #include #define MP_(name, flags) \\ { (/*unconst*/ mpw *)name##__mpw, \\ (/*unconst*/ mpw *)name##__mpw + N(name##__mpw), \\ N(name##__mpw), 0, MP_CONST | flags, 0 } #define ZERO_MP { 0, 0, 0, 0, MP_CONST, 0 } #define POS_MP(name) MP_(name, 0) #define NEG_MP(name) MP_(name, MP_NEG) """) def write_limbs(name, x): """ Write the limbs of the value X, with the given NAME. """ ## We don't need to do anything special for zero. if not x: return ## Start on the limb vector. No delimiter needed, and we shall need to ## start a new line before any actual output. We want to write the ## absolute value here, because we use a signed-magnitude representation. stdout.write("\nstatic const mpw %s__mpw[] = {" % name) sep = '' pos = MARGIN if x < 0: x = -x mask = (1 << TC.mpwbits) - 1 ## We work from the little-end up, picking off `mpwbits' at a time. Start ## a new line if we can't fit the value on the current one. while x > 0: w, x = x & mask, x >> TC.mpwbits f = TC.mpw.literal(w) if pos + 2 + len(f) <= MARGIN: stdout.write(sep + ' ' + f) else: pos = 2 stdout.write(sep + '\n ' + f) pos += len(f) + 2 sep = ',' ## We're done. Finish off the initializer. stdout.write("\n};\n") def mp_body(name, x): """ Write the body of an `mp' object, for the value NAME. """ return "%s_MP(%s)" % (x >= 0 and "POS" or "NEG", name) ###-------------------------------------------------------------------------- ### Mode definition machinery. ## A dictionary mapping mode names to their handler functions. MODEMAP = {} def defmode(func): """ Function decorator: associate the function with the name of a mode. The mode name is taken from the function's name: a leading `m_' is stripped off, if there is one. Mode functions are invoked with the positional arguments from the command and are expected to write their output to stdout. """ name = func_name(func) if name.startswith('m_'): name = name[2:] MODEMAP[name] = func return func ###-------------------------------------------------------------------------- ### The basic types header. @defmode def m_mptypes(): """ Write the `mptypes.h' header. This establishes the following types. mpw An integer type for single-precision values. mpd An integer type for double-precision values. And, for t being each of `w' or `d', the following constants: MPt_BITS The width of the type, in bits. MPt_P2 The smallest integer k such that 2^k is not less than MPt_BITS. (This is used for binary searches.) MPt_MAX The largest value which may be stored in an object of the type. """ ## Write the preamble. write_header("mptypes", "mptypes.h") stdout.write("""\ #ifndef CATACOMB_MPTYPES_H #define CATACOMB_MPTYPES_H """) ## Write any additional premable for the types we've selected. have = set([TC.mpw, TC.mpd]) for t in have: stdout.write(t.preamble) ## Emit the types and constants. for label, t, bits in [('mpw', TC.mpw, TC.mpwbits), ('mpd', TC.mpd, TC.mpwbits*2)]: LABEL = label.upper() stdout.write("\n%stypedef %s %s;\n" % (t.typedef_prefix, t.name, label)) stdout.write("#define %s_BITS %d\n" % (LABEL, bits)) i = 1 while 2*i < bits: i *= 2 stdout.write("#define %s_P2 %d\n" % (LABEL, i)) stdout.write("#define %s_MAX %s\n" % (LABEL, t.literal((1 << bits) - 1, "%d"))) ## Done. stdout.write("\n#endif\n") ###-------------------------------------------------------------------------- ### Constant tables. @defmode def m_mplimits_c(): """ Write the `mplimits.c' source file. This contains `mp' constants corresponding to the various integer types' upper and lower bounds. The output is a vector `mp_limits' consisting of the distinct nonzero bounds, in order of their first occurrence in the `ti.LIMITS' list. """ ## Write the preamble. write_header("mplimits_c", "mplimits.c") stdout.write('#include "mplimits.h"\n') write_preamble() ## Write out limbs for limits as we come across them. seen = {} v = [] def write(x): if not x or x in seen: return seen[x] = 1 write_limbs('limits_%d' % len(v), x) v.append(x) for tag, lo, hi in TC.ti.LIMITS: write(lo) write(hi) ## Write the main vector. stdout.write("\nmp mp_limits[] = {") i = 0 sep = "\n " for x in v: stdout.write("%s%s_MP(limits_%d)" % (sep, x < 0 and "NEG" or "POS", i)) i += 1 sep = ",\n " stdout.write("\n};\n"); @defmode def m_mplimits_h(): """ Write the `mplimits.h' source file. For each type TAG, this defines constants MP_TAG_MIN and MP_TAG_MAX representing the lower and upper bounds of the type. """ ## Write the preamble. write_header("mplimits_h", "mplimits.h") stdout.write("""\ #ifndef CATACOMB_MPLIMITS_H #define CATACOMB_MPLIMITS_H #ifndef CATACOMB_MP_H # include "mp.h" #endif extern mp mp_limits[]; """) ## Now define constants for the bounds. Things which are zero can go to ## our existing `MP_ZERO'; otherwise we index the `mp_limits' vector. seen = { 0: "MP_ZERO" } slot = [0] def find(x): try: r = seen[x] except KeyError: r = seen[x] = '(&mp_limits[%d])' % slot[0] slot[0] += 1 return r for tag, lo, hi in TC.ti.LIMITS: stdout.write("#define MP_%s_MIN %s\n" % (tag, find(lo))) stdout.write("#define MP_%s_MAX %s\n" % (tag, find(hi))) ## All done. stdout.write("\n#endif\n") ###-------------------------------------------------------------------------- ### Group tables. class GroupTableClass (type): """ Metaclass for group tables, which registers them in the `MODEMAP'. Such a class must define an attribute `mode' giving the mode name, and a class method `run' which writes the necessary output. """ def __new__(cls, name, supers, dict): c = type.__new__(cls, name, supers, dict) try: mode = c.mode except AttributeError: pass else: MODEMAP[c.mode] = c.run return c class GroupTable (with_metaclass(GroupTableClass)): """ Base class for group tables objects. A `group table' is a table of constants, typically defining a cyclic group or something similar. We read the values from an input file, and write them out as C definitions. These have a somewhat stereotyped format, so we can mostly handle them uniformly. Specifically, input files consist of lines which are split into whitespace-separated words. Blank lines, and lines beginning with `#', are ignored. The remaining lines are gathered together into stanzas of the form KEYWORD NAME [HEAD-VALUE ...] SLOT VALUE ... (Indentation is shown for clarity only.) Such a stanza describes a group NAME; some slots are assigned values from the headline, and others from their own individual lines. Subclasses must define the following attributes. data_t The name of the type for describing a particular group. entry_t The name of the type which associates a name with some group data; this will be defined as typedef struct ENTRY_T { const char *name; DATA_T *data; } ENTRY_T; or similar. filename The filename, typically `SOMETHING.c', to put in the output header. header The header file to include, so as to establish the necessary types and other definitions. keyword The keyword beginning a new definition in the input file. The default is `group'. mode The mode name, used to invoke this kind of table operation (used by GroupTableClass). slots A vector of slot objects (see BaseSlot for the protocol) describing the structure of this particular kind of group, in the order they should be written in an initializer. Instances carry an `st' attribute, which contains a `struct' object in which slots can maintain some state. This object carries the following attributes maintained by this class. d A dictionary mapping slots (not their names!) to values assigned in the current stanza. This is reset at the start of each stanza. Slot implementations a free to use this or not, and the representation is internal to the specific slot class. mpmap A dictionary mapping values (integers, or `None') to C initializers (typically, actually, macro invocations). name The name of the group currently being parsed. nextmp Index for the next `mp' object to be written. """ ## Additional attributes, for internal use: ## ## _defs A set of known names for groups. ## ## _headslots A list of slots filled in from the headline. ## ## _names A list of pairs (ALIAS, DATA) mapping alias names to ## the actual group data. ## ## _slotmap A dictionary mapping slot names to their ## descriptions. ## Default values. keyword = 'group' slots = [] def __init__(me): """ Initialize a group table object. """ me.st = st = struct() st.nextmp = 0 st.mpmap = { None: 'NO_MP', 0: 'ZERO_MP' } st.d = {} st.name = None me._names = [] me._defs = set() me._slotmap = dict([(s.name, s) for s in me.slots]) me._headslots = [s for s in me.slots if s.headline] def _flush(me): """ Write out the data for a group once we've detected the end of its stanza. """ ## If there's no current stanza, then do nothing. if me.st.name is None: return ## Start emitting the object. stdout.write("/* --- %s --- */\n" % me.st.name) ## Get the various slots to compute themselves. for s in me.slots: s.setup(me.st) ## Write the initializer. stdout.write("\nstatic %s c_%s = {" % (me.data_t, fix_name(me.st.name))) sep = "\n " for s in me.slots: stdout.write(sep) s.write(me.st) sep = ",\n " stdout.write("\n};\n\n") ## Clear the state for the next stanza. me.st.d = {} me.st.name = None @classmethod def run(cls, input): """ Main output for a group table. Reads the file INPUT. """ ## Make an object for us to work on. me = cls() ## Write the output preamble. write_header(me.mode, me.filename) stdout.write('#include "%s"\n' % me.header) write_preamble() stdout.write("#define NO_MP { 0, 0, 0, 0, 0, 0 }\n\n") ## The main group data. This will contain a `data_t' object for each ## group we read. We'll also build the name to data map as we go. write_banner("Group data") stdout.write('\n') with open(input) as file: for line in file: ## Parse the line into fields. ff = line.split() if not ff or ff[0].startswith('#'): continue if ff[0] == 'alias': ## An alias. Just remember this. if len(ff) != 3: raise Exception("wrong number of alias arguments") me._flush() me._names.append((ff[1], ff[2])) elif ff[0] == me.keyword: ## A headline for a new group. ## Check the headline syntax. Headline slots may be set here, or ## later by name. if len(ff) < 2 or len(ff) > 2 + len(me._headslots): raise Exception("bad number of headline arguments") ## Flush out the previous stanza. me._flush() ## Remember the new stanza's name, and add it to the list. me.st.name = name = ff[1] me._defs.add(name) me._names.append((name, name)) ## Set headline slots from the remaining headline words. for f, s in zip(ff[2:], me._headslots): s.set(me.st, f) elif ff[0] in me._slotmap: ## A slot assignment. Get the slot to store a value. if me.st.name is None: raise Exception("no group currently being defined") if len(ff) != 2: raise Exception("bad number of values for slot `%s'" % ff[0]) me._slotmap[ff[0]].set(me.st, ff[1]) else: ## Something incomprehensible. raise Exception("unknown keyword `%s'" % ff[0]) ## End of the input. Write out the final stanza. me._flush() ## Now for the name-to-data mapping. write_banner("Main table") stdout.write("\nconst %s %s[] = {\n" % (me.entry_t, me.tabname)) for a, n in me._names: if n not in me._defs: raise Exception("alias `%s' refers to unknown group `%s'" % (a, n)) stdout.write(' { "%s", &c_%s },\n' % (a, fix_name(n))) stdout.write(" { 0, 0 }\n};\n\n") ## We're done. write_banner("That's all, folks") class BaseSlot (object): """ Base class for slot types. The slot protocol works as follows. Throughout, ST is a state object as maintained by a GroupTable. __init__(NAME, [HEADLINE], [OMITP], [ALLOWP], ...) Initialize the slot. The NAME identifies the slot, and the keyword used to set it in input files. If HEADLINE is true then the slot can be set from the stanza headline. OMITP and ALLOWP are optional functions: if OMITP(ST) returns true then the slot may be omitted; conversely, if ALLOWP(ST, VALUE) returns false then the slot cannot be assigned the given VALUE. Other arguments may be allowed by specific slot types. set(ST, VALUE) Set the slot to the given VALUE, typically by setting ST.d[me]. The default just stores the VALUE without interpreting it. setup(ST) Prepare the slot for output. The default method just checks that ST.d contains a mapping for the slot. All of the stanza's slots are set up before starting on the initializer for the group data, so slots can use this opportunity to emit preparatory definitions. write(ST) Write an initializer for the slot to standard output. There is no default. The following attributes are exported. headline A flag: can the slot be initialized from the stanza headline? name The slot's name. """ def __init__(me, name, headline = False, omitp = None, allowp = None): """ Initialize a new slot object, setting the necessary attributes. """ me.name = name me.headline = headline me._omitp = omitp me._allowp = allowp def set(me, st, value): """ Store a VALUE for the slot. """ if me._allowp and not me._allowp(st, value): raise Exception("slot `%s' not allowed here" % me.name) st.d[me] = value def setup(me, st): """ Prepare the slot for output, checking its value and so on. """ if me not in st.d and (not me._omitp or not me._omitp(st)): raise Exception("missing slot `%s'" % me.name) class EnumSlot (BaseSlot): """ An EnumSlot object represents a slot which can contain one of a number of named values. An omitted value is written as a literal `0'. """ def __init__(me, name, prefix, values, **kw): """ Initialize an EnumSlot object. The VALUES are a set of value names. On output, a value is converted to uppercase, and prefixed by the PREFIX and an underscore. """ super(EnumSlot, me).__init__(name, **kw) me._values = set(values) me._prefix = prefix def set(me, st, value): """ Check that the VALUE is one of the ones we know. """ if value not in me._values: raise Exception("invalid %s value `%s'" % (me.name, value)) super(EnumSlot, me).set(st, value) def write(me, st): """ Convert the slot value to the C constant name. """ try: stdout.write('%s_%s' % (me._prefix, st.d[me].upper())) except KeyError: stdout.write('0') class MPSlot (BaseSlot): """ An MPSlot object represents a slot which can contain a multiprecision integer. An omitted value is written as a invalid `mp' object. """ def set(me, st, value): """ Set a value; convert it to a Python integer. """ super(MPSlot, me).set(st, long(value, 0)) def setup(me, st): """ Prepare to write the slot. If this is a new integer, then write out a limb vector. Names for the limbs are generated unimaginitively, using a counter. """ super(MPSlot, me).setup(st) v = st.d.get(me) if v not in st.mpmap: write_limbs('v%d' % st.nextmp, v) st.mpmap[v] = mp_body('v%d' % st.nextmp, v) st.nextmp += 1 def write(me, st): """ Write out an `mp' initializer for the slot. """ stdout.write(st.mpmap[st.d.get(me)]) class BinaryGroupTable (GroupTable): mode = 'bintab' filename = 'bintab.c' header = 'bintab.h' data_t = 'bindata' entry_t = 'binentry' tabname = 'bintab' slots = [MPSlot('p'), MPSlot('q'), MPSlot('g')] class EllipticCurveTable (GroupTable): mode = 'ectab' filename = 'ectab.c' header = 'ectab.h' keyword = 'curve' data_t = 'ecdata' entry_t = 'ecentry' tabname = 'ectab' _typeslot = EnumSlot('type', 'FTAG', ['prime', 'niceprime', 'binpoly', 'binnorm'], headline = True) slots = [_typeslot, MPSlot('p'), MPSlot('beta', allowp = lambda st, _: st.d[EllipticCurveTable._typeslot] == 'binnorm', omitp = lambda st: st.d[EllipticCurveTable._typeslot] != 'binnorm'), MPSlot('a'), MPSlot('b'), MPSlot('r'), MPSlot('h'), MPSlot('gx'), MPSlot('gy')] class PrimeGroupTable (GroupTable): mode = 'ptab' filename = 'ptab.c' header = 'ptab.h' data_t = 'pdata' entry_t = 'pentry' tabname = 'ptab' slots = [MPSlot('p'), MPSlot('q'), MPSlot('g')] ###-------------------------------------------------------------------------- ### Main program. op = OP.OptionParser( description = 'Generate multiprecision integer representations', usage = 'usage: %prog [-t TYPEINFO] MODE [ARGS ...]', version = 'Catacomb, version @VERSION@') for shortopt, longopt, kw in [ ('-t', '--typeinfo', dict( action = 'store', metavar = 'PATH', dest = 'typeinfo', help = 'alternative typeinfo file'))]: op.add_option(shortopt, longopt, **kw) op.set_defaults(typeinfo = './typeinfo.py') opts, args = op.parse_args() ## Parse the positional arguments. if len(args) < 1: op.error('missing MODE') mode = args[0] ## Establish the choice of low-level C types. TC = TypeChoice(opts.typeinfo) ## Find the selected mode, and invoke the appropriate handler. try: modefunc = MODEMAP[mode] except KeyError: op.error("unknown mode `%s'" % mode) modefunc(*args[1:]) ###----- That's all, folks --------------------------------------------------