3 ### Shamir secret sharing
5 ### (c) 2011 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of the distorted.org.uk key management suite.
12 ### distorted-keys is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
17 ### distorted-keys is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
22 ### You should have received a copy of the GNU General Public License
23 ### along with distorted-keys; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 from __future__ import with_statement
32 from cStringIO import StringIO
33 import optparse as OPT
35 ###--------------------------------------------------------------------------
36 ### Arithmetic in GF(2^8).
38 ### These functions implement arithmetic in a field containing exactly 256
39 ### elements. This is convenient, because there's exactly one field element
40 ### for each one-byte value.
42 ### Actually, we only implement multiplication and division, because addition
43 ### and subtraction are trivial -- they're the XOR operation that Python
44 ### already has. Multiplication and (especially) division are fiddly if you
45 ### do them directly, but the field is easily small enough that log tables
46 ### are a feasible alternative.
48 ### There are many (isomorphic) representations of the field GF(2^8). The
49 ### one we use here is a field of residue classes of binary polynomials:
50 ### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
51 ### the residue class containing x) generates the multiplicative group of the
52 ### field: every nonzero element can be written in the form x^k for some
53 ### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
54 ### a good choice as a base of logarithms.
56 ### More precisely, we use an integer encoding of the residue classes. Each
57 ### class contains exactly one polynomial of degree less than 8, called the
58 ### principal representative. If the principal representative of a residue
59 ### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
60 ### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
61 ### easily verified that this mapping is invertible.
63 def gf28_log_tables():
64 """Build the log tables used to implement GF(2^8) arithmetic."""
65 global GF28_LOG, GF28_EXP
76 GF28_EXP = exp[:255] * 2
80 """Multiply two elements X and Y of GF(2^8), retturning their product."""
81 if x == 0 or y == 0: return 0
82 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
86 Divide element X by Y in GF(2^8), returning the quotient.
88 Raise `ZeroDivisionError' if Y is zero.
90 if y == 0: raise ZeroDivisionError
92 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
94 ###--------------------------------------------------------------------------
95 ### Secret sharing machinery.
97 ### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
98 ### finite field, and let s -- the `secret' -- be an element of K. Let t --
99 ### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
100 ### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
101 ### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
102 ### at random from K, then we have chosen a degree-(t - 1) polynomial
103 ### uniformly at random, subject to the restrictiion that it evaluates to s
104 ### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
106 ### Any collection of t shares can be used to reconstruct the polynomial. We
107 ### can use Lagrange interpolation to do this. Suppose that we're given t
108 ### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
111 ### l_i(x) = | | ---------
116 ### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
119 ### p(x) = > l_i(x) y_i
123 ### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
124 ### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
125 ### points, namely each of the x_i. So, with t shares, we can recover the
126 ### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
127 ### fake `share' (0, s') for any guessed s' in K and interpolate
128 ### corresponding values for the remaining shares; we therefore have perfect
131 ### We use K = GF(2^8), as implemented above. Each byte of a long secret is
132 ### shared independently.
134 ### All of this machinery is the same as in my Catacomb library and Daniel
135 ### Silverstone's `gfshare' package.
137 class SecretSplit (object):
139 Create shares of a secret.
141 Initialize with a secret S and a threshold T; then issue shares with the
145 def __init__(me, s, t):
147 Initialize secret sharing.
149 S is a string containing the secret; T is the threshold, i.e., the number
150 of shares requierd to recover the secret.
153 ## The number of bytes in the secret is a useful thing to store.
156 ## Build an array of coefficients chosen at random.
157 me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
161 Issue the share with index X, returning it as a string.
163 The share will be binary, even if the original secret is plain text. The
164 index must be in the range 1 <= X <= 255.
167 ## Check that the index is in range.
169 raise ValueError, 'share index out of range'
171 ## Evaluate the polynomial at the chosen index.
173 for i in xrange(me._n):
176 y = gf28_mul(x, y) ^ ord(j[i])
180 class SecretJoin (object):
182 Recover a secret given a number of shares.
188 def insert(me, x, p):
190 Insert a share P with index X.
192 All the shares must have the same length. It doesn't matter what the
193 length of the first share is, but the others have to match it.
196 ## Check that the share index is in range.
198 raise ValueError, 'share index out of range'
200 ## Make sure that we haven't been given this one already.
202 raise ValueError, 'duplicate share index'
204 ## Check the length of the share. Store the length if this is the first
208 elif len(p) != me._n:
209 raise ValueError, 'bad share length'
216 Recover the secret from the shares provided, and return it as a string.
218 It's assumed that the caller provided the correct number of shares.
221 ## Iniitialize a map of l_i(0) values. These depend only on the share
222 ## indices provided, and are therefore independent of the secret length.
229 l = gf28_mul(l, gf28_div(xj, xi ^ xj))
232 ## Now do the interpolation.
234 for i in xrange(me._n):
237 y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
241 ###--------------------------------------------------------------------------
242 ### Key/value encoding.
244 ### We use a simple encoding for the data structures we handle. The encoding
247 ### LABEL:KEY=VALUE;KEY=VALUE;...
249 ### The LABEL just says what kind of structure this is meant to be. The KEYs
250 ### are short strings which identify the structure slots. We're doing this
251 ### on the cheap, so order is significant, and there are no creature comforts
252 ### like optional slots. The VALUEs are encodings of the slot values; the
253 ### details of the encoding depend on the VALUE type, which is statically
254 ### determined by the structure.
256 class StructureSlot (object):
258 Base class for structure slot types.
260 Subclasses must implement `encode' and `decode' methods.
262 def __init__(me, name):
263 """Initialize a structure slot, remembering its name."""
266 class IntegerSlot (StructureSlot):
271 def encode(me, value):
273 def decode(me, string):
276 class StringSlot (StructureSlot):
278 A string slot, unencoded. Be careful.
281 def encode(me, value):
283 def decode(me, string):
286 class BinarySlot (StructureSlot):
288 A binary slot, encoded as Base64.
291 def encode(me, value):
292 return B.b64encode(value)
293 def decode(me, string):
294 return B.b64decode(string)
296 class StructureType (object):
298 Base class for classes which can encode and decode their slots.
301 def __init__(me, *args, **kw):
303 Initialize a structure object.
305 The arguments correspond to slots. Positional arguments are read first,
306 and stored in the appropriate attributes. Keywords arguments are used to
307 set arbitrary attributes, but it is an error to set an attribute which
310 The `decoded' method is called after argument processing.
315 ## Decode the arguments.
316 if len(args) > len(me.SLOTS):
317 raise ValueError, 'too many positional arguments'
318 for i in xrange(len(args)):
319 slotdef, attr = me.SLOTS[i]
320 setattr(me, attr, args[i])
321 for k, v in kw.iteritems():
326 raise ValueError, 'already set attribute'
329 ## Initialize the object. (There's a hack here for the `decode' method.)
335 Encode an object as a string.
341 for slotdef, attr in me.SLOTS:
343 o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
349 Write an encoded description of the object to PATH.
351 with open(path + '.new', 'w') as f:
354 OS.rename(path + '.new', path)
357 def decode(cls, string):
359 Decode a STRING, returning a new object.
361 The `decoded' method is called on the new object.
363 if not string.startswith(cls.LABEL + ':'):
364 raise SyntaxError, 'incorrect label'
365 slots = string[len(cls.LABEL) + 1:].split(';')
366 if len(slots) != len(cls.SLOTS):
367 raise SyntaxError, 'wrong number of slots'
368 obj = cls(_magic = True)
369 for i in xrange(len(slots)):
371 slotdef, attr = cls.SLOTS[i]
372 if not slot.startswith(slotdef._name + '='):
373 raise SyntaxError, 'incorrect slot name'
375 val = slotdef.decode(slot[len(slotdef._name) + 1:])
376 except (ValueError, SyntaxError):
377 raise SyntaxError, 'invalid %s value' % slotdef.TYPE
378 setattr(obj, attr, val)
385 Read an encoded description of a structure and return the decoded object.
387 with open(path, 'r') as f:
389 if cont and cont[-1] == '\n':
392 raise SyntaxError, 'encoding must be a single line'
393 return cls.decode(cont)
397 Hook for object initialization.
399 This is called by `__init__' and `decode'.
403 ###--------------------------------------------------------------------------
404 ### Higher-level share management.
406 ### We store shares together with a bunch of metadata about the sharing
407 ### parameters, and a hash of all of that together with the final secret.
408 ### Theoretically, storing a hash weakens the whole scheme from providing
409 ### unconditional security to merely providing computational security (based
410 ### on the assupmption that the hash function is one-way). In practice, the
411 ### secret will be used as a symmetric encryption key or similar, so it's
412 ### already only secure against computationally bounded adversaries.
414 class Secret (StructureType):
416 Represents a secret, either about to be shared or recently recovered.
418 LABEL = 'shamir-secret'
419 SLOTS = [(IntegerSlot('n'), 'count'),
420 (IntegerSlot('t'), 'thresh'),
421 (BinarySlot('s'), 'secret')]
423 def hash(me, hashfn):
425 Hash the secret and its parameters using the hash function HASHFN.
427 The hash is returned as a raw binary string. The HASHFN must be one of
428 the hash functions known to Python's `hashlib' module (essentially, hash
429 functions known to OpenSSL).
432 h.update(me.encode())
435 class Params (StructureType):
437 Represents secret sharing parameters.
439 LABEL = 'shamir-params'
440 SLOTS = [(IntegerSlot('n'), 'count'),
441 (IntegerSlot('t'), 'thresh'),
442 (StringSlot('f'), 'hashfn'),
443 (BinarySlot('h'), 'hash')]
445 class Share (StructureType):
447 Represents a share of a secret.
449 LABEL = 'shamir-share'
450 SLOTS = [(IntegerSlot('i'), 'index'),
451 (BinarySlot('y'), 'share')]
453 ###--------------------------------------------------------------------------
456 QUIS = OS.path.basename(SYS.argv[0])
459 SYS.stderr.write('%s: %s\n' % (QUIS, message))
461 def die(message, rc = 1):
465 class struct (object):
466 def __init__(me, **kw):
467 me.__dict__.update(kw)
469 class Subcommand (struct):
472 class SubcommandOptionParser (OPT.OptionParser, object):
474 def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
476 super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
478 Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
480 desc = 'Show help for %prog, or for the COMMANDs.')
482 me.disable_interspersed_args()
484 def add_subcommand(me, name, func, usage, desc = None, opts = []):
485 subcmd = Subcommand(name = name, func = func, usage = usage,
486 desc = desc, opts = opts)
487 me.subcommands.append(subcmd)
489 def lookup_subcommand(me, name):
491 for sub in me.subcommands:
494 elif sub.name.startswith(name):
498 elif len(match) == 0:
499 me.error("unknown command `%s'"% name)
501 me.error("ambiguous command `%s': could be any of %s"
502 % (name, ', '.join(["`%s'" % sub.name for sub in match])))
504 def subcmd_usage(me, sub):
506 return '%s %s' % (sub.name, sub.usage)
511 super(SubcommandOptionParser, me).print_help()
514 for sub in me.subcommands:
515 if sub.usage is None:
517 print ' ' + me.subcmd_usage(sub)
519 def make_subcommand_optparser(me, sub):
520 op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
521 description = sub.desc)
526 def cmd_help(me, gopts, opts, args):
531 sub = me.lookup_subcommand(name)
532 op = me.make_subcommand_optparser(sub)
535 def dispatch(me, args = None):
538 gopts, args = me.parse_args(args)
540 me.error('missing command')
541 sub = me.lookup_subcommand(args[0])
542 op = me.make_subcommand_optparser(sub)
543 opts, args = op.parse_args(args[1:])
544 sub.func(gopts, opts, args)
546 OPTPARSE = SubcommandOptionParser(description = """\
547 Split and recombine secrets using Shamir's secret sharing system.
550 ###--------------------------------------------------------------------------
551 ### Issue shares given a secret.
553 def parse_sharing(string):
554 slash = string.find('/')
556 raise ValueError, "no `/'"
557 return int(string[slash + 1:]), int(string[:slash])
559 def cmd_issue(gopts, opts, args):
561 if len(args) < 1 or len(args) > 2:
562 OPTPARSE.error('bad arguments')
565 count, thresh = parse_sharing(args[0])
566 except ValueError, err:
567 OPTPARSE.error("bad sharing parameters `%s': %s"
568 % (args[0], err.message))
570 if len(args) < 2 or args[1] == '-':
571 sec = SYS.stdin.read()
573 with open(args[1], 'rb') as f:
576 secret = Secret(count, thresh, sec)
577 h = secret.hash(opts.hashfn)
578 params = Params(count, thresh, opts.hashfn, h)
579 print params.encode()
581 ss = SecretSplit(sec, thresh)
582 for i in xrange(count):
583 share = Share(i, ss.issue(i + 1))
586 OPTPARSE.add_subcommand(
588 '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
590 Issue shares of a secret. The secret (as a binary lump) is read from
591 SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
592 are written to standard output: the first line contains the sharing
593 parameters, and does not need to be kept especially secret (though it
594 contains a hash of the secret, so it compromises the unconditional security
595 of the secret sharing scheme); the remaining lines are the shares, one per
596 line. All of the lines are plain text.""",
597 [OPT.make_option('-H', '--hash-function', type = 'string',
598 action = 'store', dest = 'hashfn', default = 'sha256',
599 help = 'Hash function for verifying the share.')])
601 ###--------------------------------------------------------------------------
602 ### Recover a secret from shares.
604 def cmd_recover(gopts, opts, args):
606 ## Keep track of how well we've done so far. Report all of the things
611 ## Read the system parameters.
612 line = SYS.stdin.readline()
613 if line.endswith('\n'):
616 params = Params.decode(line)
617 except ValueError, err:
618 moan("parameters (line %d) invalid: %s" % (lineno, err.message))
621 ## Now read the shares. Pay attention to syntax errors, but not to much
622 ## else. Build the maps for the sharing parameters.
625 for line in SYS.stdin:
627 if line.endswith('\n'):
630 share = Share.decode(line)
631 except ValueError, err:
632 moan("share invalid (line %d): %s" % (lineno, err.message))
635 share.lineno = lineno
636 if share.index in shmap:
637 moan("duplicate share index "
638 "(line %d, index %d previously on line %d)" %
639 (lineno, share.index, shmap[share.index].lineno))
641 elif share.index < 0 or share.index >= params.count:
642 moan("share index %d out of range (line %d)" % (share.index, lineno))
644 elif len(shmap) >= params.thresh:
645 moan("enough shares already (line %d)" % lineno)
648 sj.insert(share.index + 1, share.share)
649 shmap[share.index] = share
651 if len(shmap) < params.thresh:
652 moan("not enough shares")
658 ## Reassemble the secret.
659 secret = Secret(params.count, params.thresh, sj.recover())
661 if params.hash != secret.hash(params.hashfn):
662 die('secret doesn\'t match hash')
665 SYS.stdout.write(secret.secret)
667 with open(opts.file, 'wb') as f:
668 f.write(secret.secret)
670 OPTPARSE.add_subcommand(
671 'recover', cmd_recover,
674 Standard input should contain a number of lines. The first line should
675 contain the secret sharing parameters (the first line output by `issue');
676 the remaining lines should contain shares. The shares may be supplied in
677 any order. The secret is written to the OUTPUT file, or standard output.""",
678 [OPT.make_option('-o', '--output', action = 'store', type = 'string',
679 dest = 'file', default = '-',
680 help = 'Write the secret to FILE.')])
682 ###----- That's all, folks --------------------------------------------------
684 if __name__ == '__main__':