3 ### Shamir secret sharing
5 ### (c) 2011 Mark Wooding
8 ###----- Licensing notice ---------------------------------------------------
10 ### This file is part of the distorted.org.uk key management suite.
12 ### distorted-keys is free software; you can redistribute it and/or modify
13 ### it under the terms of the GNU General Public License as published by
14 ### the Free Software Foundation; either version 2 of the License, or
15 ### (at your option) any later version.
17 ### distorted-keys is distributed in the hope that it will be useful,
18 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 ### GNU General Public License for more details.
22 ### You should have received a copy of the GNU General Public License
23 ### along with distorted-keys; if not, write to the Free Software Foundation,
24 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
26 from __future__ import with_statement
32 from cStringIO import StringIO
33 import optparse as OPT
38 ###--------------------------------------------------------------------------
39 ### Arithmetic in GF(2^8).
41 ### These functions implement arithmetic in a field containing exactly 256
42 ### elements. This is convenient, because there's exactly one field element
43 ### for each one-byte value.
45 ### Actually, we only implement multiplication and division, because addition
46 ### and subtraction are trivial -- they're the XOR operation that Python
47 ### already has. Multiplication and (especially) division are fiddly if you
48 ### do them directly, but the field is easily small enough that log tables
49 ### are a feasible alternative.
51 ### There are many (isomorphic) representations of the field GF(2^8). The
52 ### one we use here is a field of residue classes of binary polynomials:
53 ### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
54 ### the residue class containing x) generates the multiplicative group of the
55 ### field: every nonzero element can be written in the form x^k for some
56 ### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
57 ### a good choice as a base of logarithms.
59 ### More precisely, we use an integer encoding of the residue classes. Each
60 ### class contains exactly one polynomial of degree less than 8, called the
61 ### principal representative. If the principal representative of a residue
62 ### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
63 ### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
64 ### easily verified that this mapping is invertible.
66 def gf28_log_tables():
67 """Build the log tables used to implement GF(2^8) arithmetic."""
68 global GF28_LOG, GF28_EXP
79 GF28_EXP = exp[:255] * 2
83 """Multiply two elements X and Y of GF(2^8), retturning their product."""
84 if x == 0 or y == 0: return 0
85 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
89 Divide element X by Y in GF(2^8), returning the quotient.
91 Raise `ZeroDivisionError' if Y is zero.
93 if y == 0: raise ZeroDivisionError
95 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
97 ###--------------------------------------------------------------------------
98 ### Secret sharing machinery.
100 ### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
101 ### finite field, and let s -- the `secret' -- be an element of K. Let t --
102 ### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
103 ### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
104 ### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
105 ### at random from K, then we have chosen a degree-(t - 1) polynomial
106 ### uniformly at random, subject to the restrictiion that it evaluates to s
107 ### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
109 ### Any collection of t shares can be used to reconstruct the polynomial. We
110 ### can use Lagrange interpolation to do this. Suppose that we're given t
111 ### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
114 ### l_i(x) = | | ---------
119 ### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
122 ### p(x) = > l_i(x) y_i
126 ### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
127 ### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
128 ### points, namely each of the x_i. So, with t shares, we can recover the
129 ### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
130 ### fake `share' (0, s') for any guessed s' in K and interpolate
131 ### corresponding values for the remaining shares; we therefore have perfect
134 ### We use K = GF(2^8), as implemented above. Each byte of a long secret is
135 ### shared independently.
137 ### All of this machinery is the same as in my Catacomb library and Daniel
138 ### Silverstone's `gfshare' package.
140 class SecretSplit (object):
142 Create shares of a secret.
144 Initialize with a secret S and a threshold T; then issue shares with the
148 def __init__(me, s, t):
150 Initialize secret sharing.
152 S is a string containing the secret; T is the threshold, i.e., the number
153 of shares requierd to recover the secret.
156 ## The number of bytes in the secret is a useful thing to store.
159 ## Build an array of coefficients chosen at random.
160 me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
164 Issue the share with index X, returning it as a string.
166 The share will be binary, even if the original secret is plain text. The
167 index must be in the range 1 <= X <= 255.
170 ## Check that the index is in range.
172 raise ValueError, 'share index out of range'
174 ## Evaluate the polynomial at the chosen index.
176 for i in xrange(me._n):
179 y = gf28_mul(x, y) ^ ord(j[i])
183 class SecretJoin (object):
185 Recover a secret given a number of shares.
191 def insert(me, x, p):
193 Insert a share P with index X.
195 All the shares must have the same length. It doesn't matter what the
196 length of the first share is, but the others have to match it.
199 ## Check that the share index is in range.
201 raise ValueError, 'share index out of range'
203 ## Make sure that we haven't been given this one already.
205 raise ValueError, 'duplicate share index'
207 ## Check the length of the share. Store the length if this is the first
211 elif len(p) != me._n:
212 raise ValueError, 'bad share length'
219 Recover the secret from the shares provided, and return it as a string.
221 It's assumed that the caller provided the correct number of shares.
224 ## Iniitialize a map of l_i(0) values. These depend only on the share
225 ## indices provided, and are therefore independent of the secret length.
232 l = gf28_mul(l, gf28_div(xj, xi ^ xj))
235 ## Now do the interpolation.
237 for i in xrange(me._n):
240 y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
244 ###--------------------------------------------------------------------------
245 ### Key/value encoding.
247 ### We use a simple encoding for the data structures we handle. The encoding
250 ### LABEL:KEY=VALUE;KEY=VALUE;...
252 ### The LABEL just says what kind of structure this is meant to be. The KEYs
253 ### are short strings which identify the structure slots. We're doing this
254 ### on the cheap, so order is significant, and there are no creature comforts
255 ### like optional slots. The VALUEs are encodings of the slot values; the
256 ### details of the encoding depend on the VALUE type, which is statically
257 ### determined by the structure.
259 class StructureSlot (object):
261 Base class for structure slot types.
263 Subclasses must implement `encode' and `decode' methods.
265 def __init__(me, name):
266 """Initialize a structure slot, remembering its name."""
269 class IntegerSlot (StructureSlot):
274 def encode(me, value):
276 def decode(me, string):
279 class StringSlot (StructureSlot):
281 A string slot, unencoded. Be careful.
284 def encode(me, value):
286 def decode(me, string):
289 class BinarySlot (StructureSlot):
291 A binary slot, encoded as Base64.
294 def encode(me, value):
295 return B.b64encode(value)
296 def decode(me, string):
297 return B.b64decode(string)
299 class StructureType (object):
301 Base class for classes which can encode and decode their slots.
304 def __init__(me, *args, **kw):
306 Initialize a structure object.
308 The arguments correspond to slots. Positional arguments are read first,
309 and stored in the appropriate attributes. Keywords arguments are used to
310 set arbitrary attributes, but it is an error to set an attribute which
313 The `decoded' method is called after argument processing.
318 ## Decode the arguments.
319 if len(args) > len(me.SLOTS):
320 raise ValueError, 'too many positional arguments'
321 for i in xrange(len(args)):
322 slotdef, attr = me.SLOTS[i]
323 setattr(me, attr, args[i])
324 for k, v in kw.iteritems():
329 raise ValueError, 'already set attribute'
332 ## Initialize the object. (There's a hack here for the `decode' method.)
338 Encode an object as a string.
344 for slotdef, attr in me.SLOTS:
346 o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
352 Write an encoded description of the object to PATH.
354 with open(path + '.new', 'w') as f:
357 OS.rename(path + '.new', path)
360 def decode(cls, string):
362 Decode a STRING, returning a new object.
364 The `decoded' method is called on the new object.
366 if not string.startswith(cls.LABEL + ':'):
367 raise SyntaxError, 'incorrect label'
368 slots = string[len(cls.LABEL) + 1:].split(';')
369 if len(slots) != len(cls.SLOTS):
370 raise SyntaxError, 'wrong number of slots'
371 obj = cls(_magic = True)
372 for i in xrange(len(slots)):
374 slotdef, attr = cls.SLOTS[i]
375 if not slot.startswith(slotdef._name + '='):
376 raise SyntaxError, 'incorrect slot name'
378 val = slotdef.decode(slot[len(slotdef._name) + 1:])
379 except (ValueError, SyntaxError):
380 raise SyntaxError, 'invalid %s value' % slotdef.TYPE
381 setattr(obj, attr, val)
388 Read an encoded description of a structure and return the decoded object.
390 with open(path, 'r') as f:
392 if cont and cont[-1] == '\n':
395 raise SyntaxError, 'encoding must be a single line'
396 return cls.decode(cont)
400 Hook for object initialization.
402 This is called by `__init__' and `decode'.
406 ###--------------------------------------------------------------------------
407 ### Higher-level share management.
409 ### We store shares together with a bunch of metadata about the sharing
410 ### parameters, and a hash of all of that together with the final secret.
411 ### Theoretically, storing a hash weakens the whole scheme from providing
412 ### unconditional security to merely providing computational security (based
413 ### on the assupmption that the hash function is one-way). In practice, the
414 ### secret will be used as a symmetric encryption key or similar, so it's
415 ### already only secure against computationally bounded adversaries.
417 class Secret (StructureType):
419 Represents a secret, either about to be shared or recently recovered.
421 LABEL = 'shamir-secret'
422 SLOTS = [(IntegerSlot('n'), 'count'),
423 (IntegerSlot('t'), 'thresh'),
424 (BinarySlot('s'), 'secret')]
426 def hash(me, hashfn):
428 Hash the secret and its parameters using the hash function HASHFN.
430 The hash is returned as a raw binary string. The HASHFN must be one of
431 the hash functions known to Python's `hashlib' module (essentially, hash
432 functions known to OpenSSL).
435 h.update(me.encode())
438 class Params (StructureType):
440 Represents secret sharing parameters.
442 LABEL = 'shamir-params'
443 SLOTS = [(IntegerSlot('n'), 'count'),
444 (IntegerSlot('t'), 'thresh'),
445 (StringSlot('f'), 'hashfn'),
446 (BinarySlot('h'), 'hash')]
448 class Share (StructureType):
450 Represents a share of a secret.
452 LABEL = 'shamir-share'
453 SLOTS = [(IntegerSlot('i'), 'index'),
454 (BinarySlot('y'), 'share')]
456 ###--------------------------------------------------------------------------
459 QUIS = OS.path.basename(SYS.argv[0])
462 SYS.stderr.write('%s: %s\n' % (QUIS, message))
464 def die(message, rc = 1):
468 class struct (object):
469 def __init__(me, **kw):
470 me.__dict__.update(kw)
472 class Subcommand (struct):
475 class SubcommandOptionParser (OPT.OptionParser, object):
477 def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
479 super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
481 Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
483 desc = 'Show help for %prog, or for the COMMANDs.')
485 me.disable_interspersed_args()
487 def add_subcommand(me, name, func, usage, desc = None, opts = []):
488 subcmd = Subcommand(name = name, func = func, usage = usage,
489 desc = desc, opts = opts)
490 me.subcommands.append(subcmd)
492 def lookup_subcommand(me, name):
494 for sub in me.subcommands:
497 elif sub.name.startswith(name):
501 elif len(match) == 0:
502 me.error("unknown command `%s'"% name)
504 me.error("ambiguous command `%s': could be any of %s"
505 % (name, ', '.join(["`%s'" % sub.name for sub in match])))
507 def subcmd_usage(me, sub):
509 return '%s %s' % (sub.name, sub.usage)
514 super(SubcommandOptionParser, me).print_help()
517 for sub in me.subcommands:
518 if sub.usage is None:
520 print ' ' + me.subcmd_usage(sub)
522 def make_subcommand_optparser(me, sub):
523 op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
524 description = sub.desc)
529 def cmd_help(me, gopts, opts, args):
534 sub = me.lookup_subcommand(name)
535 op = me.make_subcommand_optparser(sub)
538 def dispatch(me, args = None):
541 gopts, args = me.parse_args(args)
543 me.error('missing command')
544 sub = me.lookup_subcommand(args[0])
545 op = me.make_subcommand_optparser(sub)
546 opts, args = op.parse_args(args[1:])
547 sub.func(gopts, opts, args)
549 def subcommand(name, usage, desc, opts = []):
551 OPTPARSE.add_subcommand(name, func, usage, desc, opts)
555 OPTPARSE = SubcommandOptionParser(
556 usage = '%prog SUBCOMMAND [ARGS ...]',
557 version = '%%prog, %s version %s' % (PACKAGE, VERSION),
559 Split and recombine secrets using Shamir's secret sharing system.
562 ###--------------------------------------------------------------------------
563 ### Issue shares given a secret.
565 def parse_sharing(string):
566 slash = string.find('/')
568 raise ValueError, "no `/'"
569 return int(string[slash + 1:]), int(string[:slash])
571 @subcommand('issue', '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
573 Issue shares of a secret. The secret (as a binary lump) is read from
574 SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
575 are written to standard output: the first line contains the sharing
576 parameters, and does not need to be kept especially secret (though it
577 contains a hash of the secret, so it compromises the unconditional security
578 of the secret sharing scheme); the remaining lines are the shares, one per
579 line. All of the lines are plain text.""",
580 [OPT.make_option('-H', '--hash-function', type = 'string',
581 action = 'store', dest = 'hashfn', default = 'sha256',
582 help = 'Hash function for verifying the share.')])
583 def cmd_issue(gopts, opts, args):
585 if len(args) < 1 or len(args) > 2:
586 OPTPARSE.error('bad arguments')
589 count, thresh = parse_sharing(args[0])
590 except ValueError, err:
591 OPTPARSE.error("bad sharing parameters `%s': %s"
592 % (args[0], err.message))
594 if len(args) < 2 or args[1] == '-':
595 sec = SYS.stdin.read()
597 with open(args[1], 'rb') as f:
600 secret = Secret(count, thresh, sec)
601 h = secret.hash(opts.hashfn)
602 params = Params(count, thresh, opts.hashfn, h)
603 print params.encode()
605 ss = SecretSplit(sec, thresh)
606 for i in xrange(count):
607 share = Share(i, ss.issue(i + 1))
610 ###--------------------------------------------------------------------------
611 ### Recover a secret from shares.
613 @subcommand('recover', '',
615 Standard input should contain a number of lines. The first line should
616 contain the secret sharing parameters (the first line output by `issue');
617 the remaining lines should contain shares. The shares may be supplied in
618 any order. The secret is written to the OUTPUT file, or standard output.""",
619 [OPT.make_option('-o', '--output', action = 'store', type = 'string',
620 dest = 'file', default = '-',
621 help = 'Write the secret to FILE.')])
622 def cmd_recover(gopts, opts, args):
624 ## Keep track of how well we've done so far. Report all of the things
629 ## Read the system parameters.
630 line = SYS.stdin.readline()
631 if line.endswith('\n'):
634 params = Params.decode(line)
635 except ValueError, err:
636 moan("parameters (line %d) invalid: %s" % (lineno, err.message))
639 ## Now read the shares. Pay attention to syntax errors, but not to much
640 ## else. Build the maps for the sharing parameters.
643 for line in SYS.stdin:
645 if line.endswith('\n'):
648 share = Share.decode(line)
649 except ValueError, err:
650 moan("share invalid (line %d): %s" % (lineno, err.message))
653 share.lineno = lineno
654 if share.index in shmap:
655 moan("duplicate share index "
656 "(line %d, index %d previously on line %d)" %
657 (lineno, share.index, shmap[share.index].lineno))
659 elif share.index < 0 or share.index >= params.count:
660 moan("share index %d out of range (line %d)" % (share.index, lineno))
662 elif len(shmap) >= params.thresh:
663 moan("enough shares already (line %d)" % lineno)
666 sj.insert(share.index + 1, share.share)
667 shmap[share.index] = share
669 if len(shmap) < params.thresh:
670 moan("not enough shares")
676 ## Reassemble the secret.
677 secret = Secret(params.count, params.thresh, sj.recover())
679 if params.hash != secret.hash(params.hashfn):
680 die('secret doesn\'t match hash')
683 SYS.stdout.write(secret.secret)
685 with open(opts.file, 'wb') as f:
686 f.write(secret.secret)
688 ###----- That's all, folks --------------------------------------------------
690 if __name__ == '__main__':