initial checkin: still somewhat sketchy
[distorted-keys] / shamir.in
1 #! @PYTHON@
2 ###
3 ### Shamir secret sharing
4 ###
5 ### (c) 2011 Mark Wooding
6 ###
7
8 ###----- Licensing notice ---------------------------------------------------
9 ###
10 ### This program is free software; you can redistribute it and/or modify
11 ### it under the terms of the GNU General Public License as published by
12 ### the Free Software Foundation; either version 2 of the License, or
13 ### (at your option) any later version.
14 ###
15 ### This program is distributed in the hope that it will be useful,
16 ### but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ### GNU General Public License for more details.
19 ###
20 ### You should have received a copy of the GNU General Public License
21 ### along with this program; if not, write to the Free Software Foundation,
22 ### Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
23
24 from __future__ import with_statement
25
26 import sys as SYS
27 import os as OS
28 import hashlib as H
29 import base64 as B
30 from cStringIO import StringIO
31 import optparse as OPT
32
33 ###--------------------------------------------------------------------------
34 ### Arithmetic in GF(2^8).
35 ###
36 ### These functions implement arithmetic in a field containing exactly 256
37 ### elements. This is convenient, because there's exactly one field element
38 ### for each one-byte value.
39 ###
40 ### Actually, we only implement multiplication and division, because addition
41 ### and subtraction are trivial -- they're the XOR operation that Python
42 ### already has. Multiplication and (especially) division are fiddly if you
43 ### do them directly, but the field is easily small enough that log tables
44 ### are a feasible alternative.
45 ###
46 ### There are many (isomorphic) representations of the field GF(2^8). The
47 ### one we use here is a field of residue classes of binary polynomials:
48 ### GF(2)[x]/(x^8 + x^4 + x^3 + x^2 + 1). Usefully, the element [x] (i.e.,
49 ### the residue class containing x) generates the multiplicative group of the
50 ### field: every nonzero element can be written in the form x^k for some
51 ### integer 0 <= k < 255. Multiplication by x is especially easy, so this is
52 ### a good choice as a base of logarithms.
53 ###
54 ### More precisely, we use an integer encoding of the residue classes. Each
55 ### class contains exactly one polynomial of degree less than 8, called the
56 ### principal representative. If the principal representative of a residue
57 ### class is a_7 x^7 + ... + a_1 x + a_0, with each a_i either 0 or 1, then
58 ### the encoding of the representative is 2^7 a_7 + ... + 2 a_1 + a_0. It is
59 ### easily verified that this mapping is invertible.
60
61 def gf28_log_tables():
62 """Build the log tables used to implement GF(2^8) arithmetic."""
63 global GF28_LOG, GF28_EXP
64 log = [0] * 256
65 exp = [0] * 256
66 x = 1
67 for i in xrange(255):
68 exp[i] = x
69 log[x] = i
70 x <<= 1
71 if x & 0x100:
72 x ^= 0x11d
73 GF28_LOG = log
74 GF28_EXP = exp[:255] * 2
75 gf28_log_tables()
76
77 def gf28_mul(x, y):
78 """Multiply two elements X and Y of GF(2^8), retturning their product."""
79 if x == 0 or y == 0: return 0
80 else: return GF28_EXP[GF28_LOG[x] + GF28_LOG[y]]
81
82 def gf28_div(x, y):
83 """
84 Divide element X by Y in GF(2^8), returning the quotient.
85
86 Raise `ZeroDivisionError' if Y is zero.
87 """
88 if y == 0: raise ZeroDivisionError
89 elif x == 0: return 0
90 else: return GF28_EXP[GF28_LOG[x] + 255 - GF28_LOG[y]]
91
92 ###--------------------------------------------------------------------------
93 ### Secret sharing machinery.
94 ###
95 ### Shamir's secret sharing scheme uses polynomial interpolation. Let K be a
96 ### finite field, and let s -- the `secret' -- be an element of K. Let t --
97 ### the `threshold' -- be a positive integer. A degree-(t - 1) polynomial
98 ### p(x) over K has the general form a_{t-1} x^{t-1} + ... + a_1 x + a_0; we
99 ### then have p(0) = a_0. If we choose a_0 = s, and the other a_i uniformly
100 ### at random from K, then we have chosen a degree-(t - 1) polynomial
101 ### uniformly at random, subject to the restrictiion that it evaluates to s
102 ### at 0. We can issue shares of the form (x, p(x)) for 0 < x < #K.
103 ###
104 ### Any collection of t shares can be used to reconstruct the polynomial. We
105 ### can use Lagrange interpolation to do this. Suppose that we're given t
106 ### different shares (x_i, y_i) for 0 <= i < t. Firstly, set
107 ###
108 ### ------- x - x_j
109 ### l_i(x) = | | ---------
110 ### | | x_i - x_j
111 ### 0 <= j < t
112 ### j /= i
113 ###
114 ### Now notice that l_i(x_i) = 1 and l_i(x_j) = 0 if j /= i. Then
115 ###
116 ### ----
117 ### p(x) = > l_i(x) y_i
118 ### ----
119 ### 0 <= i < t
120 ###
121 ### To see this, it suffices to note that (a) l_i(x) (and therefore the sum)
122 ### is a degree-(t - 1) polynomial, and (b) it agrees with p(x) at t distinct
123 ### points, namely each of the x_i. So, with t shares, we can recover the
124 ### original polynomial, and hence p(0) = s. With t - 1 shares, we can add a
125 ### fake `share' (0, s') for any guessed s' in K and interpolate
126 ### corresponding values for the remaining shares; we therefore have perfect
127 ### secrecy.
128 ###
129 ### We use K = GF(2^8), as implemented above. Each byte of a long secret is
130 ### shared independently.
131 ###
132 ### All of this machinery is the same as in my Catacomb library and Daniel
133 ### Silverstone's `gfshare' package.
134
135 class SecretSplit (object):
136 """
137 Create shares of a secret.
138
139 Initialize with a secret S and a threshold T; then issue shares with the
140 `issue' method.
141 """
142
143 def __init__(me, s, t):
144 """
145 Initialize secret sharing.
146
147 S is a string containing the secret; T is the threshold, i.e., the number
148 of shares requierd to recover the secret.
149 """
150
151 ## The number of bytes in the secret is a useful thing to store.
152 me._n = len(s)
153
154 ## Build an array of coefficients chosen at random.
155 me._coeffs = [OS.urandom(me._n) for i in xrange(t - 1)] + [s]
156
157 def issue(me, x):
158 """
159 Issue the share with index X, returning it as a string.
160
161 The share will be binary, even if the original secret is plain text. The
162 index must be in the range 1 <= X <= 255.
163 """
164
165 ## Check that the index is in range.
166 if x < 1 or x > 255:
167 raise ValueError, 'share index out of range'
168
169 ## Evaluate the polynomial at the chosen index.
170 o = StringIO()
171 for i in xrange(me._n):
172 y = 0
173 for j in me._coeffs:
174 y = gf28_mul(x, y) ^ ord(j[i])
175 o.write(chr(y))
176 return o.getvalue()
177
178 class SecretJoin (object):
179 """
180 Recover a secret given a number of shares.
181 """
182 def __init__(me):
183 me._n = None
184 me._m = {}
185
186 def insert(me, x, p):
187 """
188 Insert a share P with index X.
189
190 All the shares must have the same length. It doesn't matter what the
191 length of the first share is, but the others have to match it.
192 """
193
194 ## Check that the share index is in range.
195 if x < 1 or x > 255:
196 raise ValueError, 'share index out of range'
197
198 ## Make sure that we haven't been given this one already.
199 if x in me._m:
200 raise ValueError, 'duplicate share index'
201
202 ## Check the length of the share. Store the length if this is the first
203 ## share.
204 if me._n is None:
205 me._n = len(p)
206 elif len(p) != me._n:
207 raise ValueError, 'bad share length'
208
209 ## Store the share.
210 me._m[x] = p
211
212 def recover(me):
213 """
214 Recover the secret from the shares provided, and return it as a string.
215
216 It's assumed that the caller provided the correct number of shares.
217 """
218
219 ## Iniitialize a map of l_i(0) values. These depend only on the share
220 ## indices provided, and are therefore independent of the secret length.
221 ll = {}
222 kk = me._m.keys()
223 for xi in kk:
224 l = 1
225 for xj in kk:
226 if xi != xj:
227 l = gf28_mul(l, gf28_div(xj, xi ^ xj))
228 ll[xi] = l
229
230 ## Now do the interpolation.
231 o = StringIO()
232 for i in xrange(me._n):
233 y = 0
234 for xi in kk:
235 y = y ^ gf28_mul(ll[xi], ord(me._m[xi][i]))
236 o.write(chr(y))
237 return o.getvalue()
238
239 ###--------------------------------------------------------------------------
240 ### Key/value encoding.
241 ###
242 ### We use a simple encoding for the data structures we handle. The encoding
243 ### looks like this:
244 ###
245 ### LABEL:KEY=VALUE;KEY=VALUE;...
246 ###
247 ### The LABEL just says what kind of structure this is meant to be. The KEYs
248 ### are short strings which identify the structure slots. We're doing this
249 ### on the cheap, so order is significant, and there are no creature comforts
250 ### like optional slots. The VALUEs are encodings of the slot values; the
251 ### details of the encoding depend on the VALUE type, which is statically
252 ### determined by the structure.
253
254 class StructureSlot (object):
255 """
256 Base class for structure slot types.
257
258 Subclasses must implement `encode' and `decode' methods.
259 """
260 def __init__(me, name):
261 """Initialize a structure slot, remembering its name."""
262 me._name = name
263
264 class IntegerSlot (StructureSlot):
265 """
266 An integer slot.
267 """
268 TYPE = 'integer'
269 def encode(me, value):
270 return '%d' % value
271 def decode(me, string):
272 return int(string)
273
274 class StringSlot (StructureSlot):
275 """
276 A string slot, unencoded. Be careful.
277 """
278 TYPE = 'string'
279 def encode(me, value):
280 return value
281 def decode(me, string):
282 return string
283
284 class BinarySlot (StructureSlot):
285 """
286 A binary slot, encoded as Base64.
287 """
288 TYPE = 'base64'
289 def encode(me, value):
290 return B.b64encode(value)
291 def decode(me, string):
292 return B.b64decode(string)
293
294 class StructureType (object):
295 """
296 Base class for classes which can encode and decode their slots.
297 """
298
299 def __init__(me, *args, **kw):
300 """
301 Initialize a structure object.
302
303 The arguments correspond to slots. Positional arguments are read first,
304 and stored in the appropriate attributes. Keywords arguments are used to
305 set arbitrary attributes, but it is an error to set an attribute which
306 already has a value.
307
308 The `decoded' method is called after argument processing.
309 """
310
311 _magic = False
312
313 ## Decode the arguments.
314 if len(args) > len(me.SLOTS):
315 raise ValueError, 'too many positional arguments'
316 for i in xrange(len(args)):
317 slotdef, attr = me.SLOTS[i]
318 setattr(me, attr, args[i])
319 for k, v in kw.iteritems():
320 if k == '_magic':
321 _magic = v
322 continue
323 if hasattr(me, k):
324 raise ValueError, 'already set attribute'
325 setattr(me, k, v)
326
327 ## Initialize the object. (There's a hack here for the `decode' method.)
328 if not _magic:
329 me.decoded()
330
331 def encode(me):
332 """
333 Encode an object as a string.
334 """
335 o = StringIO()
336 o.write(me.LABEL)
337 o.write(':')
338 sep = ''
339 for slotdef, attr in me.SLOTS:
340 o.write(sep)
341 o.write('%s=%s' % (slotdef._name, slotdef.encode(getattr(me, attr))))
342 sep = ';'
343 return o.getvalue()
344
345 def save(me, path):
346 """
347 Write an encoded description of the object to PATH.
348 """
349 with open(path + '.new', 'w') as f:
350 f.write(me.encode())
351 f.write('\n')
352 OS.rename(path + '.new', path)
353
354 @classmethod
355 def decode(cls, string):
356 """
357 Decode a STRING, returning a new object.
358
359 The `decoded' method is called on the new object.
360 """
361 if not string.startswith(cls.LABEL + ':'):
362 raise SyntaxError, 'incorrect label'
363 slots = string[len(cls.LABEL) + 1:].split(';')
364 if len(slots) != len(cls.SLOTS):
365 raise SyntaxError, 'wrong number of slots'
366 obj = cls(_magic = True)
367 for i in xrange(len(slots)):
368 slot = slots[i]
369 slotdef, attr = cls.SLOTS[i]
370 if not slot.startswith(slotdef._name + '='):
371 raise SyntaxError, 'incorrect slot name'
372 try:
373 val = slotdef.decode(slot[len(slotdef._name) + 1:])
374 except (ValueError, SyntaxError):
375 raise SyntaxError, 'invalid %s value' % slotdef.TYPE
376 setattr(obj, attr, val)
377 obj.decoded()
378 return obj
379
380 @classmethod
381 def load(cls, path):
382 """
383 Read an encoded description of a structure and return the decoded object.
384 """
385 with open(path, 'r') as f:
386 cont = f.read()
387 if cont and cont[-1] == '\n':
388 cont = cont[:-1]
389 if '\n' in cont:
390 raise SyntaxError, 'encoding must be a single line'
391 return cls.decode(cont)
392
393 def decoded(me):
394 """
395 Hook for object initialization.
396
397 This is called by `__init__' and `decode'.
398 """
399 pass
400
401 ###--------------------------------------------------------------------------
402 ### Higher-level share management.
403 ###
404 ### We store shares together with a bunch of metadata about the sharing
405 ### parameters, and a hash of all of that together with the final secret.
406 ### Theoretically, storing a hash weakens the whole scheme from providing
407 ### unconditional security to merely providing computational security (based
408 ### on the assupmption that the hash function is one-way). In practice, the
409 ### secret will be used as a symmetric encryption key or similar, so it's
410 ### already only secure against computationally bounded adversaries.
411
412 class Secret (StructureType):
413 """
414 Represents a secret, either about to be shared or recently recovered.
415 """
416 LABEL = 'shamir-secret'
417 SLOTS = [(IntegerSlot('n'), 'count'),
418 (IntegerSlot('t'), 'thresh'),
419 (BinarySlot('s'), 'secret')]
420
421 def hash(me, hashfn):
422 """
423 Hash the secret and its parameters using the hash function HASHFN.
424
425 The hash is returned as a raw binary string. The HASHFN must be one of
426 the hash functions known to Python's `hashlib' module (essentially, hash
427 functions known to OpenSSL).
428 """
429 h = H.new(hashfn)
430 h.update(me.encode())
431 return h.digest()
432
433 class Params (StructureType):
434 """
435 Represents secret sharing parameters.
436 """
437 LABEL = 'shamir-params'
438 SLOTS = [(IntegerSlot('n'), 'count'),
439 (IntegerSlot('t'), 'thresh'),
440 (StringSlot('f'), 'hashfn'),
441 (BinarySlot('h'), 'hash')]
442
443 class Share (StructureType):
444 """
445 Represents a share of a secret.
446 """
447 LABEL = 'shamir-share'
448 SLOTS = [(IntegerSlot('i'), 'index'),
449 (BinarySlot('y'), 'share')]
450
451 ###--------------------------------------------------------------------------
452 ### Utilities.
453
454 QUIS = OS.path.basename(SYS.argv[0])
455
456 def moan(message):
457 SYS.stderr.write('%s: %s\n' % (QUIS, message))
458
459 def die(message, rc = 1):
460 moan(message)
461 SYS.exit(rc)
462
463 class struct (object):
464 def __init__(me, **kw):
465 me.__dict__.update(kw)
466
467 class Subcommand (struct):
468 pass
469
470 class SubcommandOptionParser (OPT.OptionParser, object):
471
472 def __init__(me, usage = '%prog [-OPTIONS] COMMAND [ARGS ...]',
473 *args, **kw):
474 super(SubcommandOptionParser, me).__init__(usage = usage, *args, **kw)
475 me.subcommands = [
476 Subcommand(name = 'help', usage = '[COMMAND ...]', opts = [],
477 func = me.cmd_help,
478 desc = 'Show help for %prog, or for the COMMANDs.')
479 ]
480 me.disable_interspersed_args()
481
482 def add_subcommand(me, name, func, usage, desc = None, opts = []):
483 subcmd = Subcommand(name = name, func = func, usage = usage,
484 desc = desc, opts = opts)
485 me.subcommands.append(subcmd)
486
487 def lookup_subcommand(me, name):
488 match = []
489 for sub in me.subcommands:
490 if sub.name == name:
491 return sub
492 elif sub.name.startswith(name):
493 match.append(sub)
494 if len(match) == 1:
495 return match[0]
496 elif len(match) == 0:
497 me.error("unknown command `%s'"% name)
498 else:
499 me.error("ambiguous command `%s': could be any of %s"
500 % (name, ', '.join(["`%s'" % sub.name for sub in match])))
501
502 def subcmd_usage(me, sub):
503 if sub.usage:
504 return '%s %s' % (sub.name, sub.usage)
505 else:
506 return sub.name
507
508 def print_help(me):
509 super(SubcommandOptionParser, me).print_help()
510 print
511 print 'Commands:'
512 for sub in me.subcommands:
513 if sub.usage is None:
514 continue
515 print ' ' + me.subcmd_usage(sub)
516
517 def make_subcommand_optparser(me, sub):
518 op = OPT.OptionParser(usage = 'usage: %%prog %s' % me.subcmd_usage(sub),
519 description = sub.desc)
520 for opt in sub.opts:
521 op.add_option(opt)
522 return op
523
524 def cmd_help(me, gopts, opts, args):
525 if not args:
526 me.print_help()
527 else:
528 for name in args:
529 sub = me.lookup_subcommand(name)
530 op = me.make_subcommand_optparser(sub)
531 op.print_help()
532
533 def dispatch(me, args = None):
534 if args is None:
535 args = SYS.argv[1:]
536 gopts, args = me.parse_args(args)
537 if not args:
538 me.error('missing command')
539 sub = me.lookup_subcommand(args[0])
540 op = me.make_subcommand_optparser(sub)
541 opts, args = op.parse_args(args[1:])
542 sub.func(gopts, opts, args)
543
544 OPTPARSE = SubcommandOptionParser(description = """\
545 Split and recombine secrets using Shamir's secret sharing system.
546 """)
547
548 ###--------------------------------------------------------------------------
549 ### Issue shares given a secret.
550
551 def parse_sharing(string):
552 slash = string.find('/')
553 if slash == -1:
554 raise ValueError, "no `/'"
555 return int(string[slash + 1:]), int(string[:slash])
556
557 def cmd_issue(gopts, opts, args):
558
559 if len(args) < 1 or len(args) > 2:
560 OPTPARSE.error('bad arguments')
561
562 try:
563 count, thresh = parse_sharing(args[0])
564 except ValueError, err:
565 OPTPARSE.error("bad sharing parameters `%s': %s"
566 % (args[0], err.message))
567
568 if len(args) < 2 or args[1] == '-':
569 sec = SYS.stdin.read()
570 else:
571 with open(args[1], 'rb') as f:
572 sec = f.read()
573
574 secret = Secret(count, thresh, sec)
575 h = secret.hash(opts.hashfn)
576 params = Params(count, thresh, opts.hashfn, h)
577 print params.encode()
578
579 ss = SecretSplit(sec, thresh)
580 for i in xrange(count):
581 share = Share(i, ss.issue(i + 1))
582 print share.encode()
583
584 OPTPARSE.add_subcommand(
585 'issue', cmd_issue,
586 '[-H HASHFN] THRESH/COUNT [SECRET-FILE]',
587 """\
588 Issue shares of a secret. The secret (as a binary lump) is read from
589 SECRET-FILE, or standard input if SECRET-FILE is omitted. COUNT + 1 lines
590 are written to standard output: the first line contains the sharing
591 parameters, and does not need to be kept especially secret (though it
592 contains a hash of the secret, so it compromises the unconditional security
593 of the secret sharing scheme); the remaining lines are the shares, one per
594 line. All of the lines are plain text.""",
595 [OPT.make_option('-H', '--hash-function', type = 'string',
596 action = 'store', dest = 'hashfn', default = 'sha256',
597 help = 'Hash function for verifying the share.')])
598
599 ###--------------------------------------------------------------------------
600 ### Recover a secret from shares.
601
602 def cmd_recover(gopts, opts, args):
603
604 ## Keep track of how well we've done so far. Report all of the things
605 ## which go wrong.
606 lose = False
607 lineno = 1
608
609 ## Read the system parameters.
610 line = SYS.stdin.readline()
611 if line.endswith('\n'):
612 line = line[:-1]
613 try:
614 params = Params.decode(line)
615 except ValueError, err:
616 moan("parameters (line %d) invalid: %s" % (lineno, err.message))
617 lose = True
618
619 ## Now read the shares. Pay attention to syntax errors, but not to much
620 ## else. Build the maps for the sharing parameters.
621 sj = SecretJoin()
622 shmap = {}
623 for line in SYS.stdin:
624 lineno += 1
625 if line.endswith('\n'):
626 line = line[:-1]
627 try:
628 share = Share.decode(line)
629 except ValueError, err:
630 moan("share invalid (line %d): %s" % (lineno, err.message))
631 lose = True
632 continue
633 share.lineno = lineno
634 if share.index in shmap:
635 moan("duplicate share index "
636 "(line %d, index %d previously on line %d)" %
637 (lineno, share.index, shmap[share.index].lineno))
638 lose = True
639 elif share.index < 0 or share.index >= params.count:
640 moan("share index %d out of range (line %d)" % (share.index, lineno))
641 lose = True
642 elif len(shmap) >= params.thresh:
643 moan("enough shares already (line %d)" % lineno)
644 lose = True
645 else:
646 sj.insert(share.index + 1, share.share)
647 shmap[share.index] = share
648
649 if len(shmap) < params.thresh:
650 moan("not enough shares")
651 lose = True
652
653 if lose:
654 SYS.exit(1)
655
656 ## Reassemble the secret.
657 secret = Secret(params.count, params.thresh, sj.recover())
658
659 if params.hash != secret.hash(params.hashfn):
660 die('secret doesn\'t match hash')
661
662 if opts.file == '-':
663 SYS.stdout.write(secret.secret)
664 else:
665 with open(opts.file, 'wb') as f:
666 f.write(secret.secret)
667
668 OPTPARSE.add_subcommand(
669 'recover', cmd_recover,
670 '',
671 """\
672 Standard input should contain a number of lines. The first line should
673 contain the secret sharing parameters (the first line output by `issue');
674 the remaining lines should contain shares. The shares may be supplied in
675 any order. The secret is written to the OUTPUT file, or standard output.""",
676 [OPT.make_option('-o', '--output', action = 'store', type = 'string',
677 dest = 'file', default = '-',
678 help = 'Write the secret to FILE.')])
679
680 ###----- That's all, folks --------------------------------------------------
681
682 if __name__ == '__main__':
683 OPTPARSE.dispatch()