mdw@git.distorted.org.uk Git - autoys/blob - flaccrip/flaccrip-discid

   1 #! /bin/bash
   2
   3 set -e
   4 : ${JBDIR=/mnt/jb}
   5
   6 ###--------------------------------------------------------------------------
   7 ### CD identification algorithms.
   8 ###
   9 ### 1. CDDB
  10 ###
  11 ### CCLLLLNN [NTRACK TRACK-START... LENGTH]
  12 ###
  13 ### CC is a checksum of the track start times; LLLL is the offset of the
  14 ### leadout track, in seconds (rounded down), and NN is the total number of
  15 ### tracks.  All of these are in hexadecimal, and include the 150-frame (2
  16 ### second) pre-gap.  Since a CD can have at most 99 tracks, and can contain
  17 ### no more than 90 minutes of audio (!), the other two items fit without
  18 ### needing reduction.
  19 ###
  20 ### The checksum is the sum of the decimal digits of the track start times,
  21 ### in seconds, reduced modulo 255.
  22 ###
  23 ### NTRACK is the number of tracks; LENGTH is the offset of the leadout in
  24 ### seconds.  These are the same as in the checksum, so repeating them is
  25 ### pointless, but it's done anyway.  The TRACK-STARTs are the track start
  26 ### offsets, in frames.
  27 ###
  28 ###
  29 ### 2. AccurateRip
  30 ###
  31 ### DA1-DA2-CDDBID
  32 ###
  33 ### CDDBID is the CDDB id as described above.  DA1 is simply the sum of the
  34 ### track starts, including the lead-out track; DA2 is the sum of the
  35 ### products TRACKNO * OFFSET for the audio tracks only, but including the
  36 ### final lead-out -- so a data track makes the last audio track look very
  37 ### long.  Another wrinkle: the OFFSET for the first track is forced to 1 if
  38 ### it's zero (to avoid the entry being lost, I presume, though I'm not sure
  39 ### why this is ever so useful).
  40 ###
  41 ###
  42 ### 3. MusicBrainz
  43 ###
  44 ### The MusicBrainz identification is a base64-encoded SHA-1 hash of the
  45 ### table of contents.  The base64 encoding uses `.', `_' and `-' in place of
  46 ### `+', `/' and `=', because the standard characters /all/ have special
  47 ### meanings in URL query strings.  (Duh.  And I'm not quite sure why we
  48 ### still need the trailing marker.)
  49 ###
  50 ### The message to be hashed is FIRST LAST LENGTH TRACK-START..., where FIRST
  51 ### and LAST are the first and last track numbers, LENGTH is the offset of
  52 ### the lead-out, in frames, and the TRACK-STARTs are the start offsets of
  53 ### the tracks, in order, also in frames.  The track numbers are two
  54 ### uppercase hex digits; the frame offsets are eight.  All of these are
  55 ### simply concatenated together.
  56 ###
  57 ### MusicBrainz only concerns itself with the audio tracks.  If there's a
  58 ### data track, then we ignore it, and the lead-out is considered to be 11400
  59 ### frames before the data track.
  60
  61 ###--------------------------------------------------------------------------
  62 ### Command line.
  63
  64 format=cddb
  65 while getopts "acCm" opt; do
  66   case "$opt" in
  67     a) format=accuraterip ;;
  68     c) format=cddb ;;
  69     C) format=cddb-tracks ;;
  70     m) format=musicbrainz ;;
  71     *) exit 1 ;;
  72   esac
  73 done
  74 shift $((OPTIND - 1))
  75
  76 case $# in
  77   0)
  78     ;;
  79   1)
  80     if [ -r "$1/.discid" ]; then
  81       exec <"$1/.discid"
  82     else
  83       exec < <($JBDIR/bin/flaccrip-toc "$1")
  84     fi
  85     ;;
  86   *)
  87     echo >&2 "Usage: $0 [-acCm] [DIRECTORY]"
  88     exit 1
  89     ;;
  90 esac
  91
  92 ###--------------------------------------------------------------------------
  93 ### Main work.
  94
  95 ## Initial setup.
  96 cddbck=0
  97 cddbtracks=""
  98 nt=0 nat=0
  99 da=0 db=0
 100 mbtracks=""
 101
 102 ## Wander through the table of contents picking up unconsidered trifles.
 103 while read type offset; do
 104
 105   ## Bump the track numbers here.  Most things want 1-based numbering, so
 106   ## this is right.  Don't bump for the end marker.  Those who care
 107   ## (AccurateRip) will sort it out for themselves.
 108   case "$type" in
 109     T) nt=$((nt + 1)) nat=$((nat + 1));;
 110     D) nt=$((nt + 1)) ;;
 111   esac
 112
 113   ## Update the CDDB state.  This is common to several formats.
 114   case "$type" in
 115     [TD])
 116       o=$((offset + 150))
 117       s=$((o/75))
 118       cddbtracks="${cddbtracks:+$cddbtracks }$o"
 119       while :; do
 120         case "$s" in
 121           ?*) cddbck=$((cddbck + ${s:0:1})); s=${s#?} ;;
 122           *) break ;;
 123         esac
 124       done
 125       ;;
 126     E)
 127       final=$offset
 128       ;;
 129   esac
 130
 131   ## Update other bits of information.
 132   case "$type" in
 133     T)
 134       da=$((da + offset))
 135       db=$((db + nat*(offset > 0 ? offset : 1)))
 136       mbtracks="$mbtracks$(printf "%08X" $((offset + 150)))"
 137       ;;
 138     D)
 139       mbfinal=$((offset - 11250))
 140       ;;
 141     E)
 142       da=$((da + offset))
 143       db=$((db + (nat + 1)*(offset > 0 ? offset : 1)))
 144       case "${mbfinal+t}" in
 145         t) ;;
 146         *) mbfinal=$((offset + 150)) ;;
 147       esac
 148       ;;
 149   esac
 150 done
 151
 152 ## Sort out the CDDB id.
 153 cddbid=$(printf "%02x%04x%02x" $((cddbck%255)) $((final/75)) $nt)
 154
 155 ###--------------------------------------------------------------------------
 156 ### Produce the answer.
 157
 158 case "$format" in
 159   cddb)
 160     echo "$cddbid"
 161     ;;
 162   cddb-tracks)
 163     echo "$cddbid $nt $cddbtracks $((final/75 + 2))"
 164     ;;
 165   accuraterip)
 166     printf "%03d-%08x-%08x-%s\n" $nat $da $db $cddbid
 167     ;;
 168   musicbrainz)
 169     mb=$(printf "%02X%02X%08X%s" 1 $nat $mbfinal $mbtracks)
 170     for ((i = nat; i < 99; i++)); do
 171       mb="${mb}00000000"
 172     done
 173     printf "%s" $mb |
 174       openssl dgst -sha1 -binary |
 175       openssl base64 | tr '+/=' '._-'
 176     ;;
 177 esac