mdw@git.distorted.org.uk Git - termux-packages/blame_incremental

... / ...

Commit	Line	Data
	1	TERMUX_PKG_HOMEPAGE=https://github.com/tesseract-ocr/tesseract
	2	TERMUX_PKG_DESCRIPTION="Tesseract is probably the most accurate open source OCR engine available"
	3	TERMUX_PKG_VERSION=3.05.00
	4	TERMUX_PKG_DEPENDS="libtool, libuuid, leptonica"
	5	TERMUX_PKG_SRCURL=https://github.com/tesseract-ocr/tesseract/archive/${TERMUX_PKG_VERSION}.tar.gz
	6	TERMUX_PKG_SHA256=3fe83e06d0f73b39f6e92ed9fc7ccba3ef734877b76aa5ddaaa778fac095d996
	7	TERMUX_PKG_FOLDERNAME=tesseract-${TERMUX_PKG_VERSION}
	8
	9	termux_step_pre_configure() {
	10	# http://blog.matt-swain.com/post/26419042500/installing-tesseract-ocr-on-mac-os-x-lion
	11	export LIBLEPT_HEADERSDIR=${TERMUX_PREFIX}/include/leptonica
	12
	13	perl -p -i -e 's|ADD_RT], true|ADD_RT], false|g' configure.ac
	14	./autogen.sh
	15	}
	16
	17	termux_step_post_make_install() {
	18	# download english trained data
	19	cd "${TERMUX_PREFIX}/share/tessdata"
	20	rm -f eng.*
	21	for f in cube.{bigrams,fold,lm,nn,params,size,word-freq} tesseract_cube.nn traineddata; do
	22	f=eng.$f
	23	# From the tessdata README: "These language data files only work with
	24	# Tesseract 4. They are based on the sources in tesseract-ocr/langdata on GitHub.
	25	# Get language data files for Tesseract 3.04 or 3.05 from the 3.04 tree."
	26	termux_download \
	27	https://raw.githubusercontent.com/tesseract-ocr/tessdata/3.04.00/$f \
	28	$f
	29	done
	30	}