guix-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

02/02: gnu: tesseract-ocr: Update to 5.2.0.


From: guix-commits
Subject: 02/02: gnu: tesseract-ocr: Update to 5.2.0.
Date: Fri, 12 Aug 2022 00:47:12 -0400 (EDT)

apteryx pushed a commit to branch master
in repository guix.

commit f7c027617da44ed684bbb9b35c603b790365fdbf
Author: Maxim Cournoyer <maxim.cournoyer@gmail.com>
AuthorDate: Thu Aug 11 17:53:21 2022 -0400

    gnu: tesseract-ocr: Update to 5.2.0.
    
    * gnu/packages/ocr.scm (tesseract-ocr): Update to 5.2.0.
    [inputs, native-inputs]: Move after arguments.  Use new style inputs.
    [arguments]: Use gexps.
    [configure-flags]: Add --disable-static.
    [phases]{fix-docbook}: Replace phase with...
    {do-not-override-xml-catalog-files}: ... this new phase.
    {build-training}: Move after build phase.  Enable parallel build.
    {install-training}: Move after install phase.
    [native-inputs]: Add libxml2.
---
 gnu/packages/ocr.scm | 124 ++++++++++++++++++++++++++-------------------------
 1 file changed, 63 insertions(+), 61 deletions(-)

diff --git a/gnu/packages/ocr.scm b/gnu/packages/ocr.scm
index 11595e87e5..e28bd17668 100644
--- a/gnu/packages/ocr.scm
+++ b/gnu/packages/ocr.scm
@@ -5,6 +5,7 @@
 ;;; Copyright © 2019 Alex Vong <alexvong1995@gmail.com>
 ;;; Copyright © 2021 Andy Tai <atai@atai.org>
 ;;; Copyright © 2021, 2022 Nicolas Goaziou <mail@nicolasgoaziou.fr>
+;;; Copyright © 2022 Maxim Cournoyer <maxim.cournoyer@gmail.com>
 ;;;
 ;;; This file is part of GNU Guix.
 ;;;
@@ -74,71 +75,72 @@ it produces text in 8-bit or UTF-8 formats.")
     (license license:gpl3+)))
 
 (define-public tesseract-ocr
-  ;; There are useful commits beyond the last official stable release.
-  (let ((commit "97079fa353557af6df86fd20b5d2e0dff5d8d5df")
-        (revision "1"))
-    (package
-      (name "tesseract-ocr")
-      (version (git-version "4.1.1" revision commit))
-      (source
-       (origin
-         (method git-fetch)
-         (uri (git-reference
-               (url "https://github.com/tesseract-ocr/tesseract")
-               (commit commit)))
-         (file-name (git-file-name name version))
-         (sha256
-          (base32
-           "11137a4aaay7qp64vdjd83hz1l089nzi5a0ql0qgk8gn79pyhi98"))))
-      (build-system gnu-build-system)
-      (inputs
-       `(("cairo" ,cairo)
-         ("icu" ,icu4c)
-         ("leptonica" ,leptonica)
-         ("pango" ,pango)
-         ("python-wrapper" ,python-wrapper)))
-      (native-inputs
-       `(("asciidoc" ,asciidoc)
-         ("autoconf" ,autoconf)
-         ("automake" ,automake)
-         ("docbook-xsl" ,docbook-xsl)
-         ("libarchive" ,libarchive)
-         ("libcurl" ,curl)
-         ("libtool" ,libtool)
-         ("libtiff" ,libtiff)
-         ("pkg-config" ,pkg-config)
-         ("xsltproc" ,libxslt)))
-      (arguments
-       `(#:configure-flags
-         (let ((leptonica (assoc-ref %build-inputs "leptonica")))
-           (list (string-append "LIBLEPT_HEADERSDIR=" leptonica "/include")))
-         #:tests? #f ; Tests currently result in a segfault
-         #:phases
-         (modify-phases %standard-phases
-           (add-after 'unpack 'fix-docbook
-             (lambda* (#:key inputs #:allow-other-keys)
-               ;; Don't attempt to download XSL schema.
-               (substitute* "doc/Makefile.am"
-                 (("http://docbook.sourceforge.net/release/xsl/current/manpages/docbook.xsl")
-                  (string-append (assoc-ref inputs "docbook-xsl")
-                                 "/xml/xsl/docbook-xsl-"
-                                 ,(package-version docbook-xsl)
-                                 "/manpages/docbook.xsl")))))
-           (add-after 'install 'build-training
-             (lambda _
-               (invoke "make" "training")))
-           (add-after 'build-training 'install-training
-             (lambda _
-               (invoke "make" "training-install"))))))
-      (home-page "https://github.com/tesseract-ocr/tesseract")
-      (synopsis "Optical character recognition engine")
-      (description
-       "Tesseract is an optical character recognition (OCR) engine with very
+  (package
+    (name "tesseract-ocr")
+    (version "5.2.0")
+    (source
+     (origin
+       (method git-fetch)
+       (uri (git-reference
+             (url "https://github.com/tesseract-ocr/tesseract")
+             (commit version)))
+       (file-name (git-file-name name version))
+       (sha256
+        (base32
+         "0dai539h07lqj8lyhznd3wbwdpqr78qrsczq78rsmsryqvmdbyaa"))))
+    (build-system gnu-build-system)
+    (arguments
+     (list
+      #:configure-flags
+      #~(list (string-append "LIBLEPT_HEADERSDIR="
+                             #$(this-package-input "leptonica") "/include")
+              "--disable-static")       ;avoid 6 MiB static archive
+      ;; The unit tests are disabled because they require building bundled
+      ;; third party libraries.
+      #:tests? #f
+      #:phases
+      #~(modify-phases %standard-phases
+          (add-after 'unpack 'do-not-override-xml-catalog-files
+            (lambda _
+              (substitute* "configure.ac"
+                (("AC_SUBST\\(\\[XML_CATALOG_FILES])")
+                 ""))))
+          (add-after 'build 'build-training
+            (lambda* (#:key parallel-build? #:allow-other-keys)
+              (define n (if parallel-build? (number->string
+                                             (parallel-job-count))
+                            "1"))
+              (invoke "make" "-j" n "training")))
+          (add-after 'install 'install-training
+            (lambda _
+              (invoke "make" "training-install"))))))
+    (native-inputs
+     (list asciidoc
+           autoconf
+           automake
+           curl
+           docbook-xsl
+           libarchive
+           libtiff
+           libtool
+           libxml2                      ;for XML_CATALOG_FILES
+           libxslt
+           pkg-config))
+    (inputs
+     (list cairo
+           icu4c
+           leptonica
+           pango
+           python-wrapper))
+    (home-page "https://github.com/tesseract-ocr/tesseract")
+    (synopsis "Optical character recognition engine")
+    (description
+     "Tesseract is an optical character recognition (OCR) engine with very
 high accuracy.  It supports many languages, output text formatting, hOCR
 positional information and page layout analysis.  Several image formats are
 supported through the Leptonica library.  It can also detect whether text is
 monospaced or proportional.")
-      (license license:asl2.0))))
+    (license license:asl2.0)))
 
 (define-public gimagereader
   (package



reply via email to

[Prev in Thread] Current Thread [Next in Thread]