summaryrefslogtreecommitdiffstats
path: root/graphics/tesseract/DETAILS
blob: f7d6dd4bd4e488518d39f471914532b2603cf8a2 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Load shared helpers from the grimoire (presumably the CMake build helpers --
# confirm against CMAKE_FUNCTIONS). The path is quoted so that a grimoire
# location containing whitespace is not word-split before being sourced.
source "$GRIMOIRE/CMAKE_FUNCTIONS"
           SPELL=tesseract
         VERSION=4.1.1
# LANGVER picks the tessdata ref used in DAT and is appended to the local
# file names of the traineddata sources declared by the language loop.
         LANGVER=4.00
     SOURCE_HASH=sha512:017723a2268be789fe98978eed02fd294968cc8050dde376dee026f56f2b99df42db935049ae5e72c4519a920e263b40af1a6a40d9942e66608145b3131a71a2
# URI is the base for the engine tarball; DAT is the base for traineddata files.
             URI=https://github.com/tesseract-ocr/tesseract/archive
             DAT=https://github.com/tesseract-ocr/tessdata/raw/$LANGVER
          SOURCE=$SPELL-$VERSION.tar.gz
   SOURCE_URL[0]=$URI/$VERSION.tar.gz

# Declare one additional source per language code listed in TESS_LANG
# (the list is split on whitespace). Every supported code owns a fixed
# SOURCE<n> slot: the upstream file is fetched from $DAT and stored locally
# with the LANGVER suffix. Any other code is only reported through
# message (a helper that is not defined in this file).
for LL in ${TESS_LANG}; do
  case "${LL}" in
    eng)
      SOURCE2="${LL}.traineddata-${LANGVER}"
      SOURCE2_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE2_HASH="sha512:dec017b60c7ee7c8dfe1c4cef562a02c8699c2856d6562641a7077c1a0b5493a26085fa81e7dfff97323054276f24eb51f64833bc2fe7bbe91b1955c6f719618"
      ;;
    fra)
      SOURCE3="${LL}.traineddata-${LANGVER}"
      SOURCE3_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE3_HASH="sha512:ecf1ded66b3e5c903161377903b5a693ea8221b155ac61fa88b0e2ba1fdb34cce8d8e6d0270a45ed822baa012d7d24ee15c6032a31b62d9b773106e39bd18603"
      ;;
    ita)
      SOURCE4="${LL}.traineddata-${LANGVER}"
      SOURCE4_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE4_HASH="sha512:0e32802292edb6bde1300a28af43e720e8cd6ea28e236865b01f8f5ff7b2e04b88d5d1ff2c32bf4fe20691c01aee4e524411c4cabb612e881e7573dc836ed0e6"
      ;;
    nld)
      SOURCE5="${LL}.traineddata-${LANGVER}"
      SOURCE5_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE5_HASH="sha512:d617f5c5b826640b2871dbe3d7973bcc5e66fafd837921a20e009d683806ed50f0f258aa455019d99fc54f5cb65c2fa0380e3a3c92b39ab0684b8799c730b09d"
      ;;
    spa)
      SOURCE6="${LL}.traineddata-${LANGVER}"
      SOURCE6_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE6_HASH="sha512:5d009401faa71d3a8e9c4ff32dd1efc5e38c02b8c4170653ebb33874e6c489f7556c67ce6ca7cee27500496f657cf4361314563576176ff5e545b8079722bb8f"
      ;;
    deu)
      SOURCE7="${LL}.traineddata-${LANGVER}"
      SOURCE7_URL[0]="${DAT}/${LL}.traineddata"
      SOURCE7_HASH="sha512:23326861284f096e40834bac31ef4e9344507b9332c8bbdc10dced0c60743bb2fbeae79d888c80c56e796661a2232998f1f7ba64ff1e0c9bc79ff13bd0a2663c"
      ;;
    *)
      # No traineddata source is declared for this code.
      message language ${LL} not yet supported
      ;;
  esac
done
# Directory named after the spell and version, below BUILD_DIRECTORY
# (presumably where the upstream tarball unpacks -- confirm against the build tooling).
SOURCE_DIRECTORY="${BUILD_DIRECTORY}/${SPELL}-${VERSION}"

# Descriptive metadata for the spell.
WEB_SITE="https://github.com/tesseract-ocr/tesseract"
LICENSE[0]="APACHE"
ENTERED="20060509"
PATCHLEVEL="2"
KEYWORDS=ocr
SHORT='ocr engine'

# Long description, written to stdout whenever this file is sourced.
cat <<EOF
This package contains an OCR engine - libtesseract and a command line 
program - tesseract. Tesseract 4 adds a new neural net (LSTM) based OCR 
engine which is focused on line recognition, but also still supports the 
legacy Tesseract OCR engine of Tesseract 3 which works by recognizing 
character patterns. Compatibility with Tesseract 3 is enabled by using the 
Legacy OCR Engine mode (--oem 0). It also needs traineddata files which 
support the legacy engine, for example those from the tessdata repository.
EOF