PDF Latex OCR

OCR

Tesseract

sudo apt install tesseract-ocr
sudo apt install tesseract-ocr-fra
sudo apt install tesseract-ocr-ita
sudo apt install tesseract-ocr-frk      # Fraktur (old German blackletter script); modern German is tesseract-ocr-deu
sudo apt install imagemagick

# generate result.txt
tesseract test.jpg result -l ita 

# generate result.txt, using english + italian
tesseract test.jpg result -l eng+ita 

# Assume a single uniform block of text
tesseract MullerWritersP1.jpg result --psm 6

for i in $(ls *.jpg); do tesseract $i $i --psm 6; done;
tesseract MullerWritersP1.jpg result --psm 6

gocr

gocr -i test.jpg -o test.txt

for i in $(ls *.pbm); do gocr -i $i -o ${i/.pbm/.txt}; done;

PDF

To sort (uncategorized)
sudo apt install pdfgrep

Overleaf
TeXample.net
ShareLaTeX

djvu - Good converter
https://djvu2pdf.com/

xreader - view pdf

sudo apt install xreader
xreader myfile.pdf

pdfimages - extract images from pdf

# -j => write JPEG-encoded (DCT) images as .jpg files; other images are still written as PBM/PPM
pdfimages -j ClefI-II-III.pdf basename

pdftotext - convert pdf to text

sudo apt install poppler-utils
pdftotext -layout input.pdf output.txt

mupdf-tools - split pages vertically

(number of generated pages = original number of pages * 2)
sudo apt install mupdf-tools
mutool poster -x 2 castille2004-2.pdf test.pdf

wkhtmltopdf - convert html to pdf

sudo apt install wkhtmltopdf
wkhtmltopdf myfile.html myfile.pdf

Conversion latex / pdf

www.sharelatex.com
sudo apt install texlive
sudo apt install texlive-lang-french
sudo apt install texlive-latex-extra

###### tex to pdf :
pdflatex myfile.tex 

pdftk

sudo apt install pdftk-java
https://www.pdflabs.com/docs/pdftk-man-page/

pdftk < input PDF files | - | PROMPT >
[ input_pw < input PDF owner passwords | PROMPT > ]
[ < operation > < operation arguments > ]
[ output < output filename | - | PROMPT > ]
[ encrypt_40bit | encrypt_128bit ]
[ allow < permissions > ]
[ owner_pw < owner password | PROMPT > ]
[ user_pw < user password | PROMPT > ]
[ flatten ] [ need_appearances ]
[ compress | uncompress ]
[ keep_first_id | keep_final_id ]
[ drop_xfa ]
[ verbose ]
[ dont_ask | do_ask ]

< operation > can be empty, or:
    cat
    shuffle
    burst
    rotate
    generate_fdf
    fill_form
    background
    multibackground
    stamp
    multistamp
    dump_data
    dump_data_utf8
    dump_data_fields
    dump_data_fields_utf8
    dump_data_annots
    update_info
    update_info_utf8
    attach_files
    unpack_files

###### extract page range(s) from a pdf into a new pdf
pdftk category-theory-for-scientists.pdf cat 58-112  output category-theory-for-scientists-chap3.pdf
pdftk category-theory-for-scientists.pdf cat 1 3 5 7-57  output out.pdf

###### merge multiple pdfs into a single file
pdftk file1.pdf file2.pdf file3.pdf cat output newfile.pdf

###### rotate the document to the left
pdftk test.pdf rotate 1-endwest output out.pdf 
[ <begin page number> [ -<end page number> [ <qualifier> ] ] ] [ <page rotation> ] 
- qualifier : even or odd
- page rotation : north, south, east, west, left, right, or down. 

###### adding metadata
# - create a file metadata.txt with :
InfoKey: Title
InfoValue: Wild Flowers of Kashmir
InfoKey: Author
InfoValue: Coventry, B. O.
InfoKey: Keywords
InfoValue: London,1923,Raithby Lawrence and Company
# - then :
pdftk 2000-2-bayes-theorem.pdf update_info metadata.txt output test.pdf

tikz - drawing in latex

tikz-cd - draw commutative diagrams (category theory)

% https://tikzcd.yichuanshen.de
\documentclass{article} 
\nofiles        % do not generate .aux .log and .out files
\usepackage{tikz-cd}
\begin{document}
\begin{tikzcd}
A   \arrow[loop, "id_A"', distance=2em, in=305, out=235]
    \arrow[rr, "g  o  f", bend left=49]
    \arrow[r, "f"', bend left]
& B \arrow[loop, "id_B"', distance=2em, in=305, out=235]
    \arrow[r, "g"', bend left]
& C \arrow[loop, "id_C"', distance=2em, in=305, out=235]
\end{tikzcd}
\end{document}

\documentclass{article} 
\usepackage{tikz-cd}
\begin{document}

\begin{tikzcd}
Z    \arrow[r,"b"]
     \arrow[d,"g"]
     \arrow[loop, distance=2em, in=215, out=145]
& C  \arrow[r,"\alpha"]
     \arrow[d,"\gamma",pos=0.4,swap]
& D  \arrow[d,"\beta"]\\
B    \arrow[rru,"h",pos=0.7,swap]
& B' \arrow[r,"\omega"]
& D'
\end{tikzcd}

\begin{tikzcd}
A   \arrow[d, "g"]
    \arrow[r, "f"]
& B \arrow[r, "\alpha"]
    \arrow[d, "\gamma"]
& D
    \arrow[d, "\beta"] \\
C   \arrow[rru, "h"]
& B'    \arrow[r, "\lambda"]
& D'
\end{tikzcd}

\end{document}

-------------------------------------------------------------------------------
\documentclass{article}
\usepackage{tikz-cd}
\begin{document}
\begin{tikzcd}
A   \arrow[r,"b"]
    \arrow[d,"g"]
& C \arrow[r,"\alpha"]
    \arrow[d,"\beta"]
& D \arrow[d,"\gamma"]
\\ B \arrow[rru,"h"]
& B' \arrow[r,"\omega"]
& D' 
\end{tikzcd}
\end{document}

-------------------------------------------------------------------------------
%%%%%%% loop %%%%%%%% https://tikzcd.yichuanshen.de/
\begin{tikzcd}
A   \arrow[loop, distance=2em, in=215, out=145]
    \arrow[rr, "f", bend left]
&
& B \arrow[loop, distance=2em, in=35, out=325]
    \arrow[ll, "g", bend left]
\end{tikzcd}