Hello,
I am trying hard to understand the CamlPDF library, because I want to
write a function which takes a PDF file and returns a list of pairs of
bounding-box coordinates and the associated images of its content.
In my function, I first decompress all streams with:
---8><--8><---
(* [decompress_pdf pdf] passes a callback to [Pdf.map_stream] over [pdf].
   For each object the callback receives, it calls
   [Pdfcodec.decode_pdfstream_until_unknown] (its result is discarded) and
   then hands the very same object back.
   NOTE(review): presumably the decoder updates the stream in place --
   confirm against the CamlPDF documentation. *)
let decompress_pdf pdf =
  let decode_then_return stream_obj =
    Pdfcodec.decode_pdfstream_until_unknown pdf stream_obj;
    stream_obj
  in
  Pdf.map_stream decode_then_return pdf
;;
---><8--><8---
Then I try to decode the PDF objects like this, to get a feeling for how
CamlPDF works:
---8><--8><--
(* Debug printers that dump PDF objects to standard output.
   - [decode_pdfobject] prints a single object.
   - [decode_pdfdoc] prints the [content] objects of every page in a list.
   - [decode_pdfobjects] prints every object of a list, in order.
   No separator is written between consecutive objects. *)
let rec decode_pdfobject = function
  (* Nothing is printed for nulls or streams. *)
  | Pdf.Null | Pdf.Stream _ -> ()
  | Pdf.Boolean flag -> Printf.printf "%b" flag
  | Pdf.Indirect number | Pdf.Integer number -> Printf.printf "%i" number
  | Pdf.Real number -> Printf.printf "%f" number
  | Pdf.Name text | Pdf.String text -> Printf.printf "%s" text
  | Pdf.Array elements -> decode_pdfobjects elements
  | Pdf.Dictionary entries ->
    (* Each entry is written as "(key:" followed by its value and ")". *)
    List.iter
      (fun (key, value) ->
        Printf.printf "(%s:" key;
        decode_pdfobject value;
        Printf.printf ")")
      entries

and decode_pdfdoc doc =
  List.iter (fun page -> decode_pdfobjects page.Pdfdoc.content) doc

and decode_pdfobjects (lst : Pdf.pdfobject list) =
  List.iter decode_pdfobject lst
;;
---><8--><8---
I think I am doing this totally wrong, but the CamlPDF documentation is
very low-level when it comes to the PDF-manipulation functions and very
high-level when it comes to understanding the library.
Could you please send me an example of how to extract parts of a
PDF page? Or could you point me to a more beginner-friendly introduction
to understanding CamlPDF? Or does another PDF library for OCaml
exist?
Thanks in advance,
Bye Andreas
--
Andreas Romeyke <
andreas.romeyke@...>
Deutsche Zentralbücherei für Blinde
[Non-text portions of this message have been removed]