antoniop
committed
on 15 Mar 23
Document the decision to no longer test .ppt text extraction
openacs-4/.../tcl/test/search-procs.tcl (+11 -1)
1 1 ad_library {
2 2
3 3     Search Test Procs
4 4
5 5 }
6 6
7 7 aa_register_case \
8 8     -cats {api smoke} \
9 9     -procs {
10 10         search::convert::binary_to_text
11 11     } \
12 12     convert_binary_to_text {
13 13
14 14         Test the conversion of various file types to plain text for
15 15         indexing.
16 16
17 17         The test files all contain the word "OpenACS". We test if this
18 18         is correctly extracted.
19 19
20 20     } {
  21         #
  22         # .ppt conversion is currently only best-effort, as the
  23         # underlying tool catppt seems to be unreliable even for a
  24         # trivial document such as the one we test here.
  25         #
  26         # We comment out this test until a better solution is found,
  27         # e.g. one based on LibreOffice, unoconv or other similar
  28         # tools.
  29         #
  30         # ppt application/mspowerpoint
  31         #
21 32         foreach {extension mime_type} {
22 33             txt text/plain
23 34             html text/html
24 35             doc application/msword
25 36             xls application/msexcel
26               ppt application/mspowerpoint
27 37             pdf application/pdf
28 38             odt application/vnd.oasis.opendocument.text
29 39             ott application/vnd.oasis.opendocument.text-template
30 40             odp application/vnd.oasis.opendocument.presentation
31 41             otp application/vnd.oasis.opendocument.presentation-template
32 42             ods application/vnd.oasis.opendocument.spreadsheet
33 43             ots application/vnd.oasis.opendocument.spreadsheet-template
34 44             docx application/vnd.openxmlformats-officedocument.wordprocessingml.document
35 45             xlsx application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
36 46             pptx application/vnd.openxmlformats-officedocument.presentationml.presentation
37 47         } {
38 48             set filename [acs_root_dir]/packages/search/tcl/test/data/test.$extension
39 49             set text [search::convert::binary_to_text \
40 50                           -filename $filename \
41 51                           -mime_type $mime_type]
42 52             set ok_p [expr {[string first "OpenACS" $text] >= 0}]
43 53             aa_true "Text was extracted correctly for '.$extension'/'$mime_type'" $ok_p
44 54             if {!$ok_p} {
45 55                 aa_log "Extracted text: [ns_quotehtml $text]"
46 56             }