| |
1 |
1 |
ad_library { |
| |
2 |
2 |
|
| |
3 |
3 |
Search Test Procs |
| |
4 |
4 |
|
| |
5 |
5 |
} |
| |
6 |
6 |
|
| |
7 |
7 |
aa_register_case \ |
| |
8 |
8 |
-cats {api smoke} \ |
| |
9 |
9 |
-procs { |
| |
10 |
10 |
search::convert::binary_to_text |
| |
11 |
11 |
} \ |
| |
12 |
12 |
convert_binary_to_text { |
| |
13 |
13 |
|
| |
14 |
14 |
Test the conversion of various file types to plain text for |
| |
15 |
15 |
indexing. |
| |
16 |
16 |
|
| |
17 |
17 |
The test files all contain the word "OpenACS". We test if this |
| |
18 |
18 |
is correctly extracted. |
| |
19 |
19 |
|
| |
20 |
20 |
} { |
| |
|
21 |
# |
| |
|
22 |
# .ppt conversion is currently only best-effort, as the |
| |
|
23 |
# underlying tool catppt seems to be unreliable even for a |
| |
|
24 |
# trivial document such as the one we test here. |
| |
|
25 |
# |
| |
|
26 |
# We comment out this test until a better solution is found, |
| |
|
27 |
# e.g. one based on LibreOffice, unoconv or other similar |
| |
|
28 |
# tools. |
| |
|
29 |
# |
| |
|
30 |
# ppt application/mspowerpoint |
| |
|
31 |
# |
| |
21 |
32 |
foreach {extension mime_type} { |
| |
22 |
33 |
txt text/plain |
| |
23 |
34 |
html text/html |
| |
24 |
35 |
doc application/msword |
| |
25 |
36 |
xls application/msexcel |
| |
26 |
|
ppt application/mspowerpoint |
| |
27 |
37 |
pdf application/pdf |
| |
28 |
38 |
odt application/vnd.oasis.opendocument.text |
| |
29 |
39 |
ott application/vnd.oasis.opendocument.text-template |
| |
30 |
40 |
odp application/vnd.oasis.opendocument.presentation |
| |
31 |
41 |
otp application/vnd.oasis.opendocument.presentation-template |
| |
32 |
42 |
ods application/vnd.oasis.opendocument.spreadsheet |
| |
33 |
43 |
ots application/vnd.oasis.opendocument.spreadsheet-template |
| |
34 |
44 |
docx application/vnd.openxmlformats-officedocument.wordprocessingml.document |
| |
35 |
45 |
xlsx application/vnd.openxmlformats-officedocument.spreadsheetml.sheet |
| |
36 |
46 |
pptx application/vnd.openxmlformats-officedocument.presentationml.presentation |
| |
37 |
47 |
} { |
| |
38 |
48 |
set filename [acs_root_dir]/packages/search/tcl/test/data/test.$extension |
| |
39 |
49 |
set text [search::convert::binary_to_text \ |
| |
40 |
50 |
-filename $filename \ |
| |
41 |
51 |
-mime_type $mime_type] |
| |
42 |
52 |
set ok_p [expr {[string first "OpenACS" $text] >= 0}] |
| |
43 |
53 |
aa_true "Text was extracted correctly for '.$extension'/'$mime_type'" $ok_p |
| |
44 |
54 |
if {!$ok_p} { |
| |
45 |
55 |
aa_log "Extracted text: [ns_quotehtml $text]" |
| |
46 |
56 |
} |