Revision | dcc972c4aa27a52f4caa7382fcb5b816f47b1fea (tree) |
---|---|
Time | 2008-04-11 21:32:10 |
Author | iselllo |
Committer | iselllo |
I added the file search_pdf_create_txt.sh, a simple script that
searches PDF files by keyword.
@@ -0,0 +1,104 @@ | ||
#!/bin/bash
# Keyword search over a directory of PDF files.
# Put this script in the directory that holds the PDF files to be searched.

# Print the menu in one go; the trailing blanks inside the quoted strings
# are part of the displayed text.
printf '%s\n' \
  "Do you want to: " \
  "1- create/update index" \
  "2- search biblio" \
  "_________ " \
  "NOTE: index only need to be created once"

# The answer is stored in $choice and echoed back to the user.
read choice
printf 'your choice [%s]\n' "$choice"
12 | + | |
#######################################
# Create or update the text index of the PDF files in the current directory.
# Every regular file ./NAME.pdf is converted with pdftotext into
# ./text_format/NAME.txt unless that text file already exists.
# Files written:
#   ./tmp_menu_file  list of the PDF files found, one per line (recreated)
#   ./log_treat      progress log of this run (recreated)
#   ./text_format/   directory holding the extracted text
# Outputs:   progress messages on stdout, conversion failures on stderr
# Returns:   0 on success, 1 if the index directory cannot be created or
#            at least one conversion failed
#######################################
build_index() {
  local -a pdfs
  local pdf base
  local count=0
  local status=0

  pdfs=()

  # Start from fresh bookkeeping files.
  rm -f ./tmp_menu_file ./log_treat

  if [ -d ./text_format ]; then
    echo "an index has already been created"
    echo "update in progress"
  elif ! mkdir ./text_format; then
    echo "cannot create ./text_format" >&2
    return 1
  fi

  echo "reading directory"
  # Glob instead of parsing ls so that names containing blanks stay intact.
  for pdf in *.pdf; do
    # Skips the literal pattern when nothing matches, and non-regular files.
    [[ -f "$pdf" ]] || continue
    pdfs+=("$pdf")
    printf '%s\n' "$pdf" >>tmp_menu_file
  done
  echo "reading done"
  echo "[${#pdfs[@]}] pdf files"

  for pdf in "${pdfs[@]}"; do
    count=$((count + 1))
    # Strip only the final ".pdf" so that names with several dots survive.
    base=${pdf%.pdf}
    echo "loading $base" >>log_treat
    echo "loading $base"
    if [ -f "./text_format/$base.txt" ]; then
      echo "file [$base] already treated "
    # The "./" prefix keeps a name starting with "-" from being read as an option.
    elif pdftotext "./$pdf" "./text_format/$base.txt"; then
      echo "file [$count,$base] treated " >>log_treat
      echo "file [$count,$base] treated "
    else
      # Drop any partial output so that the next update retries this file.
      rm -f "./text_format/$base.txt"
      echo "file [$count,$base] could not be converted" >>log_treat
      echo "file [$count,$base] could not be converted" >&2
      status=1
    fi
  done

  return "$status"
}

# Menu entry 1. The default and the silenced stderr make an empty or
# non-numeric answer simply not match instead of printing a test error.
if [ "${choice:-0}" -eq 1 ] 2>/dev/null; then
  build_index
fi
56 | + | |
#######################################
# List the files whose content matches a pattern.
# Arguments: $1     - case-insensitive extended regular expression
#            $2...  - candidate files
# Outputs:   names of the matching files, one per line
# Returns:   grep's exit status (0 straight away when there is no candidate)
#######################################
files_matching() {
  local pattern=$1
  shift
  # Without file operands grep would read stdin; nothing can match anyway.
  if (( $# == 0 )); then
    return 0
  fi
  # -e keeps a pattern starting with "-" from being parsed as an option.
  grep -E -i -l -e "$pattern" -- "$@"
}

#######################################
# Run one AND-search over the *.txt files of the current directory.
# For every keyword the files matching it alone are printed; from the
# second keyword on, the files matching all keywords so far are printed too.
# Arguments: $1 - first keyword
#            $2 - second keyword (optional; empty means none)
#            $3 - third keyword (optional; only used when $2 is non-empty)
# Outputs:   result headings, each followed by one line of space-separated
#            file names (an empty line when nothing matches)
#######################################
search_keywords() {
  local -a keys all only matched narrowed
  local key name label=""

  keys=("${1:-}")
  all=()
  matched=()
  if [[ -n "${2:-}" ]]; then
    keys+=("$2")
    if [[ -n "${3:-}" ]]; then
      keys+=("$3")
    fi
  fi

  for name in *.txt; do
    # Skips the literal pattern when the index is empty.
    if [[ -f "$name" ]]; then
      all+=("$name")
    fi
  done

  for key in "${keys[@]}"; do
    only=()
    while IFS= read -r name; do
      only+=("$name")
    done < <(files_matching "$key" "${all[@]}")
    printf 'results for [%s] only:\n' "$key"
    printf '%s\n' "${only[*]}"

    if [[ -z "$label" ]]; then
      # First keyword: its own matches seed the running intersection.
      label="[$key]"
      matched=("${only[@]}")
      continue
    fi

    # Narrow the running intersection in memory; no scratch file can leak
    # results from one search into the next.
    narrowed=()
    while IFS= read -r name; do
      narrowed+=("$name")
    done < <(files_matching "$key" "${matched[@]}")
    matched=("${narrowed[@]}")
    label="$label and [$key]"
    printf 'results for %s:\n' "$label"
    printf '%s\n' "${matched[*]}"
  done
}

# Menu entry 2. The default and the silenced stderr make an empty or
# non-numeric answer simply not match instead of printing a test error.
if [ "${choice:-0}" -eq 2 ] 2>/dev/null; then
  # Searching anywhere but the index directory would give meaningless results.
  if ! cd ./text_format/; then
    echo "no index found in ./text_format: create the index first" >&2
    exit 1
  fi
  while true; do
    echo "enter up to 3 keyword(s) [AND]"
    echo "_____________[crtl-c for end search]"
    # -r keeps backslashes of the regular expressions intact; leaving the
    # loop on end of input avoids spinning forever once stdin is closed.
    read -r key1 key2 key3 || break
    echo "keywords: [$key1,$key2,$key3]"
    search_keywords "$key1" "$key2" "$key3"
  done
  exit
fi