/ [texdoc] / trunk / score.tlu
To checkout: svn checkout http://svn.gnu.org.ua/sources/texdoc/trunk/score.tlu
Puszcza

Contents of /trunk/score.tlu

Parent Directory | Revision Log


Revision 2 - (show annotations)
Thu Mar 2 10:41:29 2017 UTC (4 years, 9 months ago) by cereda
File size: 9468 byte(s)
First commit
1 -- score.tlu: scoring functions for texdoc
2 --
3 -- Manuel Pégourié-Gonnard, GPLv3, see texdoclib.tlu for details
4
-- Score adjustment tables shared by the functions of this file.
-- global_adjscore maps a lowercased pattern to a numeric adjustment;
-- spec_adjscore maps a lowercased keyword to such a table.
local global_adjscore = {}
local spec_adjscore = {}
7
8 ------------------------- configuration directives -------------------------
9
-- Try to read a configuration line as an adjscore directive.
--
-- Two forms are recognised:
--   adjscore <pattern> = <number>             (stored in global_adjscore)
--   adjscore(<keyword>) <pattern> = <number>  (stored in spec_adjscore[keyword])
--
-- line: the configuration line to interpret
-- file, pos: accepted but not used by this function
--
-- Returns whatever set_score_table returns when the line has one of the
-- two forms, and false when it has neither.
function confline_to_score(line, file, pos)
    -- global form first
    local g_pat, g_val = string.match(line,
        '^adjscore%s+([%w%p]+)%s*=%s*([%d+-.]+)')
    if g_pat and g_val then
        return set_score_table(global_adjscore, g_pat, g_val)
    end
    -- then the keyword-specific form
    local kw, k_pat, k_val = string.match(line,
        '^adjscore%(([%w%p]+)%)%s+([%w%p]+)%s*=%s*([%d+-.]+)')
    if not (kw and k_pat and k_val) then
        return false
    end
    -- keywords are stored lowercased; create the per-keyword table on demand
    local key = string.lower(kw)
    local per_keyword = spec_adjscore[key]
    if not per_keyword then
        per_keyword = {}
        spec_adjscore[key] = per_keyword
    end
    return set_score_table(per_keyword, k_pat, k_val)
end
28
-- Store a score in a score table unless the key is already present.
--
-- tab: table to update
-- key: pattern; it is lowercased before being used as the table key
-- val: value to store; it must be convertible by tonumber
--
-- Returns true when val is numeric (even if an existing entry was kept),
-- false otherwise.
function set_score_table(tab, key, val)
    local number = tonumber(val)
    local slot = string.lower(key)
    if not number then
        return false
    end
    -- first definition wins: never override an existing entry
    if tab[slot] == nil then
        tab[slot] = number
    end
    return true
end
39
40 ---------------------------- score computation -----------------------------
41
-- Score every docfile of a list.
--
-- list: sequence of docfiles, traversed with ipairs
-- original_kw: keyword handed unchanged to set_score for each docfile
function set_list_scores(list, original_kw)
    for _, docfile in ipairs(list) do
        set_score(docfile, original_kw)
    end
end
48
-- Compute the score of one docfile and store it in df.score.
--
-- df: docfile; the fields read here are shortname, realpath, matches,
--     tree, tlptodoc, runtodoc and details. Each entry of df.matches is
--     read for its name, original, locale and score fields.
-- original_kw: keyword used to select a table in spec_adjscore
--
-- Steps, in order:
--   1. best score over all patterns in df.matches (starting from -10);
--   2. fallback scores from package (-1) or sty/cls (-5) associations
--      when no pattern raised the score;
--   3. bonus when catalogue details are present;
--   4. keyword-specific adjustments, then global adjustments, both only
--      for docfiles with df.tree > -1.
-- Every step is reported through deb_print in the 'score' category.
function set_score(df, original_kw)
    -- scoring is case-insensitive (patterns are already lowercased)
    local name = string.lower(df.shortname)
    deb_print('score', '----------')
    deb_print('score', 'Start scoring '..df.realpath)
    deb_print('score', 'Name used: '..name)
    -- get score from patterns
    local score = -10
    for _, pat in ipairs(df.matches) do
        local s = -10
        local p = string.lower(pat.name)
        if pat.original then
            -- heuristic_score returns a number, so the and/or idiom is safe
            s = df.tree > -1 and heuristic_score(name, p) or 1
        elseif is_exact(name, p) then
            local bonus, msg = 0, ''
            if pat.locale then
                bonus, msg = 5, ', (language-based)'
            end
            s = (pat.score or 10) + bonus -- default alias score is 10
            deb_print('score', string.format(
                "Matching alias '%s', score: %g%s", pat.name, s, msg))
        end
        if s > score then score = s end
    end
    deb_print('score', 'Max pattern score: '..tostring(score))
    -- get score from tlp associations
    -- (the logged value now matches the value actually assigned)
    if score == -10 and df.tlptodoc then
        score = -1
        deb_print('score', 'New score: -1 from package name association')
    end
    if score == -10 and df.runtodoc then
        score = -5
        deb_print('score', 'New score: -5 from sty/cls association')
    end
    -- bonus for metadata
    if df.details then
        if string.find(string.lower(df.details), 'readme') then
            score = score + 0.1
            deb_print('score', 'Catalogue "readme" bonus: +0.1')
        else
            score = score + 1.5
            deb_print('score', 'Catalogue details bonus: +1.5')
        end
    end
    -- adjust from keyword-specific tables
    -- NOTE(review): confline_to_score stores keywords lowercased, while
    -- original_kw is used here as given -- confirm callers lowercase it.
    if df.tree > -1 and spec_adjscore[original_kw] then
        for pat, val in pairs(spec_adjscore[original_kw]) do
            if val and is_subword('/'..name, pat) then
                score = score + val
                deb_print('score', string.format(
                    "Adjust by %g from specific pattern '%s'", val, pat))
            end
        end
    end
    -- adjust from global tables
    if df.tree > -1 then
        for pat, val in pairs(global_adjscore) do
            if val and is_subword('/'..name, pat) then
                -- a docfile still at -10 can only be lowered, never raised
                if score > -10 or val < 0 then score = score + val end
                -- the message is logged even when the adjustment was skipped
                deb_print('score', string.format(
                    "Adjust by %g from global pattern '%s'", val, pat))
            end
        end
    end
    deb_print('score', 'Final score: '..tostring(score))
    df.score = score
end
117
-- Rate heuristically how well a file name matches a pattern.
--
-- file: file name to rate
-- pat: pattern to look for
--
-- The rating starts at -10 and is raised by the best kind of match found:
--   5 exact match with the current locale, 4 exact match, 1 subword match,
--   3 / 2 exact / subword match for pat followed by one of
--   config.suffix_list (only tried when pat contains no slash).
-- A positive rating is then forced down to 0.1 when the extension is
-- listed in config.badext_list_inv or when the basename is bad, and 1.5 is
-- added when '/pat/' occurs in '/file' (again only for slash-free pat).
-- The result therefore lies between -10 and 6.5.
function heuristic_score(file, pat)
    deb_print('score', 'Start heuristic scoring with pattern: '..pat)
    local best = -10
    -- take `value` as the new rating when it is higher than the current
    -- one, or unconditionally when `always` is set
    local function propose(value, why, always)
        if value > best or always then
            best = value
            deb_print('score', 'New heuristic score: '..tostring(value)
                ..'. Reason: '..why)
        end
    end
    local pat_has_slash = string.find(pat, '/', 1, true) ~= nil
    -- direct matches, best kind first
    if is_exact_locale(file, pat) then
        propose(5, 'exact match with correct locale')
    elseif is_exact(file, pat) then
        propose(4, 'exact match')
    elseif is_subword(file, pat) then
        propose(1, 'subword match')
    end
    -- derived patterns (pattern plus suffix) only make sense without a slash
    if not pat_has_slash then
        for _, suffix in ipairs(config.suffix_list) do
            local derived = pat..suffix
            if is_exact(file, derived) then
                propose(3, 'exact match for derived pattern: '..derived)
            elseif is_subword(file, derived) then
                propose(2, 'subword match for derived pattern: '..derived)
            end
        end
    end
    -- a bad extension turns a positive rating into an epsilon
    local extension = config.ext_list[ext_pos(file)]
    if extension and config.badext_list_inv[extension] and best > 0 then
        propose(0.1, 'bad extension', true)
    end
    -- so does a bad basename
    if has_bad_basename(file) and best > 0 then
        propose(0.1, 'bad basename', true)
    end
    -- bonus when the file lives in a directory named after the pattern
    if not pat_has_slash
            and string.find('/'..file, '/'..pat..'/', 1, true) then
        propose(best + 1.5, 'directory bonus')
    end
    deb_print('score', 'Final heuristic score: '..tostring(best))
    return best
end
167
-- Say whether file is an exact match for pat.
--
-- file: file name; it is first passed through parse_zip (defined elsewhere,
--       presumably to strip a zip extension -- confirm)
-- pat: pattern; it may contain slashes, in which case as many trailing
--      path components of file as pat has are compared
--
-- Returns true when the trailing components of file equal pat, or pat
-- followed by '.' and one of the extensions in config.ext_list (the ''
-- and '*' entries are ignored); false when they differ; nil when file has
-- fewer path components than pat.
function is_exact(file, pat)
    file = parse_zip(file)
    -- turn every slash-free run of pat into '[^/]+' so the resulting Lua
    -- pattern selects the same number of trailing components from file
    local slashes = string.gsub(pat, '[^/]+', '[^/]+')
    -- keep this local: an undeclared name here would create a global
    local basename = string.match(file, slashes..'$')
    if not basename then return nil end
    if basename == pat then return true end
    for _, ext in ipairs(config.ext_list) do
        if ext ~= '' and ext ~= '*' and basename == pat..'.'..ext then
            return true
        end
    end
    return false
end
182
-- Say whether file is an exact match for pat combined with the current
-- language code, i.e. for 'pat-lang' or 'lang-pat' where lang is
-- config.lang.
--
-- Returns false when pat already ends with what looks like a language
-- code ('-' followed by two or three lowercase letters) or when no
-- language is configured; otherwise the result of the is_exact calls.
function is_exact_locale(file, pat)
    -- a pattern that seems to carry its own language code never matches
    if string.match(pat, '%-%l%l%l?$') then
        return false
    end
    local lang = config.lang
    if not lang then
        return false
    end
    -- language as suffix first, then as prefix
    local as_suffix = is_exact(file, pat .. '-' .. lang)
    if as_suffix then
        return as_suffix
    end
    return is_exact(file, lang .. '-' .. pat)
end
196
-- Say whether pat is a "subword" of str.
--
-- pat is searched as plain text (no pattern magic). An occurrence counts
-- as a subword when it is delimited on both sides:
--   left:  it starts at position 1, or its first character or the
--          character before it is a delimiter (see is_delim);
--   right: it ends at the end of str, or its last character or the
--          character after it is a delimiter.
-- Every occurrence is examined, not only the first one, so that e.g.
-- 'foot' is found as a subword of 'footnote/foot'.
--
-- Returns true or false.
function is_subword(str, pat)
    local last = #str
    local init = 1
    while true do
        local i, j = string.find(str, pat, init, true)
        if not i then
            return false
        end
        if (i == 1 or is_delim(str, i) or is_delim(str, i - 1))
                and (j == last or is_delim(str, j) or is_delim(str, j + 1)) then
            return true
        end
        -- an empty pattern matches at every position: only its first
        -- match is considered, which also guarantees termination
        if j < i then
            return false
        end
        -- resume right after the start of this occurrence so that
        -- overlapping occurrences are examined too
        init = i + 1
    end
end
204
-- Say whether character i of str is a delimiter, i.e. a punctuation
-- character (Lua class %p). Positions outside the string yield false.
function is_delim(str, i)
    local ch = string.sub(str, i, i)
    return string.match(ch, '%p') ~= nil
end
209
-- Say whether a file name has a bad basename.
--
-- The directory part of file (everything up to the last slash) is
-- dropped; what remains is bad when it equals an entry of
-- config.badbasename_list or starts with such an entry followed by a dot.
-- Entries are inserted into Lua patterns as they are.
function has_bad_basename(file)
    local base = file:gsub('.*/', '')
    for _, bad in ipairs(config.badbasename_list) do
        local anchored = '^'..bad
        if base:find(anchored..'$') or base:find(anchored..'%.') then
            return true
        end
    end
    return false
end
220
-- Ordering function for two docfiles (see search.tlu for their structure).
-- Criteria, each one used only when the previous ones tie:
--   1. higher score first,
--   2. lower ext_pos first (extensions ordered as in ext_list),
--   3. smaller basename first,
--   4. higher tree first.
-- Returns true if a is better than b, false otherwise (including a tie).
function docfile_order (a, b)
    -- 1. score, descending
    if a.score > b.score then return true end
    if a.score < b.score then return false end
    -- 2. extension position, ascending
    if a.ext_pos < b.ext_pos then return true end
    if a.ext_pos > b.ext_pos then return false end
    -- 3. basename, ascending
    if a.basename < b.basename then return true end
    if a.basename > b.basename then return false end
    -- 4. tree, descending
    return a.tree > b.tree
end
237
238 ----------------------------- public functions -----------------------------
239
-- Return the index in config.ext_list of the most specific extension of
-- file, or config.ext_list_max + 1 when no entry applies.
--
-- The special entry '*' applies to any file but only as long as nothing
-- else was selected; the empty entry '' applies to names without a dot.
-- Among ordinary entries that are a '.ext' suffix of file, the longest
-- one wins; on equal length the first one is kept.
function ext_pos(file)
    -- remove zipext if applicable
    file = parse_zip(file)
    local best_pos, best_ext
    for idx, cand in ipairs(config.ext_list) do
        local accept = false
        if cand == '*' and best_ext == nil then
            -- wildcard: only a fallback
            accept = true
        elseif cand == '' and not string.find(file, '.', 1, true) then
            -- file without any extension
            accept = true
        elseif string.sub(file, -#cand - 1) == '.'..cand then
            -- real extension: replace nothing, a wildcard, or a shorter one
            accept = best_ext == nil or best_ext == '*'
                or #cand > #best_ext
        end
        if accept then
            best_pos, best_ext = idx, cand
        end
    end
    return best_pos or (config.ext_list_max + 1)
end
261
-- Classify a docfile according to its score:
--   'good'   when df.score is above 0,
--   'bad'    when it is above -100 (and at most 0),
--   'killed' otherwise.
function docfile_quality(df)
    local score = df.score
    if score > 0 then
        return 'good'
    end
    if score > -100 then
        return 'bad'
    end
    return 'killed'
end
272
-- Score and sort a doclist in place.
--
-- dl: doclist; its stop method is called first (defined elsewhere --
--     presumably it freezes the list before sorting, confirm)
-- original_kw: keyword forwarded to set_list_scores
--
-- After scoring, dl is sorted with docfile_order (best docfile first).
function sort_doclist(dl, original_kw)
    dl.stop(dl)
    set_list_scores(dl, original_kw)
    table.sort(dl, docfile_order)
end
279
-- Functions made available to the code loading this file.
local exports = {
    confline_to_score = confline_to_score,
    docfile_quality = docfile_quality,
    ext_pos = ext_pos,
    is_exact = is_exact,
    sort_doclist = sort_doclist,
}
return exports

Send suggestions and bug reports to Sergey Poznyakoff
ViewVC Help
Powered by ViewVC 1.1.20