[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
02/04: hydra/goggles: Add support for log search.
From: |
Ricardo Wurmus |
Subject: |
02/04: hydra/goggles: Add support for log search. |
Date: |
Fri, 24 Apr 2020 08:04:43 -0400 (EDT) |
rekado pushed a commit to branch master
in repository maintenance.
commit 59c6c541013f7e51dca5a46753bdd30b5e678ed9
Author: Ricardo Wurmus <address@hidden>
AuthorDate: Fri Apr 24 14:00:59 2020 +0200
hydra/goggles: Add support for log search.
* hydra/goggles.scm (%log-xapian-db): New variable.
(index-text!*, index-channel-logs, parse-query*, search): New
procedures.
---
hydra/goggles.scm | 93 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 92 insertions(+), 1 deletion(-)
diff --git a/hydra/goggles.scm b/hydra/goggles.scm
index 3b4f64d..0daf534 100755
--- a/hydra/goggles.scm
+++ b/hydra/goggles.scm
@@ -11,10 +11,15 @@
(srfi srfi-26)
(ice-9 ftw)
(ice-9 match)
+ (ice-9 rdelim)
(ice-9 regex)
- (ice-9 textual-ports))
+ (ice-9 textual-ports)
+ (xapian wrap)
+ (xapian xapian))
(define %log-root "/var/www/.well-known/all-logs/")
+(define %log-xapian-db "/var/cache/logs.xapian/")
+
(define %config
'((host . "0.0.0.0")
(port . 3333)
@@ -34,6 +39,92 @@
(lambda (name)
(not (member name '("." ".." "index.html")))))))
+
+(define* (index-text!* term-generator text #:key (wdf-increment 1) prefix)
+ (apply TermGenerator-index-text-without-positions
+ term-generator text wdf-increment
+ (if prefix (list prefix) '())))
+
+(define (index-channel-logs channel)
+ "Index all messages in the logs for CHANNEL."
+ (format (current-error-port) "Indexing ~a~%" channel)
+ (call-with-writable-database %log-xapian-db
+ (lambda (db)
+ (for-each
+ (lambda (file-name)
+ (define stamp (basename file-name ".log"))
+ (define (index-message line count)
+ (match (string-split line #\space)
+ ((time "***" . msg) #f)
+ ((time nick . msg)
+ (let* ((idterm (string-append "Q" channel stamp (number->string
count)))
+ (doc (make-document
+ #:data (call-with-output-string
+ (lambda (port)
+ (write
+ `((stamp . ,stamp)
+ (id . ,(string-filter
char-set:digit time))
+ (text . ,line))
+ port)))
+ #:terms `((,idterm . 0))))
+ (term-generator
+ (make-term-generator #:stem (make-stem "en")
+ #:document doc)))
+ (index-text!* term-generator channel #:prefix "B")
+ (index-text!* term-generator nick #:prefix "A")
+ (index-text!* term-generator line)
+ (replace-document! db idterm doc)))
+ (_ #f)))
+ (format (current-error-port)
+ " Indexing file ~a~%" file-name)
+ (with-input-from-file file-name
+ (lambda ()
+ (let loop ((line (read-line))
+ (count 0))
+ (unless (eof-object? line)
+ (index-message line count)
+ (loop (read-line) (1+ count)))))))
+ (map (cut string-append %log-root "/#" channel "/" <>)
+ (channel-files channel)))))
+ (format (current-error-port) "Indexing ~a complete!~%" channel))
+
+(define* (parse-query* querystring #:key stemmer stemming-strategy
+ (prefixes '())
+ (boolean-prefixes '()))
+ (let ((queryparser (new-QueryParser)))
+ (QueryParser-set-stemmer queryparser stemmer)
+ (when stemming-strategy
+ (QueryParser-set-stemming-strategy queryparser stemming-strategy))
+ (for-each (match-lambda
+ ((field . prefix)
+ (QueryParser-add-prefix queryparser field prefix)))
+ prefixes)
+ (for-each (match-lambda
+ ((field . prefix)
+ (QueryParser-add-boolean-prefix queryparser field prefix)))
+ boolean-prefixes)
+ (let ((query (QueryParser-parse-query queryparser querystring)))
+ (delete-QueryParser queryparser)
+ query)))
+
+(define* (search querystring #:key (pagesize 100))
+ (call-with-database %log-xapian-db
+ (lambda (db)
+ (let* ((query (parse-query* querystring
+ #:stemmer (make-stem "en")
+ #:boolean-prefixes
+ '(("channel" . "B")
+ ("nick" . "A")))))
+ (mset-fold (lambda (item acc)
+ (cons (call-with-input-string
+ (document-data (mset-item-document item))
+ read)
+ acc))
+ '()
+ (enquire-mset (enquire db query)
+ #:maximum-items pagesize))))))
+
+
(define (render-html sxml)
(list '((content-type . (text/html)))
(lambda (port)