guix-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Accuracy of importers?


From: Ludovic Courtès
Subject: Re: Accuracy of importers?
Date: Fri, 29 Oct 2021 23:57:33 +0200
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/27.2 (gnu/linux)

Hello!

Thanks everyone for your feedback!

Lars-Dominik Braun <lars@6xq.net> skribis:

> Would it be possible to just run the importer again for existing packages
> and compare the result (minus synopsis/description) with what’s
> available in Guix? That should give you much more accurate numbers than
> our guesswork.

Turned out to be trickier than we could hope, primarily because the
relevant importers did not support imports of a specific version.  I
fixed it for CRAN and PyPI here:

  https://issues.guix.gnu.org/51493

With the attached script plus the changes above, I can already get some
insight.  Here’s what I get for a sample of 200 PyPI packages and 200
CRAN packages:

--8<---------------cut here---------------start------------->8---
$ SAMPLE_SIZE=200 ./pre-inst-env guile 
~/src/guix-debugging/importer-accuracy.scm
[…]
Accuracy for 'pypi' (200 packages):
  accurate: 58 (29%)
  different inputs: 142 (71%)
  different source: 0 (0%)
  inconclusive: 0 (0%)
Accuracy for 'cran' (200 packages):
  accurate: 176 (88%)
  different inputs: 23 (12%)
  different source: 1 (0%)
  inconclusive: 0 (0%)
--8<---------------cut here---------------end--------------->8---

(It’s quite expensive to run because it downloads a whole bunch of
things and tries many 404 URLs in the case of CRAN before finding the
right one.)

The script doesn’t do anything useful for crates because they have their
own way of representing inputs.  It doesn’t account for changes in
‘arguments’ like zimoun suggested, meaning it’s overestimating accuracy.

It’d be nice to run it on gems but that importer doesn’t support
versioning either.

To be continued…

Thanks,
Ludo’.

;;; Released under the GNU GPLv3 or any later version.
;;; Copyright © 2021 Ludovic Courtès <ludo@gnu.org>

(use-modules (guix)
             (gnu packages)
             (guix import cran)
             (guix import crate)
             (guix import pypi)
             ((guix import print) #:select (package->code))
             ((guix upstream) #:select (url-predicate))
             (guix diagnostics)
             (guix i18n)
             (srfi srfi-1)
             (srfi srfi-9)
             (srfi srfi-9 gnu)
             (ice-9 match))

(define-record-type <reimporter>
  (reimporter name pred import)
  reimporter?
  (name   reimporter-name)
  (pred   reimporter-predicate)
  (import reimporter-import))

(define (find-reimporter package)
  (find (lambda (reimporter)
          ((reimporter-predicate reimporter) package))
        %reimporters))

(define (accurate-import? package)
  (define (sexp-field sexp field)
    (match sexp
      (((or 'package 'origin) fields ...)
       (match (assoc field fields)
         ((key value) value)
         (_ #f)))
      (('define-public _ exp)
       (sexp-field exp field))))

  (define (same-source? sexp1 sexp2)
    (equal? (sexp-field (sexp-field sexp1 'source) 'sha256)
            (sexp-field (sexp-field sexp2 'source) 'sha256)))

  (define canonicalize-input
    ;; 'package->code' creates '@' references but importers don't.  Remove
    ;; the '@' to allow comparison.
    (match-lambda
      (("gfortran" _)
       ;; 'package->code' emits nonsense for the value associated with this
       ;; one, so trust the label.
       `("gfortran" ,(list 'unquote 'gfortran)))
      ((label ('unquote ('@ _ variable)) . rest)
       `(,label ,(list 'unquote variable) ,@rest))
      (x x)))

  (define (equivalent-inputs? inputs1 inputs2)
    (if (and inputs1 inputs2)
        (lset= equal?
               (match inputs1
                 (('quasiquote inputs)
                  (map canonicalize-input inputs)))
               (match inputs2
                 (('quasiquote inputs)
                  (map canonicalize-input inputs))))
        (equal? inputs1 inputs2)))

  (let* ((reimporter (find-reimporter package))
         (imported   ((reimporter-import reimporter) package))
         (actual     (package->code package)))
    (define (same-inputs? field)
      (equivalent-inputs? (sexp-field imported field)
                          (sexp-field actual field)))

    (if imported
        (if (and (same-inputs? 'inputs)
                 (same-inputs? 'native-inputs)
                 (same-inputs? 'propagated-inputs))
            (if (same-source? actual imported)
                'accurate
                (begin
                  (warning (package-location package)
                           (G_ "~a: source differs from upstream~%")
                           (package-full-name package))
                  'different-source))
            (begin
              (warning (package-location package)
                       (G_ "~a: inputs differ from upstream~%")
                       (package-full-name package))
              'different-inputs))
        'inconclusive)))

;; Stats.
(define-record-type <accuracy>
  (accuracy accurate different-inputs different-source inconclusive)
  accuracy?
  (accurate         accuracy-accurate)
  (different-inputs accuracy-different-inputs)
  (different-source accuracy-different-source)
  (inconclusive     accuracy-inconclusive))

(define (display-accuracy reimporter accuracy port)
  (define total
    (letrec-syntax ((sum (syntax-rules ()
                           ((_) 0)
                           ((_ get rest ...)
                            (+ (get accuracy) (sum rest ...))))))
      (sum accuracy-accurate
           accuracy-different-inputs
           accuracy-different-source
           accuracy-inconclusive)))

  (define (% fraction)
    (inexact->exact (round (* 100. fraction))))

  (format port (G_ "Accuracy for '~a' (~a packages):~%")
          (reimporter-name reimporter) total)
  (format port (G_ "  accurate: ~a (~d%)~%")
          (accuracy-accurate accuracy)
          (% (/ (accuracy-accurate accuracy) total)))
  (format port (G_ "  different inputs: ~a (~d%)~%")
          (accuracy-different-inputs accuracy)
          (% (/ (accuracy-different-inputs accuracy) total)))
  (format port (G_ "  different source: ~a (~d%)~%")
          (accuracy-different-source accuracy)
          (% (/ (accuracy-different-source accuracy) total)))
  (format port (G_ "  inconclusive: ~a (~d%)~%")
          (accuracy-inconclusive accuracy)
          (% (/ (accuracy-inconclusive accuracy) total))))

(define (random-seed)
  (logxor (getpid) (car (gettimeofday))))

(define shuffle                           ;copied from (guix scripts offload)
  (let ((state (seed->random-state (random-seed))))
    (lambda (lst)
      "Return LST shuffled (using the Fisher-Yates algorithm.)"
      (define vec (list->vector lst))
      (let loop ((result '())
                 (i (vector-length vec)))
        (if (zero? i)
            result
            (let* ((j (random i state))
                   (val (vector-ref vec j)))
              (vector-set! vec j (vector-ref vec (- i 1)))
              (loop (cons val result) (- i 1))))))))


;;;
;;; Reimporters.
;;;

(define pypi-package?                         ;copied from (guix import pypi)
  (url-predicate
   (lambda (url)
     (or (string-prefix? "https://pypi.org/"; url)
         (string-prefix? "https://pypi.python.org/"; url)
         (string-prefix? "https://pypi.org/packages"; url)
         (string-prefix? "https://files.pythonhosted.org/packages"; url)))))

(define guix-package->pypi-name
  (@@ (guix import pypi) guix-package->pypi-name))

(define* (package-sample reimporter
                         #:optional (size (or (and=> (getenv "SAMPLE_SIZE")
                                                     string->number)
                                              20)))
  (let ((pred (reimporter-predicate reimporter)))
    (take (shuffle
           (fold-packages (lambda (package lst)
                            (if (and (pred package)
                                     (not (package-superseded package))
                                     (not (string-prefix? "python2-"
                                                          (package-name 
package))))
                                (cons package lst)
                                lst))
                          '()))
          size)))

(define-syntax-rule (increment record field)
  (set-field record (field) (+ 1 (field record))))

(define (import-accuracy packages)
  (fold (lambda (package accuracy)
          (match (accurate-import? package)
            ('accurate (increment accuracy accuracy-accurate))
            ('different-inputs (increment accuracy accuracy-different-inputs))
            ('different-source (increment accuracy accuracy-different-source))
            ('inconclusive (increment accuracy accuracy-inconclusive))))
        (accuracy 0 0 0 0)
        packages))

(define (package->cran-name package)          ;copied from (guix import cran)
  "Return the upstream name of the PACKAGE."
  (let ((upstream-name (assoc-ref (package-properties package) 'upstream-name)))
    (if upstream-name
        upstream-name
        (match (package-source package)
          ((? origin? origin)
           (match (origin-uri origin)
             ((or (? string? url) (url _ ...))
              (let ((end   (string-rindex url #\_))
                    (start (string-rindex url #\/)))
                ;; The URL ends on
                ;; (string-append "/" name "_" version ".tar.gz")
                (and start end (substring url (+ start 1) end))))
             (_ #f)))
          (_ #f)))))


(define %pypi-reimporter
  (reimporter 'pypi pypi-package?
              (lambda (package)
                (pypi->guix-package
                 (guix-package->pypi-name package)
                 #:version (package-version package)))))

(define %cran-reimporter
  (reimporter 'cran cran-package?
              (lambda (package)
                (cran->guix-package
                 (package->cran-name package)
                 #:version (package-version package)))))

(define crate-package?
  (url-predicate (@@ (guix import crate) crate-url?)))

(define %crate-reimporter
  (reimporter 'crate crate-package?
              (lambda (package)
                (crate->guix-package
                 (guix-package->crate-name package)
                 #:version (package-version package)))))

(define %reimporters
  (list %pypi-reimporter
        %cran-reimporter

        ;; XXX: Useless since Rust packages don't use the normal inputs
        ;; fields.
        ;; %crate-reimporter
        ))

(let ((results (map (compose import-accuracy package-sample) %reimporters)))
  (for-each (lambda (reimporter result)
              (display-accuracy reimporter result
                                (current-output-port)))
            %reimporters
            results))


reply via email to

[Prev in Thread] Current Thread [Next in Thread]