lilypond-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

LSR dump


From: Werner LEMBERG
Subject: LSR dump
Date: Thu, 03 Oct 2019 13:02:09 +0200 (CEST)

Folks,


attached you can find a text dump of the current version of the LSR
database, together with the script that I've written to create it.


    Werner
# lsrdump.pl
#
# Written 2019 by Werner Lemberg <address@hidden>


# Convert the LSR database dump available from
#
#   http://lsr.di.unimi.it/download/
#
# into plain text, omitting the images.
#
#
# Usage:
#
#   perl lsrdump.pl < lsr.mysqldump > lsrdump.txt


use strict;
use warnings;

# Access mysqldump files without the need of mysql tools.
use MySQL::Dump::Parser::XS;


# The LSR dump embeds PNG images, so STDIN is read as raw bytes.
binmode STDIN;


# Parser for mysqldump output; it is also asked for the column names of the
# `snippet' table further down.
my $parser = MySQL::Dump::Parser::XS->new();

# Rows of every table found in the dump, keyed by table name.
my %tables;
# Maps tag IDs onto tag names; filled from the `tag' table.
my %tags;

# Reference to the rows of the table that is currently being processed.
my $table;


# Feed the dump to the parser one line at a time and file the rows it
# returns under the name of the table they belong to.
while (defined(my $line = <STDIN>)) {
  my @rows = $parser->parse($line);
  my $current = $parser->current_target_table();

  # Lines outside of any table carry no rows worth storing.
  next unless $current;

  push @{$tables{$current}}, @rows;
}


# Build the tag lookup: every row of the `tag' table contributes one entry
# to `%tags', mapping the row's `id' onto its `name'.
$table = $tables{"tag"};

for my $row (@{$table}) {
  my $id = $row->{"id"};
  $tags{$id} = $row->{"name"};
}


# From here on, `$table' refers to the rows of the `snippet' table.
$table = $tables{"snippet"};

# Replace the tag ID stored in one tag field of a snippet entry with the
# corresponding tag name.
#
# Tag fields are called `id_tag0_tag', `id_tag1_tag', etc.
#
# Arguments:
#   $idx    index of the tag field (the number in the field name)
#   $entry  hash reference holding one snippet row; modified in place
#
# The names are looked up in the file-level `%tags' hash.  Unset fields are
# left alone.  A tag ID without a name in `%tags' is kept unchanged (and a
# warning is emitted) instead of being overwritten with `undef', which would
# make the field silently vanish from the dump.
sub add_tag_name {
  my ($idx, $entry) = @_;

  my $field = "id_tag${idx}_tag";
  my $tag = $entry->{$field};

  return if !defined($tag);

  if (defined($tags{$tag})) {
    $entry->{$field} = $tags{$tag};
  }
  else {
    warn "field `$field': no name for tag ID `$tag', keeping the ID\n";
  }

  return;
}

# Translate the tag IDs in the tag fields with indices 0 to 6 of every
# snippet into tag names.
for my $snippet (@{$table}) {
  add_tag_name($_, $snippet) for 0 .. 6;
}

# Emit a dump of all snippets, sorted numerically by snippet ID.
#
# Every set field of a snippet is printed as `NAME: VALUE', in the column
# order reported by the parser.  Continuation lines of multi-line values are
# indented to the width of NAME and marked with `| '.  Snippets are separated
# by an empty line; the dump is terminated with an `END OF DUMP' line.
my @column_names = $parser->columns("snippet");

foreach my $entry (sort { $a->{"id"} <=> $b->{"id"} } @{$table}) {
  for my $name (@column_names) {
    # Ignore binary data.
    next if $name eq "image" || $name eq "largeimage";

    # Ignore unset fields.
    my $data = $entry->{$name};
    next if !defined($data);

    print "$name: ";

    # Make line endings uniform.
    $data =~ s/\015\012?|\012/\n/g;

    # Remove trailing whitespace (spaces and tabs) from every line.  An
    # explicit character class is used since `\s' would also match newlines
    # and thus swallow empty lines within the data.
    $data =~ s/[ \t]+$//gm;

    # Remove leading and trailing empty lines.
    $data =~ s/^\n+//;
    $data =~ s/\n+$//;

    # Insert a prefix to indicate continuation lines for nicer reading.  The
    # prefix is as wide as `NAME: ' so that all lines of a value line up.
    my $prefix = " " x length($name) . "| ";
    my $n = 0;
    # The match at `^' is always empty, so the first line (which directly
    # follows `NAME: ') simply gets the empty string instead of the prefix.
    $data =~ s/^/$n++ ? $prefix : ""/gme;

    print "$data\n";
  }

  print "\n";
}

print "END OF DUMP\n";

# eof

Attachment: lsr-2019-10-03.txt.xz
Description: Binary data


reply via email to

[Prev in Thread] Current Thread [Next in Thread]