[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [patch 04/19] temporary file for array of cases
From: John Darrington
Subject: Re: [patch 04/19] temporary file for array of cases
Date: Thu, 7 Jun 2007 08:05:00 +0800
User-agent: Mutt/1.5.13 (2006-08-11)
No problems.
On Tue, Jun 05, 2007 at 11:27:31PM -0700, address@hidden wrote:
There are several places where we want an array of cases stored on
disk. This adds a low-level on-disk case array data structure.
Index: merge/src/data/automake.mk
===================================================================
--- merge.orig/src/data/automake.mk 2007-06-03 01:21:11.000000000 -0700
+++ merge/src/data/automake.mk 2007-06-03 16:46:10.000000000 -0700
@@ -24,6 +24,8 @@
src/data/fastfile-factory.h \
src/data/fastfile-factory.c \
src/data/case.h \
+ src/data/case-tmpfile.c \
+ src/data/case-tmpfile.h \
src/data/category.c \
src/data/category.h \
src/data/data-in.c \
Index: merge/src/data/case-tmpfile.c
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ merge/src/data/case-tmpfile.c 2007-06-03 17:21:17.000000000 -0700
@@ -0,0 +1,254 @@
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+#include <config.h>
+
+#include <data/case-tmpfile.h>
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libpspp/assertion.h>
+#include <libpspp/taint.h>
+
+#include "error.h"
+#include "xalloc.h"
+
+#include "gettext.h"
+#define _(msgid) gettext (msgid)
+
+/* A temporary file that stores an array of cases. */
+struct case_tmpfile
+ {
+ struct taint *taint; /* Taint. */
+ FILE *file; /* Underlying file. */
+ size_t value_cnt; /* Number of `union value's per case. */
+
+ /* Current byte offset in file. We track this manually,
+ instead of using ftello, because in glibc ftello flushes
+ the stream buffer, making the common case of sequential
+ access to cases unreasonably slow. */
+ off_t position;
+ };
+
+/* Creates and returns a new case_tmpfile. */
+struct case_tmpfile *
+case_tmpfile_create (size_t value_cnt)
+{
+ struct case_tmpfile *ctf = xmalloc (sizeof *ctf);
+ ctf->taint = taint_create ();
+ ctf->file = tmpfile ();
+ if (ctf->file == NULL)
+ {
+ error (0, errno, _("failed to create temporary file"));
+ taint_set_taint (ctf->taint);
+ }
+ ctf->value_cnt = value_cnt;
+ ctf->position = 0;
+ return ctf;
+}
+
+/* Destroys case_tmpfile CTF.
+ Returns true if CTF was tainted, which is caused by an I/O
+ error on case_tmpfile access or by taint propagation to the
+ case_tmpfile. */
+bool
+case_tmpfile_destroy (struct case_tmpfile *ctf)
+{
+ bool ok = true;
+ if (ctf != NULL)
+ {
+ struct taint *taint = ctf->taint;
+ if (ctf->file != NULL)
+ fclose (ctf->file);
+ free (ctf);
+ ok = taint_destroy (taint);
+ }
+ return ok;
+}
+
+/* Returns true if CTF is tainted, which is caused by an I/O
+ error on case_tmpfile access or by taint propagation to the
+ case_tmpfile. */
+bool
+case_tmpfile_error (const struct case_tmpfile *ctf)
+{
+ return taint_is_tainted (ctf->taint);
+}
+
+/* Marks CTF as tainted. */
+void
+case_tmpfile_force_error (struct case_tmpfile *ctf)
+{
+ taint_set_taint (ctf->taint);
+}
+
+/* Returns CTF's taint object. */
+const struct taint *
+case_tmpfile_get_taint (const struct case_tmpfile *ctf)
+{
+ return ctf->taint;
+}
+
+/* Seeks CTF's underlying file to the start of `union value'
+ VALUE_IDX within case CASE_IDX.
+ Returns true if the seek is successful and CTF is not
+ otherwise tainted, false otherwise. */
+static bool
+do_seek (const struct case_tmpfile *ctf_,
+ casenumber case_idx, size_t value_idx)
+{
+ struct case_tmpfile *ctf = (struct case_tmpfile *) ctf_;
+
+ if (!case_tmpfile_error (ctf))
+ {
+ off_t value_ofs = value_idx + (off_t) ctf->value_cnt * case_idx;
+ off_t byte_ofs = sizeof (union value) * value_ofs;
+
+ if (ctf->position == byte_ofs)
+ return true;
+ else if (fseeko (ctf->file, byte_ofs, SEEK_SET) == 0)
+ {
+ ctf->position = byte_ofs;
+ return true;
+ }
+ else
+ {
+ error (0, errno, _("seeking in temporary file"));
+ case_tmpfile_force_error (ctf);
+ }
+ }
+
+ return false;
+}
+
+/* Reads BYTES bytes from CTF's underlying file into BUFFER.
+ CTF must not be tainted upon entry into this function.
+ Returns true if successful, false upon an I/O error (in which
+ case CTF is marked tainted). */
+static bool
+do_read (const struct case_tmpfile *ctf_, size_t bytes, void *buffer)
+{
+ struct case_tmpfile *ctf = (struct case_tmpfile *) ctf_;
+
+ assert (!case_tmpfile_error (ctf));
+ if (fread (buffer, bytes, 1, ctf->file) != 1)
+ {
+ case_tmpfile_force_error (ctf);
+ if (ferror (ctf->file))
+ error (0, errno, _("reading temporary file"));
+ else if (feof (ctf->file))
+ error (0, 0, _("unexpected end of file reading temporary file"));
+ else
+ NOT_REACHED ();
+ return false;
+ }
+ ctf->position += bytes;
+ return true;
+}
+
+/* Writes BYTES bytes from BUFFER into CTF's underlying file.
+ CTF must not be tainted upon entry into this function.
+ Returns true if successful, false upon an I/O error (in which
+ case CTF is marked tainted). */
+static bool
+do_write (struct case_tmpfile *ctf, size_t bytes, const void *buffer)
+{
+ assert (!case_tmpfile_error (ctf));
+ if (fwrite (buffer, bytes, 1, ctf->file) != 1)
+ {
+ case_tmpfile_force_error (ctf);
+ error (0, errno, _("writing to temporary file"));
+ return false;
+ }
+ ctf->position += bytes;
+ return true;
+}
+
+/* Reads VALUE_CNT values into VALUES, from the case numbered
+ CASE_IDX starting START_VALUE values into that case.
+ Returns true if successful, false if CTF is tainted or an I/O
+ error occurs during the operation.
+
+ The results of this function are undefined if any of the
+ values read have not been previously written to CTF. */
+bool
+case_tmpfile_get_values (const struct case_tmpfile *ctf,
+ casenumber case_idx, size_t start_value,
+ union value values[], size_t value_cnt)
+{
+ assert (value_cnt <= ctf->value_cnt);
+ assert (value_cnt + start_value <= ctf->value_cnt);
+
+ return (do_seek (ctf, case_idx, start_value)
+ && do_read (ctf, sizeof *values * value_cnt, values));
+}
+
+/* Reads the case numbered CASE_IDX from CTF into C.
+ Returns true if successful, false if CTF is tainted or an I/O
+ error occurs during the operation.
+
+ The results of this function are undefined if the case read
+ from CTF had not previously been written. */
+bool
+case_tmpfile_get_case (const struct case_tmpfile *ctf, casenumber case_idx,
+ struct ccase *c)
+{
+ case_create (c, ctf->value_cnt);
+ if (case_tmpfile_get_values (ctf, case_idx, 0,
+ case_data_all_rw (c), ctf->value_cnt))
+ return true;
+ else
+ {
+ case_destroy (c);
+ case_nullify (c);
+ return false;
+ }
+}
+
+/* Writes VALUE_CNT values from VALUES, into the case numbered
+ CASE_IDX starting START_VALUE values into that case.
+ Returns true if successful, false if CTF is tainted or an I/O
+ error occurs during the operation. */
+bool
+case_tmpfile_put_values (struct case_tmpfile *ctf,
+ casenumber case_idx, size_t start_value,
+ const union value values[], size_t value_cnt)
+
+{
+ assert (value_cnt <= ctf->value_cnt);
+ assert (value_cnt + start_value <= ctf->value_cnt);
+
+ return (do_seek (ctf, case_idx, start_value)
+ && do_write (ctf, sizeof *values * value_cnt, values));
+}
+
+/* Writes C to CTF as the case numbered CASE_IDX.
+ Returns true if successful, false if CTF is tainted or an I/O
+ error occurs during the operation. */
+bool
+case_tmpfile_put_case (struct case_tmpfile *ctf, casenumber case_idx,
+ struct ccase *c)
+{
+ bool ok = case_tmpfile_put_values (ctf, case_idx, 0,
+ case_data_all (c), ctf->value_cnt);
+ case_destroy (c);
+ return ok;
+}
+
Index: merge/src/data/case-tmpfile.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ merge/src/data/case-tmpfile.h 2007-06-03 17:02:16.000000000 -0700
@@ -0,0 +1,55 @@
+/* PSPP - computes sample statistics.
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA. */
+
+/* Manager for temporary files, each of which stores an array of
+ like-size cases.
+
+ Partial and whole cases may be read from and written to a
+ case_tmpfile in random order. The indexes of the cases
+ written in a case_tmpfile need not be sequential or start from
+ 0 (although this will be inefficient if the file system does
+ not support sparse files). The case_tmpfile does not track
+ which cases have been written, so the client is responsible
+ for reading data only from cases (or partial cases) that have
+ previously been written. */
+
+#ifndef DATA_CASE_TMPFILE_H
+#define DATA_CASE_TMPFILE_H 1
+
+#include <data/case.h>
+
+struct case_tmpfile *case_tmpfile_create (size_t value_cnt);
+bool case_tmpfile_destroy (struct case_tmpfile *);
+
+bool case_tmpfile_error (const struct case_tmpfile *);
+void case_tmpfile_force_error (struct case_tmpfile *);
+const struct taint *case_tmpfile_get_taint (const struct case_tmpfile *);
+
+bool case_tmpfile_get_values (const struct case_tmpfile *,
+ casenumber, size_t start_value,
+ union value[], size_t value_cnt);
+bool case_tmpfile_get_case (const struct case_tmpfile *,
+ casenumber, struct ccase *);
+
+bool case_tmpfile_put_values (struct case_tmpfile *,
+ casenumber, size_t start_value,
+ const union value[], size_t value_cnt);
+bool case_tmpfile_put_case (struct case_tmpfile *,
+ casenumber, struct ccase *);
+
+#endif /* data/case-tmpfile.h */
--
_______________________________________________
pspp-dev mailing list
address@hidden
http://lists.gnu.org/mailman/listinfo/pspp-dev
--
PGP Public key ID: 1024D/2DE827B3
fingerprint = 8797 A26D 0854 2EAB 0285 A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.
signature.asc
Description: Digital signature
- Re: [patch 11/19] fix assertion in binary_search, (continued)
- [patch 18/19] fairly experimental changes based on what seemed to make sense, blp, 2007/06/06
- [patch 10/19] new function value_cnt_from_width, blp, 2007/06/06
- [patch 02/19] new function get_workspace_cases, blp, 2007/06/06
- [patch 06/19] sliding window of cases, blp, 2007/06/06
- [patch 04/19] temporary file for array of cases, blp, 2007/06/06
- Re: [patch 04/19] temporary file for array of cases, John Darrington <=
- [patch 09/19] casegrouper implementation, blp, 2007/06/06
- [patch 01/19] move casenumber to case.h, blp, 2007/06/06
- [patch 13/19] add case initialization infrastructure, blp, 2007/06/06
- [patch 17/19] Start writing developers guide., blp, 2007/06/06