# DBUPLOAD.pm
# copyright (c) 1999 akopia, inc.
#
########################################################################
#    This program is free software; you can redistribute it and/or
#    modify it under the terms of version 2 of the GNU General Public
#    License as published by the Free Software Foundation.
#    
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
#    General Public License for more details.
########################################################################

#
# This package has not been completed.  At least, not the way I envisioned
# it.  See the section titled 'status of test suite' below for more info.
#
# Wow.  This turned into a scripter.  Sort of.
# Ahem. 
# Doh!
# ...sigh.
#

#
# 
# AVAILABLE FUNCTIONS AND VARIABLES DEFINED IN THIS PACKAGE:
#
# $errmsg
#
# &dbupload_save_file_to_disk
# &dbupload_save_data_to_disk
#
# &dbupload_get_columns
# &dbupload_rip_it
# &dbupload_create_instruction_set_from_form
# &dbupload_setup_initial_form
# &dbupload_get_table_o_data
#
#
# =============================================================================
#
# The philosophy of the database uploader:
#
# We expect a table of data; one record per line, and column delimited in
# some way.  The delimiter is, of course, specified by the user.
#
# The problem is that we have no idea what format the data will be coming
# in as.  We want this system to be able to deal with as many formats as
# possible.  We have therefore created a "formatless" upload.  The user
# can upload the data in whatever format he wants, and then we try to make
# the best sense of it that we can.  As part of the process, the user has
# to specify what every column represents.
#
# The general idea is to do something reasonable with the data.  In other 
# words, we're trying to implement a "do what I mean" uploader.  That means
# that, in general, the system will behave in the way that you expect it to.
# In order to get that kind of behavior, I had to jump through all sorts of
# hoops;  the result is a big, unwieldly beast of thing.  A lot of the 
# structure and organization of the code was determined by asking "Well,
# what if the user's data looks like _this_.  That would be reasonable.  And
# if it _did_ come in like that, we would have to process it thusly..."
#
# We put in features to allow the user to do pretty much anything he
# wants to.  The problem is that all sorts of confusing and illegal constructs
# are possible, to which the system may or may not react favorably.
#
# Your best bet is to enter the data in a way that you think is fairly simple
# and useful, and let the system do its thing.  After all, that's what it
# was designed to do...
#
# And so, there is no clear algorithmic structure to the thing.  The result
# is a lot of little loopholes in the algorithm -- you can abuse the 
# functionality in a way that doesn't make (common) sense, but, because
# of the way it works, may do something useful.  That's why I said that
# it was almost a scripter.  The test data suite has some examples of what
# I mean.
#
# Hopefully, if people need to do more than what is available here, they
# will be able to modify the code relatively easily... or at least, borrow
# a lot of it, so they won't have to write their own uploader.
#
# Caveat user.
#

#
# =============================================================================
#
# The status of the test suite and the uploader:
#
#   There are 17 test upload files, which are in varying stages of completion.
# The ones that are finished, and work like they say they work, have an 'X'
# next to them.  The rest list what exactly is missing:
#
# td.1.txt   X
# td.2.txt     (needs 's' matcher)
# td.3.txt   X
# td.4.txt     (needs snippet match, put in page)
# td.5.txt     (needs 's')
# td.6.txt     (needs 's', put in page)
# td.7.txt   X
# td.8.txt   X 
# td.9.txt     (needs variant match)
# td.10.txt    (needs variant match)
# td.11.txt  X
# td.12.txt    (needs stock match)
# td.13.txt    (needs stock match)
# td.14.txt    (needs 's', differentiate 'c' and 'u' on snippets)
# td.15.txt    (the test case itself is incomplete)
# td.16.txt  X
# td.17.txt  X
#
# As you can see, there are really only a few major things that we still
# need to do.  However, those are the really hard things.
#
# The 's' match and the 'put in page' idea is good, but unfortunately,
# due to the nature of the way the site organizer works, it looks like
# it's going to be impossible to get working.  We have no idea what
# it means to 'in a page,' or 'in a group.'  I suppose we could have the
# user specify it... either at a row level, or in the header.  Hmmmmmm.
# We'll have to wait for Jason to finish the graph thing for that to work.
#
# Stock, variant and snippet matches each have their own problems.  
# I'm actually not so convinced that any of them are a good idea.
#
# Also, I would like to add functionality to match on an ID of some sort.
# That way, you could change the name (if you wanted to).  Later.
#
# =============================================================================
#
# Instruction set grammar specification
# 
# An instruction set is a set of key-value pairs.  Each column has it's
# own instructions, which operate (relatively) independently of each other.
#
# The first two characters of an instruction line _must_ be '##', or it will
# not be recognized as an instruction line.
#
# Below are all of the tokens that can appear for each column.  Each should
# be prefixed by the column number to which it applies (starting the counting
# at 1).  For example:
#
## 1_type=item; 1_field=Name;   1_attr=mc
## 2_type=stock; 2_field=price; 2_attr=u
#
# Some notes:
#
#   the single letters on the 'attr' field stand for match ('m'), 
#     create ('c'), update ('u'), and subthings ('s'; this will select the 
#     children of a page or group).
#
#   the single letters on the 'clean' field stand for all extra white space
#     ('a'), leading/trailing white space ('l'), single quotes ('s'), double 
#     quotes ('q'), non alpha-numeric characters ('n'), and non-digit 
#     characters ('d').
#
#   the single letters on the 'ignoretype' and 'matchtype' fields stand for
#     substring ('s'), exact ('e'), and case-insensitive ('i').
#

# -----------------------------------------------------------------------------

# type=item
# field={Name | id | <any snippet>}
# thingtype_name=<string>
# thingtype_label=<string>
# sniptype_label=<string>
# sniptype_default=<string>
# sniptype_widgettype={normal | checkbox | selectionbox | radio | textarea | 
#                      password | hidden}
# sniptype_widgetinfo=<string>
# attr={any combination of m , c , u}     
# clean={any combination of u , a , l , s , q , n , d}
# ignorestr=<string>
# ignoretype={any combination of s , e , i}
# matchtype={any combination of s , e , i}

# -----------------------------------------------------------------------------

# type={group | page}
# field={Name | id | <any snippet>}
# thingtype_name=<string>
# thingtype_label=<string>
# sniptype_label=<string>
# sniptype_default=<string>
# sniptype_widgettype={normal | checkbox | selectionbox | radio | textarea | 
#                      password | hidden}
# sniptype_widgetinfo=<string>
# attr={any combination of m , c , u , s}     
# clean={any combination of u , a , l , s , q , n , d}
# ignorestr=<string>
# ignoretype={any combination of s , e , i}
# matchtype={any combination of s , e , i}

# -----------------------------------------------------------------------------

# type=stock
# field={stock_id | sku | price | weight | volume | atp | rt | drq}
# attr={any combination of m , c}     
# clean={any combination of u , a , l , s , q , n , d}
# ignorestr=<string>
# ignoretype={any combination of s , e , i}
# matchtype={any combination of s , e , i}

# -----------------------------------------------------------------------------

# type={item | group | page}_sniptype
# field={name | label | default | widgettype | widgetinfo}
# clean={any combination of u , a , l , s , q , n , d}
# ignorestr=<string>
# ignoretype={any combination of s , e , i}

# -----------------------------------------------------------------------------

# type={item | group | page}_thingtype
# field={name | label}
# clean={any combination of u , a , l , s , q , n , d}
# ignorestr=<string>
# ignoretype={any combination of s , e , i}

# -----------------------------------------------------------------------------

# type=matrix

# -----------------------------------------------------------------------------

# type=dim
# attr=c
# dim=<dimension name>
# matrix=<matrix name>
# split=<character to use as a splitter>

# -----------------------------------------------------------------------------

# type=range
# attr={m | c}
# dim=<dimension name>
# matrix=<matrix name>

# -----------------------------------------------------------------------------

#
# XXX added these for the database exporter:
#     (this is really a hack, and should be redone [correctly] at some point)
# type=cknarkey
# type=cknarval
# type=cknargroup
# type=cmatrix
# type=cdim
# type=crange
# type=citemtype_name
# type=citemtype_label
# type=cpagetype_name
# type=cpagetype_label
# type=cgrouptype_name
# type=cgrouptype_label
#

#
# =============================================================================
#

package DBUPLOAD;

use strict;
require ITEM;
require PAGE;
require DBLIB;
require GROUP;
require STOCK;
require MATRIX;
require THINGTYPE;
require SEC;

# this determines whether or not debug output is generated.  1=yes, 0=no

my $GEN_DEBUG_LOG = 1;

# this will force the debug log to be shown, regardless of whether or not
# a show_debug_log instruction is specified. (good for debugging, bad for
# production.

my $FORCE_SHOW_DEBUG_LOG = 0;

# this is so that the debug_log is erased once per session.
# XXX how does this work with mod_perl?

my $DEBUG_LOG_PRIMED = 0;

# this is the current line num in the input file

my $linenum;

# these strings are used to implement required parameters (such as name),
# when the user hasn't specified the parameter.

my $NEWOBJSTR          = 'DBUPLOAD -- NEW';
my $NEWTHINGTYPE_NAME  = 'NEW GENERIC THINGTYPE';
my $NEWTHINGTYPE_LABEL = 'NEW GENERIC THINGTYPE LABEL';
my $NEWMATRIX          = 'NEW GENERIC MATRIX';

# this variable contains an error message, which is accessible by the
# calling document.

use vars qw($errmsg %stat_set $items_to_be_analyzed);

#
# you know what?  I wonder why I'm not using global variables for the
# instruction set, and the object set, and the columns.   I bet it doesn't
# perform very well, having to pass those around all the time.
#

#
# -----------------------------------------------------------------------------
#
# expects an open file handle to be passed; writes the data to 
# the standard database upload file.
#

sub dbupload_save_file_to_disk {
  my ($fh, $fname, $buffer, $total_bytes, $bytes_read);

  $fh = shift;
  $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.txt";
  SEC::untaint_ref(\$fname);

  if (!open(FILE, ">$fname")) {
    $errmsg = "Could not open file $fname for writing: $!";
    return(0);
  }

  $total_bytes = 0;
  $bytes_read = read($fh, $buffer, 32768);

  while ($bytes_read != 0) {
    $total_bytes += $bytes_read;
    print FILE $buffer;
    $bytes_read = read($fh, $buffer, 32768);
  }

  close FILE;

  if ($total_bytes == 0) {
    $errmsg = "0 bytes read!  Check your CGI.pm version!";
    return(0);
  } else {
    return(1);
  }
}

#
# -----------------------------------------------------------------------------
#
# expects a scalar; writes the scalar to the standard database upload file.
#

sub dbupload_save_data_to_disk {
  my ($fname, $data);

  $data = shift;
  $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.txt";
  SEC::untaint_ref(\$fname);

  if (!open(FILE, ">$fname")) {
    $errmsg = "Could not open file $fname for writing: $!";
    return(0);
  }

  print FILE $data;

  close FILE;

  if (length $data == 0) {
    $errmsg = "0 bytes written!  Did you give me the right variable? Was there any data to write?";
    return(0);
  } else {
    return(1);
  }
}

#
# -----------------------------------------------------------------------------
#

sub debug_log {
  my($i, $j, @a, $fname);

  # this analyzes the stack level, so that we can automatically indent
  # calls to the logger!

  while (do { { package DB; @a = caller($i++) } } ) { };

  $i -= 4;

  if ($GEN_DEBUG_LOG == 1) {
    my($msg) = shift;
    $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.debuglog.txt";
    SEC::untaint_ref(\$fname);
    open(HOHUM, ">>$fname");
    for ($j=0; $j < $i; $j++) {
      print HOHUM "  ";
    }
    print HOHUM "$msg\n";
    close(HOHUM);
  }
}

#
# -----------------------------------------------------------------------------
#
# this just erase whatever was there previously

sub debug_log_remove {
  if (($GEN_DEBUG_LOG == 1) && (!$DEBUG_LOG_PRIMED)) {
    my($fname) = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.debuglog.txt";
    SEC::untaint_ref(\$fname);

    open(HOHUM, ">$fname");
    close(HOHUM);

    $DEBUG_LOG_PRIMED=1;
  }
}

#
# -----------------------------------------------------------------------------
#
# returns the contents of the debug log in a single scalar.

sub debug_log_get {
  my($results) = "";
  my($fname) = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.debuglog.txt";
  SEC::untaint_ref(\$fname);

  open(HOHUM, "<$fname");

  while (<HOHUM>) {
    $results .= $_;
  }

  close(HOHUM);
  return($results);
}

#
# -----------------------------------------------------------------------------
#
# logs all of the instructions to the debug log

sub debug_log_iset {
  my($i_set_ref) = shift;
  my($key);

  my($fname) = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.debuglog.txt";
  SEC::untaint_ref(\$fname);

  open(HOHUM, ">>$fname");

  print HOHUM "===== Instruction set =====\n";

  foreach $key (sort(keys(%$i_set_ref))) {
    if (ref(${$i_set_ref}{$key}) eq "ARRAY") {
      print HOHUM "[$key][arrayref] => [" . join(", ", @{${$i_set_ref}{$key}}) . "]\n";
    } else {
      print HOHUM "[$key] => [" . ${$i_set_ref}{$key} . "]\n";
    }
  }

  close(HOHUM);
}

#
# =============================================================================
#
# =============================================================================
#
# Actually rips the data.
#
# An instruction set is a set of parameters that tells dbupload how to 
# rip the data.  It is represented by a hash, but only a reference to the
# hash is actually passed around to the various subroutines.  An object set
# is a set of objects upon which to operate.  It is also a hash, although
# only a reference to it is passed about.  The same thing is true for a
# statistics set.
#

#
# this function expects a reference to an intial [analyzed] instruction set.  
# Typically, it will have been created from form data.  It can also be empty, 
# but nothing will be done with the data in the database file until an inline
# instruction set is encountered.
#

sub dbupload_rip_it {
  my($csep_t, $csep, $i_set_ref, $object_set, $fname);
  my(@cols, $i, $line);

  my(@dave);

  debug_log_remove();

  # open the file
  $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.txt";
  SEC::untaint_ref(\$fname);

  if (!open(FILE, "$fname")) {
    stat_set_add_error("Couldn't open upload file: $!");
    return(\%stat_set);
  }

  $csep_t = shift;
  $csep = get_csep_given_type($csep_t);

  $i_set_ref = shift;
  initialize_stat_set();
  DBLIB::db_transact_begin();

  $items_to_be_analyzed = {};

  $linenum=0;
  LINE: while ($line = <FILE>) {
    $linenum++;
    $stat_set{'total_lines'}++;

    # pre-process the row, checking for comments, new instruction sets, 
    # and empty rows.

    if (!pre_process_line($csep, \$line, $i_set_ref)) {
      next LINE; 
    }

    # split each line
    # see the 'perlfunc' man page entry on split for why we use the -1.
    # basically, it's so that trailing null fields aren't stripped.

    @cols = split(/$csep/, $line, -1);
    unshift @cols, 'blank';

    # check to see if we should ignore this row

    if ( ignore_row($i_set_ref, @cols) ) {
      next LINE;
    }

    $stat_set{'data_lines'}++;
    debug_log('*** data line ***');

    # clean this row

    @cols = clean_row($i_set_ref, @cols);

    for ($i=0; $i<$#cols +1; $i++) {
      debug_log("col $i [" . $cols[$i] . "]");
    }

    # create a set of objects that match the specified parameters

    $object_set = match_and_create_objects($i_set_ref, @cols);
 
    # update matching objects according to instructions specified

    update_objects($object_set, $i_set_ref, @cols);

    # XXX implement the database exporter hack

    exporter_hack($object_set, $i_set_ref, @cols);

  }

  analyze_all_items_that_need_to_analyzed();

  DBLIB::db_transact_end();

  close (FILE);

  # unlink the dbupload file

  unlink($fname);

  # unlink the debuglog file
  
  if ($GEN_DEBUG_LOG == 1) {
    if (${$i_set_ref}{'show_debug_log'} || ($FORCE_SHOW_DEBUG_LOG == 1)) {
      $stat_set{'debuglog_file_contents'} = debug_log_get();
    }
  }

  if ($stat_set{'in_iset'} == 1) {
    stat_set_add_warning("the system encountered inline instruction lines at 
                          the end of the file, but no data lines followed 
                          them!  Perhaps your file was incomplete?");
  }

  return(\%stat_set);
}

#
# -----------------------------------------------------------------------------
#
# checks for instruction, comment, and empty lines; gets rid of some
# nasty characters, etc.
#

sub pre_process_line {
  my($csep, $line_ref, $i_set_ref, $tmpref) = @_;

  # is it an instruction line?

  if ($$line_ref =~ /^##/) {
    $stat_set{'instruction_lines'}++;
    if (!defined($stat_set{'in_iset'}) || ($stat_set{'in_iset'} == 0)) {
      wipe_out_instruction_set($i_set_ref);
      $stat_set{'in_iset'} = 1;
    }
    $tmpref = parse_instruction_set_line($csep, $$line_ref, $i_set_ref);
    %$i_set_ref = %$tmpref;
    return 0;

  # is it a comment line? ignore if so.

  } elsif ($$line_ref =~ /^#/) {
    $stat_set{'comment_lines'}++;
    debug_log('comment line');
    return 0;
  }

  # is it an empty line? ignore.

  chomp $$line_ref;
  $$line_ref =~ s/\r//g;
  if ($$line_ref eq "") { 
    $stat_set{'empty_lines'}++;
    debug_log('empty line');
    return 0; 
  }

  # well, if it's none of those, and we've added new instruction items,
  # post process the instruction set -- set up ignore_cols, clean_cols, etc.

  if ($stat_set{'in_iset'} == 1) {
    if (!analyze_instruction_set($i_set_ref)) {
      # XXX throw an error condition here.  Might not have verified properly.
    }
    $stat_set{'in_iset'} = 0;
  }

  return (1);
}

#
# -----------------------------------------------------------------------------
#
# just splits all of the instructions on the line, and populates the 
# instruction hash.  basically.
#
# each instruction is a key-value pair.  pairs are delimited by a
# semi-colon, followed by whitespace.  that's so that if you need to use a 
# semi-colon as a value, you can do something like this: 
#   5_split=;; 
# the same thing goes for the equals sign; after splitting on semi-colons, 
# it splits on = signs, but will only split into two items.  you can say
#   5_split==;
# if you need to.
#
# special instructions may be specified in line as well.  The following
# special instructions are recognized: ignore_inline_instructions,
# dont_unescape_headers, disable_matching, and show_debug_log.
#
# Format is just like this:
#
## ignore_inline_instructions; show_debug_log;
#
# XXX currently, there is no way to _disable_ those special flags once they've
# been defined.
#

sub parse_instruction_set_line {
  my($csep, $line, $old_i_set_ref) = @_;
  my($token, $key, $val, @icols, %i_hash, $wnum, $c, $tok);

  if (${$old_i_set_ref}{'ignore_inline_instructions'}) {
    debug_log('ignoring instruction line');
    return;
  }

  debug_log('parsing instruction line');

  %i_hash = %$old_i_set_ref;

  $line =~ s/^##//;

  @icols = split(/;(\s)+/, $line);
  foreach $token (@icols) {

    # first of all, process special instructions.

    if (($token =~ /show_debug_log/i) ||
        ($token =~ /ignore_inline_instructions/i) ||
        ($token =~ /dont_unescape_headers/i) ||
        ($token =~ /disable_matching/i)) {
      $token =~ s/^(\s)*//;  # clean leading and trailing whitespace from key
      $token =~ s/(\s)*$//;
      $token =~ tr/A-Z/a-z/;
      SEC::untaint_ref(\$token);    
      $i_hash{$token} = 1;
      next;
    }

    # next, process regular instructions.

    ($key, $val) = split(/=/, $token, 2);
    $key =~ s/^(\s)*//;  # clean leading and trailing whitespace from key
    $key =~ s/(\s)*$//;
    $val =~ s/^(\s)*//;  # clean leading and trailing whitespace from val
    $val =~ s/(\s)*$//;
    if (!($i_hash{'dont_unescape_headers'} == 1)) {
      $val =~ s/%(..)/pack("c",hex($1))/ge; # unescape header
    }

    if ($key =~ /\S/) {  # make sure that the key actually contains something

      # is there already something there?  well, that's probably a user
      # error of some sort... a mistyped column number, perhaps?  just
      # warn 'em.

      if (defined($i_hash{$key})) {
        stat_set_add_warning("possibly mistyped column specifier 
                              found on line # $linenum -- $key = $val");
      }

      # check and make sure it's a recognized instruction

      ($c, $tok) = split(/_/,$key,2);
      if (($tok ne 'type') && ($tok ne 'field') && ($tok ne 'attr') &&
          ($tok ne 'clean') && ($tok ne 'split') && ($tok ne 'dim') &&
          ($tok ne 'matrix') && ($tok ne 'ignorestr') &&
          ($tok ne 'ignoretype') && ($tok ne 'matchtype') &&
          ($tok ne 'thingtype_name') && ($tok ne 'thingtype_label') &&
          ($tok ne 'sniptype_label') && ($tok ne 'sniptype_default') &&
          ($tok ne 'sniptype_widgettype') && ($tok ne 'sniptype_widgetinfo')) {

        stat_set_add_warning("skipping unrecognized instruction found on 
                              line # $linenum -- $key = $val");

      } elsif ($c !~ /\d+/) {

        stat_set_add_warning("bad column number in instruction found on
                              line # $linenum -- $key = $val");

      } else {
        SEC::untaint_ref(\$key);
        $i_hash{$key} = $val;
      }

    }

    # we use this to tell us how many columns we need to process when
    # we're analyzing the instruction set.

    $key =~ /(\d+)_(.+)/;
    if (!defined($i_hash{'biggest_field_found'}) ||
        ($1 > $i_hash{'biggest_field_found'})) {
      $i_hash{'biggest_field_found'} = $1;
    }
    
  }

  return (\%i_hash);
}

#
# -----------------------------------------------------------------------------
#

sub wipe_out_instruction_set {
  my($i_set_ref, %hohum);
  $i_set_ref = shift;
  %$i_set_ref = %hohum;
}

#
# -----------------------------------------------------------------------------
#
# are there any columns in this row that would cause us to ignore this row?
#
# here, there are three possible characters for the instruction:
#   's' -- substring match
#   'e' -- exact match
#   'i' -- case insensitive match
#
# 's' and 'e' are mutually exclusive.  if both are specified, the substring
# match will be executed.  there is no compelling reason that the substring
# match is the default... that's just the way I coded it.  oh well.
# if neither are specified, it will execute an exact match.
#

sub ignore_row {
  my($i_set_ref, @cols) = @_;
  my($i, $j, $type, $istr, @ignore_cols, $datastr);

  @ignore_cols = @{${$i_set_ref}{'ignore_cols'}};

  for ($i=0; $i < $#ignore_cols+1; $i++) {

    $j = $ignore_cols[$i];
    $type = ${$i_set_ref}{"${j}_ignoretype"};
    $datastr = $cols[$j];
    $istr = ${$i_set_ref}{"${j}_ignorestr"};

    # will it be a case-insensitive match?
    # XXX what about internationalization here???

    if ($type =~ /i/) {
      $datastr =~ tr/A-Z/a-z/;
      $istr =~ tr/A-Z/a-z/;
    }

    # is it a substring match?

    if ($type =~ /s/) {
      if ( $datastr =~ /$istr/) { 
        $stat_set{'ignored_lines'}++;
        debug_log("line ignored -- [$datastr] =~ /$istr/"); 
        return 1; 
      }

    # is it an exact match?

# by commenting this out, it allows the system to perform an exact
# match in case neither 'e' nor 's' was specified.
#    } elsif ($type =~ /e/) {

    } else {
      if ( $datastr eq $istr) { 
        $stat_set{'ignored_lines'}++;
        debug_log("line ignored -- [$datastr] eq [$istr]"); 
        return 1;
      }

    }

  }

  return 0;
}

#
# -----------------------------------------------------------------------------
#
# here, we clean each column in the row.
#
#  u = url-style unescape
#  a = all extra white space
#  l = leading and trailing white space 
#  s = single quotes
#  q = double quotes
#  n = non-alpha numeric characters
#  d = non-digit characters
#

sub clean_row {
  my($i_set_ref, @cols) = @_;
  my($i, $j, @clean_cols, $command);

  @clean_cols = @{${$i_set_ref}{'clean_cols'}};

  debug_log('cleaning columns');

  for ($i=0; $i < $#clean_cols+1; $i++) {

    $j = $clean_cols[$i];

    debug_log("$j");

    $command = ${$i_set_ref}{"${j}_clean"};
    study($command);  # necessary?

    #  url-style unescape
    if ($command =~ /u/) { $cols[$j] =~ s/%(..)/pack("c",hex($1))/ge; }

    # condense multi-character white space to a single space
    if ($command =~ /a/) { $cols[$j] =~ s/(\s+)/ /g; }

    # trim leading and trailing white space
    if ($command =~ /l/) { 
      $cols[$j] =~ s/^(\s+)//;
      $cols[$j] =~ s/(\s+)$//;
    }

    # single quotes
    if ($command =~ /s/) { $cols[$j] =~ s/'//g; }

    # double quotes
    if ($command =~ /q/) { $cols[$j] =~ s/"//g; }

    # non alpha-numeric characters
    if ($command =~ /n/) { $cols[$j] =~ s/\W//g; }

    # non digit characters
    if ($command =~ /d/) { $cols[$j] =~ s/[^.0123456789]//g; }

  }

  return @cols;
}

#
# =============================================================================
#
# This function is half of the meat of the database upload.  It will match
# and / or create any necessary objects given an [analyzed] instruction set
# and a row of data.  The result is a prepared object set, which is then
# passed to the "update_objects" function, where attributes are slapped on.
#
# Note that the algorithm here is very specific; it is designed to work
# in a certain manner.  Be careful about changing it without understanding
# what it does and why it does it.
#

sub match_and_create_objects {
  my($i_set_ref, @cols) = @_;
  my(%o_set, $o_set_ref, $itemid, @itemids, $stockid, $id);
  my($col, @c_cols);

  # Initialize the object_set

  $o_set_ref = \%o_set;

  $o_set{'first_match_done'} = 0;
  $o_set{'page_list'} = [];
  $o_set{'group_list'} = [];
  $o_set{'item_list'} = [];
  $o_set{'stock_list'} = [];

  $itemid  = 0;

  #
  # Start matching things.  We start at the broadest types of matches, 
  # and gradually work our way down to specific variants of items.
  #
  # We interleave the match and create operations so that things work
  # like you think they ought to, in accordance with the design goal.
  # There are problems when an instruction set says to "match or create an
  # item," and then "match or create these ranges."  What happens if 
  # the item doesn't exist?  The match operation fails, and the range
  # match will match _everything_ in the database with those variants.
  # Probably not what you wanted.
  #

  create_sniptypes($i_set_ref, $o_set_ref, @cols);

  match_children($i_set_ref, $o_set_ref, @cols);

  #
  # match and create pages
  #

  match_page_attrs($i_set_ref, $o_set_ref, @cols);

  if (${$i_set_ref}{'create_page'} && ($#{$o_set{'page_list'}} == -1)) {
    debug_log('trying to create pages...');
    @c_cols = @{${$i_set_ref}{'c_page_obj_cols'}};
    foreach $col (@c_cols) {
      debug_log("$col");
      $id = create_new_page($col, $i_set_ref, @cols);
      add_obj_to_object_set('page', $id, $o_set_ref);
    }
  }

  #
  # match and create groups
  #

  match_group_attrs($i_set_ref, $o_set_ref, @cols);

  if (${$i_set_ref}{'create_group'} && ($#{$o_set{'group_list'}} == -1)) {
    debug_log('trying to create groups...');
    @c_cols = @{${$i_set_ref}{'c_group_obj_cols'}};
    foreach $col (@c_cols) {
      debug_log("$col");
      $id = create_new_group($col, $i_set_ref,@cols);
      add_obj_to_object_set('group', $id, $o_set_ref);
    }
  }

  #
  # match and create items and variants
  #

  match_item_attrs($i_set_ref, $o_set_ref, @cols);
  match_variants($i_set_ref, $o_set_ref, @cols);

  # find or create item type

  # create item and variants, if necessary
 
  if (${$i_set_ref}{'create_item'} && ($#{$o_set{'item_list'}} == -1)) {

    debug_log('trying to create items...');
    @c_cols = @{${$i_set_ref}{'c_item_obj_cols'}};

    # we want to wipe out the stock list if we're going to be creating
    # any items with dimensions or ranges...  think about it this way: if
    # we just want to update the price of all items to a certain value,
    # we want an item match to match all of the stock cells of a mixed item.
    # (see td.7.txt).  if we're creating one variant at a time, and we're
    # updating the price of the created stock cells, we want _only_ the
    # created stock cells to be updated (see td.8.txt)

    if ($#c_cols > -1 && ( ${$i_set_ref}{'create_range'} ||
                           ${$i_set_ref}{'create_dimension'})) {
      debug_log('wiping out stock_list');
      $#{${$o_set_ref}{'stock_list'}} = -1;
    }

    foreach $col (@c_cols) {
      debug_log("$col");
      $itemid = create_new_item($col, $i_set_ref, @cols);
      add_obj_to_object_set('item', $itemid, $o_set_ref);

      # create variants

      if (${$i_set_ref}{'create_dimension'}) {
        create_dimensions($i_set_ref, $o_set_ref, $itemid, @cols);

      } elsif (${$i_set_ref}{'create_range'}) {
        create_ranges($i_set_ref, $o_set_ref, $itemid, @cols);

      } else {
        $stockid = create_new_stock($itemid);
        ITEM::item_set_mixid_and_stockid($itemid, 0, $stockid);
        add_obj_to_object_set('stock', $stockid, $o_set_ref);
      }
    }

  } else {  # (do we need to create an item?)

    # create variants for all items in the object_set

    if (${$i_set_ref}{'create_dimension'}) {

      @itemids = @{$o_set{'item_list'}};

      if ($#itemids > -1) {
        debug_log('wiping out stock_list');
        $#{${$o_set_ref}{'stock_list'}} = -1;
      }

      foreach $itemid (@itemids) {
        create_dimensions($i_set_ref, $o_set_ref, $itemid, @cols);
      }

    } elsif (${$i_set_ref}{'create_range'}) {

      @itemids = @{$o_set{'item_list'}};

      if ($#itemids > -1) {
        debug_log('wiping out stock_list');
        $#{${$o_set_ref}{'stock_list'}} = -1;
      }

      foreach $itemid (@itemids) {
        create_ranges($i_set_ref, $o_set_ref, $itemid, @cols);
      }

    }

  }

  match_stock_attrs($i_set_ref, $o_set_ref, @cols);

  add_items_to_analysis_list($o_set_ref);

  return($o_set_ref);
}

#
# -----------------------------------------------------------------------------
#

sub create_sniptypes {
  my($i_set_ref, $o_set_ref, @cols) = @_;

  if (${$i_set_ref}{'create_item_sniptype'}) {
    real_create_sniptypes($i_set_ref, $o_set_ref, $THING::item, 'item', @cols);
  }

  if (${$i_set_ref}{'create_page_sniptype'}) {
    real_create_sniptypes($i_set_ref, $o_set_ref, $THING::page, 'page', @cols);
  }

  if (${$i_set_ref}{'create_group_sniptype'}) {
    real_create_sniptypes($i_set_ref, $o_set_ref, $THING::group,'group',@cols);
  }
}

#
# -----------------------------------------------------------------------------
#

sub real_create_sniptypes {
  my($i_set_ref, $o_set_ref, $obj_type, $obj_str, @cols) = @_;
  my($thing_id, $sniptype_id, $col);
  my($fstr, $label, $default, $wtype, $winfo);

  #
  # first of all, figure out what the thingtype should be...
  #

  # they have to specify the thingtype at a row level.

  if (!defined(${$i_set_ref}{"${obj_str}_thingtype_name_col"})) {
    stat_set_add_warning("no ${obj_str}_thingtype_name_col specified!");
    return;
  }

  $col = ${$i_set_ref}{"${obj_str}_thingtype_name_col"};

  $thing_id = get_thing_id($obj_type, $obj_str, $i_set_ref, $col, @cols);

  #
  # good. now get the rest of the information...
  #

  $fstr = $cols[${$i_set_ref}{"${obj_str}_sniptype_name"}];
  $fstr = DBLIB::db_string_clean($fstr);
  $sniptype_id = SNIPTYPE::sniptype_get_id($obj_type, $fstr);

  if (!defined($sniptype_id) || ($sniptype_id == 0)) {

    # sniptype doesn't exist; create it.

    ($label, $default, $wtype, $winfo)
      = get_sniptype_info_by_row($obj_str, $i_set_ref, @cols);

    debug_log("creating sniptype: $obj_str $label $default $wtype $winfo");

    $sniptype_id = SNIPTYPE::sniptype_create($obj_type, $thing_id, $fstr,
                     $label, $default, 'local', $wtype, $winfo);

    $stat_set{'snippettypes'}++;

  } else {

    my($sniptype, $sniptypes);

    # sniptype exists; append it to the current thingtype (but only
    # if it isn't already in there)

    $sniptypes = THINGTYPE::thingtype_get_sniptypes($obj_type, $thing_id);
    foreach $sniptype (@$sniptypes) {
      if ($sniptype == $sniptype_id) {
        return;
      }
    }

    THINGTYPE::thingtype_append($obj_type, $thing_id, $sniptype_id);

  }
}

#
# =============================================================================
#
#
# Here's the create_dimensions algorithm:
# 
# wipe out whatever was there before!
#   if there's a mix, delete existing cells
#   if there is no mix, create one and convert to mixed product
#
# foreach dimension column
#   create the matrix, if it doesn't exist.
#     (matrix specified should be the same for all range columns!!!)
#   create the dimension, if it doesn't exist.
#   split dimension value
#   foreach split dim val
#     create the range, if it doesn't exist
#
# take all dimensions, and multiplex them.  
#   foreach resultant variation
#     create a stock cell
#     create a cell
#     create the nodes
#
# Of course, any stock cells created go into the object_set.
#

sub create_dimensions {
  my($i_set_ref, $o_set_ref, $item_id, @cols) = @_;

  my($matrix, $matrix_id, $matrix_cstr);
  my($dim, $dim_id, $dim_cstr);
  my($range, $range_id, $range_cstr, @ranges);
  my($dimcol, @dimcols);
  my($hohum, %rangehash, $i);

  $matrix_id = 0;

  debug_log("creating dimensions for $item_id");    

  #
  # first, wipe out whatever was there before
  #

  my($label, $sku, $price, $weight, $volume, $atp, $rt, $drq, $stock_id, 
     $mix_id, $cell_id) = ITEM::item_get_info($item_id);

  if ($mix_id != 0) {
    ITEM::item_set_mixid_and_stockid($item_id, 0, 0);
    ITEM::mix_delete($mix_id); 
  }

  if ($stock_id != 0) {
    ITEM::item_set_mixid_and_stockid($item_id, 0, 0);
    STOCK::deassoc($stock_id);
  }

  # remember, item_get_info returns the stock_id it finds.  if the item
  # is freshly created (in other words, if it has neither a stock or a
  # mix), we don't want to use the string 'unknown' as the sku... that
  # could be confusing for debugging purposes.  set it to some known value.

  if ($stock_id == 0) {
    $sku = $NEWOBJSTR;
  }

  $mix_id = ITEM::mix_create();
  $stat_set{'mixes'}++;

  ITEM::item_set_mixid_and_stockid($item_id, $mix_id, 0);

  #
  # Now iterate over all dimension columns, and create the matricies,
  # dimensions and ranges that will be necessary for actually creating
  # the variants.
  #

  my($total_dims) = 0;
  my(@dim_len, $len);

  @dimcols = @{${$i_set_ref}{'dimension_cols'}};

  # find the matrix that we'll be using. the -1 check is because this 
  # function will sometimes create a default matrix; we don't really want
  # to create anything if there aren't any dimension columns.

  if ($#dimcols > -1) {
    $matrix_id = what_is_the_matrix($i_set_ref, 
                   ${$i_set_ref}{'dimension_cols'}, @cols);
  }

  DIMCOL: foreach $dimcol (@dimcols) {

    $dim = ${$i_set_ref}{"${dimcol}_dim"};
    $dim_cstr = DBLIB::db_string_clean($dim);
    $dim_id = MATRIX::dim_find_matrix($dim_cstr, $matrix_id);

    if (!defined($dim_id) || ($dim_id eq "")) {
      $dim_id = MATRIX::dim_create_matrix($dim_cstr, $matrix_id);
      $stat_set{'dimensions'}++;
    }

    # empty column?  skip.

    if (!defined($cols[$dimcol]) || $cols[$dimcol] eq "") {
      next DIMCOL;
    }
 
    @ranges = split(/${$i_set_ref}{"${dimcol}_split"}/, $cols[$dimcol]);

    $rangehash{$total_dims} = [];  # create a reference to an anonymous array
    $hohum = $rangehash{$total_dims};

    $len = 0;
    foreach $range (@ranges) {
      $range_cstr = DBLIB::db_string_clean($range);
      $range_id = MATRIX::range_find_dim($range_cstr, $dim_id);

      if (!defined($range_id) || ($range_id eq "")) {
        $range_id = MATRIX::range_create_dim($range_cstr, $dim_id);
        $stat_set{'ranges'}++;
      }

      push @$hohum, $range_id;
      $len++;
    }
    
    $dim_len[$total_dims] = $len;
    $total_dims++;

  }

  #
  # Now, multiplex all of the ranges (that is, combine all of the specified
  # ranges in all possible combinations), and create the specific variations
  # necessary.
  #
  # We use for loops here to optimize performance; we use an eval because
  # we don't know beforehand how many loops we'll need.
  #
  # Above, we call "item_get_info," which returns a lot of stuff.  We
  # pass it all along to "real_create," which will populate the stock cells
  # it creates with that info.  This is especially handy if we've converted
  # from a single-stock item to a multi-stock item, because then all of the
  # new stock cells will have default values that correspond to the old
  # stock cell.  These can, of course, be overridden by the "update_objects"
  # function.
  #

  my($var, $str, $str1, $str2, $str3);

  $var = 'a';
  $str1 = '';
  $str2 = '';
  $str3 = '';

  for ($i=0; $i<$total_dims; $i++) {
    $str1 .= "my(\$$var);\n";
    $str2 .= "for (\$$var=0; \$$var< $dim_len[$i] ; \$$var++) { \n";
    $str3 .= "\$val[$i] = \${\$rangehash{$i}}[\$$var];\n";
    $var++;
  }

  $str = $str1 . $str2 . $str3;
  $str .= 'real_create($mix_id, $o_set_ref, $sku, $price, $weight, $volume, $atp, $rt, $drq, @val);';

  for ($i=0; $i<$total_dims; $i++) {
    $str .= "\n}";
  }

  my(@val);

  if (!defined(eval($str))) {
    debug_log("error: $@");
  }
}

#
# -----------------------------------------------------------------------------
#

sub real_create {
  my($mixid, $o_set_ref, $sku, $price, $weight, $volume, 
     $atp, $rt, $drq, @vars) = @_;

  debug_log("creating variation " . join(",",@vars));    

  my($i, $stockid, $invid, $cellid);

  $invid = STOCK::inv_create_local($sku, $price, $weight, $volume, $atp, $rt,
                                   $drq);

  $stockid = STOCK::assoc($invid, 'LOCAL');
  $cellid = ITEM::cell_create_mix($mixid, $stockid);  

  add_obj_to_object_set('stock', $stockid, $o_set_ref);

  $stat_set{'stocks'}++;
  $stat_set{'cells'}++;

  for ($i=0; $i < $#vars+1; $i++) {
    ITEM::node_create_cell($cellid, $vars[$i]);
    $stat_set{'nodes'}++;
  }
}

#
# -----------------------------------------------------------------------------
#
# Here's the create_ranges algorithm:
#
# If there's no mix, create one and convert to mixed product
#
# create a mix for the item, if it doesn't exist
# create a stock cell for the item
# create a cell for the item
#
# foreach range column
#   create the matrix, if it doesn't exist.
#   (matrix specified should be the same for all range columns!!!)
#   create the dimension, if it doesn't exist.
#   create the range, if it doesn't exist.
#   create the nodes
#
# Of course, any stock cells created go into the object_set.
#

sub create_ranges {
  my($i_set_ref, $o_set_ref, $item_id, @cols) = @_;

  my($matrix, $matrix_id, $matrix_cstr, $inv_id);
  my($dim, $dim_id, $dim_cstr);
  my($range, $range_id, $range_cstr);
  my($rcol, @rcols);

  debug_log("creating ranges for $item_id");

  #
  # first, wipe out whatever was there before
  #

  my($label, $sku, $price, $weight, $volume, $atp, $rt, $drq, $stock_id, 
     $mix_id, $cell_id) = ITEM::item_get_info($item_id);

  if ($mix_id == 0 && $stock_id == 0) {
    $mix_id = ITEM::mix_create();
    ITEM::item_set_mixid_and_stockid($item_id, $mix_id, 0);
    $stat_set{'mixes'}++;
  }

  if ($stock_id != 0 && $mix_id == 0) {
    ITEM::item_set_mixid_and_stockid($item_id, 0, 0);
    STOCK::deassoc($stock_id);
    $mix_id = ITEM::mix_create();
    ITEM::item_set_mixid_and_stockid($item_id, $mix_id, 0);
    $stat_set{'mixes'}++;
  }

  if ($stock_id == 0) {
    $sku = $NEWOBJSTR;
  }

  $inv_id = STOCK::inv_create_local($sku, $price, $weight, $volume, $atp, $rt,
                                    $drq);
  $stock_id = STOCK::assoc($inv_id, 'LOCAL');
  $cell_id = ITEM::cell_create_mix($mix_id, $stock_id);  
  add_obj_to_object_set('stock', $stock_id, $o_set_ref);

  $stat_set{'stocks'}++;
  $stat_set{'cells'}++;

  $matrix_id = 0;

  @rcols = @{${$i_set_ref}{'range_cols'}};

  # the -1 check here is because this function will sometimes create a 
  # default matrix; don't do it if there aren't any range columns...

  if ($#rcols > -1) {
    $matrix_id = what_is_the_matrix($i_set_ref, 
                   ${$i_set_ref}{'range_cols'}, @cols);
  }

  RCOL: foreach $rcol (@rcols) {

    $dim = ${$i_set_ref}{"${rcol}_dim"};
    $dim_cstr = DBLIB::db_string_clean($dim);
    $dim_id = MATRIX::dim_find_matrix($dim_cstr, $matrix_id);

    if (!defined($dim_id) || ($dim_id eq "")) {
      $dim_id = MATRIX::dim_create_matrix($dim_cstr, $matrix_id);
      $stat_set{'dimensions'}++;
    }

    $range = $cols[$rcol];

    # blank column?  skip.

    if (!defined($range) || $range eq "") {
      next RCOL;
    }

    $range_cstr = DBLIB::db_string_clean($range);
    $range_id = MATRIX::range_find_dim($range_cstr, $dim_id);

    if (!defined($range_id) || ($range_id eq "")) {
      $range_id = MATRIX::range_create_dim($range_cstr, $dim_id);
      $stat_set{'ranges'}++;
    }

    ITEM::node_create_cell($cell_id, $range_id);
    $stat_set{'nodes'}++;

  }

}

#
# -----------------------------------------------------------------------------
#
# try to find the matrix for a given object.
# first, check for a matrix that has been specified on a per-row basis.
# if that fails, check for a matrix defined for the whole column.
# if that fails, check for the default matrix.
#
# cols_to_check is a reference to an array of columns to check for a 
# valid column level matrix specifier.  note that only the first one
# encountered is used; the system doesn't know how to deal with multiple
# specified matrices!
#

sub what_is_the_matrix {
  my($i_set_ref, $cols_to_check, @cols) = @_;
  my($matrix_id, $matrix_cstr, $matrix, $matrix_col);
  my(@cols_to_check, $col);

  $matrix = "";
  $matrix_id = 0;
  $matrix_col = ${$i_set_ref}{'matrix_col'};

  # row-level check

  if (defined($matrix_col) && defined($cols[$matrix_col]) && 
      ($cols[$matrix_col] ne "")) {

    $matrix = $cols[$matrix_col];

  } else {

  # column-level check

    @cols_to_check = @$cols_to_check;
    foreach $col (@cols_to_check) {
      if (defined(${$i_set_ref}{"${col}_matrix"})) {
        $matrix = ${$i_set_ref}{"${col}_matrix"};
      }
    }
  }

  # no luck?  create a default matrix type.

  if (!defined($matrix) || $matrix eq "") {
    $matrix = $NEWMATRIX;
  }

  #
  # search for the specified matrix.  if it doesn't exist, create it.
  #

  $matrix_cstr = DBLIB::db_string_clean($matrix);

  $matrix_id = MATRIX::matrix_find($matrix_cstr);

  if (!defined($matrix_id) || ($matrix_id == 0)) {
    $matrix_id = MATRIX::matrix_create($matrix_cstr);
    $stat_set{'matrices'}++;
  }

  # ...nobody can tell you what the matrix is.
  # you have to see it for yourself.

  return $matrix_id;
}


#
# -----------------------------------------------------------------------------
#
# An object set consists of a single list of objects for each major object
# type (page, group, item, and stock).  
#

sub add_obj_to_object_set {
  my($type, $id, $o_set_ref) = @_;
  my(@cols);

  if ($id == 0) { 
    # XXX hmmm. should we throw an error here?
    return;
  }

  debug_log("adding $type $id to object set");

  @cols = @{${$o_set_ref}{"${type}_list"}};

  push @cols, $id;

  ${$o_set_ref}{"${type}_list"} = \@cols;
}

#
# =============================================================================
#
# Create operations for basic data types
#

sub create_new_page {
  my($col, $i_set_ref, @cols) = @_;

  $stat_set{'pages'}++;
  return (general_object_create(\&PAGE::page_create, $THING::page, 'page', 
                                $col, $i_set_ref, @cols));
}

sub create_new_group {
  my($col, $i_set_ref, @cols) = @_;

  $stat_set{'groups'}++;
  return (general_object_create(\&GROUP::group_create, $THING::group, 'group', 
                                $col, $i_set_ref, @cols));
}

sub create_new_item {
  my($col, $i_set_ref, @cols) = @_;

  $stat_set{'items'}++;
  return (general_object_create(\&ITEM::item_create, $THING::item, 'item', 
                                $col, $i_set_ref, @cols));
}

sub create_new_stock {
  my($item_id) = shift;
  my($inv_id, $stock_id);

  debug_log("creating new stock");

  # parameters: $sku, $price, $weight, $volume, $atp, $rt, $drq
  $inv_id = STOCK::inv_create_local($NEWOBJSTR,0,0,0,0,0,0);
  $stock_id = STOCK::assoc($inv_id,'LOCAL');
  $stat_set{'stocks'}++;
  return $stock_id;
}

#
# -----------------------------------------------------------------------------
#
# general object create is really just a wrapper for the specified 
# create function ($cfunc, which is a code ref) that ensures that
# appropriate thingtypes and snippettypes exist
#

sub general_object_create {
  my($cfunc, $obj_type, $obj_str, $col, $i_set_ref, @cols) = @_;
  my($thing_id, $nstr, $obj_id);

  if (${$i_set_ref}{"${col}_field"} =~ /^id$/i) {
    $nstr = $NEWOBJSTR;
  } else {
    $nstr = $cols[$col];
  }

  $nstr = DBLIB::db_string_clean($nstr);

  $thing_id = get_thing_id($obj_type, $obj_str, $i_set_ref, $col, @cols);

  #
  # alright.  now we have a valid thingtype; let's create the actual object.
  #

  debug_log("creating new $obj_str");
  $obj_id = &$cfunc($nstr, $thing_id);
  return $obj_id;
}

#
# -----------------------------------------------------------------------------
#

sub get_thing_id {
  my($obj_type, $obj_str, $i_set_ref, $col, @cols) = @_;
  my($thing_id, $thing_name, $thing_label, $found);

  #
  # check for a per-row thingtype.
  #   if the string exists, but the type hasn't been found / created,
  #   find or create it.
  # if there is no per-row thingtype, use a column-level thingtype
  #   if the string exists, but the type hasn't been found / created,
  #   find or create it.
  #

  #
  # row-level check
  #

  $thing_id = 0;
  $found = 0;

  if (defined(${$i_set_ref}{"${obj_str}_thingtype_name_col"})) {

    $thing_name = $cols[${$i_set_ref}{"${obj_str}_thingtype_name_col"}];

    if (defined($thing_name) && ($thing_name ne "")) {
      $thing_name = DBLIB::db_string_clean($thing_name);
      $thing_id = THINGTYPE::thingtype_get_id($obj_type, $thing_name);

      if (!defined($thing_id) || ($thing_id == 0)) {
        if (defined(${$i_set_ref}{"${obj_str}_thingtype_label_col"}) &&
            defined($cols[${$i_set_ref}{"${obj_str}_thingtype_label_col"}])) {
          $thing_label = $cols[${$i_set_ref}{"${obj_str}_thingtype_label_col"}];
          $thing_label = DBLIB::db_string_clean($thing_label);
        } else {
          $thing_label = $NEWTHINGTYPE_LABEL;
        }

        $thing_id = THINGTYPE::thingtype_create($obj_type, $thing_name, 
                                                $thing_label);
        $stat_set{'thingtypes'}++;
      }
      $found = 1;
    }
  }

  #
  # column-level check
  #

  if ((!$found) && defined(${$i_set_ref}{"${col}_thingtype_id"})) {
    $thing_id = ${$i_set_ref}{"${col}_thingtype_id"};
    $found = 1;
  }

  if ((!$found) && defined(${$i_set_ref}{"${col}_thingtype_name"})) {

    $thing_name = ${$i_set_ref}{"${col}_thingtype_name"};

    if (defined($thing_name) && ($thing_name ne "")) {
      $thing_name = DBLIB::db_string_clean($thing_name);
      $thing_id = THINGTYPE::thingtype_get_id($obj_type, $thing_name);

      if (!defined($thing_id) || ($thing_id == 0)) {

        if (defined(${$i_set_ref}{"${col}_thingtype_label"}) && 
            defined($cols[${$i_set_ref}{"${col}_thingtype_label"}])) {
          $thing_label = $cols[${$i_set_ref}{"${col}_thingtype_label"}];
          $thing_label = DBLIB::db_string_clean($thing_label);
        } else {
          $thing_label = $NEWTHINGTYPE_LABEL;
        }

        $thing_id = THINGTYPE::thingtype_create($obj_type, $thing_name,
                                                $thing_label);
        $stat_set{'thingtypes'}++;
      }

      ${$i_set_ref}{"${col}_thingtype_id"} = $thing_id;
      $found = 1;
    }
  }

  #
  # no luck?  hmmm.  well, create a default object type.
  #

  if ($found == 0) {
    $thing_id = THINGTYPE::thingtype_get_id($obj_type, $NEWTHINGTYPE_NAME);

    if (!defined($thing_id) || $thing_id == 0) {
      $thing_id = THINGTYPE::thingtype_create($obj_type,
                    $NEWTHINGTYPE_NAME, $NEWTHINGTYPE_LABEL);
      $stat_set{'thingtypes'}++;
    }
  }

  ensure_snippet_types_exist($obj_type, $obj_str, $thing_id, $i_set_ref);

  return($thing_id);
}

#
# -----------------------------------------------------------------------------
#
# Sometimes, when we create a new object, we also create a new thingtype
# to go along with it.  When that happens, we have to make sure that the
# thingtype contains all of the snippettypes necessary for us to be able
# to update the snippets later on.
#
# this function is called whenever a new thingtype is created or looked up.
#

sub ensure_snippet_types_exist {
  my($obj_type, $obj_str, $thing_id, $i_set_ref) = @_;
  my($col, @cols);
  my($sniptype_id, $fstr);
  my($label, $default, $winfo, $wtype);

  @cols = @{${$i_set_ref}{"c_${obj_str}_cols"}};
  foreach $col (@cols) {
    $fstr = ${$i_set_ref}{"${col}_field"};

    $sniptype_id = SNIPTYPE::sniptype_get_id($obj_type, $fstr);

    if (!defined($sniptype_id) || ($sniptype_id == 0)) {

      ($label, $default, $wtype, $winfo) 
        = get_sniptype_info_by_column($col, $i_set_ref);

      $fstr = DBLIB::db_string_clean($fstr);
      $sniptype_id = SNIPTYPE::sniptype_create($obj_type, $thing_id, $fstr,
                       $label, $default, 'local', $wtype, $winfo);

      $stat_set{'snippettypes'}++;
    }

    ${$i_set_ref}{"${col}_sniptype_id"} = $sniptype_id;
  }
}

#
# -----------------------------------------------------------------------------
#
# Whenever a new sniptype is created, it needs certain values supplied. This
# function will either return handy defaults, or it will check the instruction
# set for user-supplied values.
#

sub get_sniptype_info_by_column {
  my($col, $i_set_ref) = @_;
  my($label, $default, $widgettype, $widgetinfo, $str);

  $label = '(none)';
  $default = '';
  $widgettype = $SNIPTYPE::normal;
  $widgetinfo = '';

  if (defined(${$i_set_ref}{"${col}_sniptype_label"})) {
    $label = ${$i_set_ref}{"${col}_sniptype_label"};
    $label = DBLIB::db_string_clean($label);
  }

  if (defined(${$i_set_ref}{"${col}_sniptype_default"})) {
    $default = ${$i_set_ref}{"${col}_sniptype_default"};
    $default = DBLIB::db_string_clean($default); 
  }

  if (defined(${$i_set_ref}{"${col}_sniptype_widgettype"})) {
    $str = ${$i_set_ref}{"${col}_sniptype_widgettype"};
    $str =~ tr/A-Z/a-z/;

    if ($str eq "normal") {
      $widgettype = $SNIPTYPE::normal;

    } elsif ($str eq "checkbox") {
      $widgettype = $SNIPTYPE::checkbox;

    } elsif ($str eq "selectionbox") {
      $widgettype = $SNIPTYPE::selectionbox;

    } elsif ($str eq "radio") {
      $widgettype = $SNIPTYPE::radio;

    } elsif ($str eq "textarea") {
      $widgettype = $SNIPTYPE::textarea;

    } elsif ($str eq "password") {
      $widgettype = $SNIPTYPE::password;

# XXX not yet implemented
#    } elsif ($str eq "file_upload") {
#      $widgettype = $SNIPTYPE::file_upload;

    } elsif ($str eq "hidden") {
      $widgettype = $SNIPTYPE::hidden;
    }
  }

  if (defined(${$i_set_ref}{"${col}_sniptype_widgetinfo"})) {
    $widgetinfo = ${$i_set_ref}{"${col}_sniptype_widgetinfo"};
    $widgetinfo = DBLIB::db_string_clean($widgetinfo);
  }

  # label can't be null in the database!
  if ($label eq "") {
    $label = '(none)';
  }

  return ($label, $default, $widgettype, $widgetinfo);
}

#
# -----------------------------------------------------------------------------
#
# same as the above, only operates on the row data.
#

sub get_sniptype_info_by_row {
  my($obj_str, $i_set_ref, @cols) = @_;
  my($label, $default, $widgettype, $widgetinfo, $str);
  my($splitstr, @vals, $splitcol);

  $label = '(none)';
  $default = '';
  $widgettype = $SNIPTYPE::normal;
  $widgetinfo = '';

  if (defined(${$i_set_ref}{"${obj_str}_sniptype_label"})) {
    $label = $cols[${$i_set_ref}{"${obj_str}_sniptype_label"}];
    $label = DBLIB::db_string_clean($label);
  }

  if (defined(${$i_set_ref}{"${obj_str}_sniptype_default"})) {
    $default = $cols[${$i_set_ref}{"${obj_str}_sniptype_default"}];
    $default = DBLIB::db_string_clean($default); 
  }

  if (defined(${$i_set_ref}{"${obj_str}_sniptype_widgettype"})) {
    $str = $cols[${$i_set_ref}{"${obj_str}_sniptype_widgettype"}];
    $str =~ tr/A-Z/a-z/;

    if ($str eq "normal") {
      $widgettype = $SNIPTYPE::normal;

    } elsif ($str eq "checkbox") {
      $widgettype = $SNIPTYPE::checkbox;

    } elsif ($str eq "selectionbox") {
      $widgettype = $SNIPTYPE::selectionbox;

    } elsif ($str eq "radio") {
      $widgettype = $SNIPTYPE::radio;

    } elsif ($str eq "textarea") {
      $widgettype = $SNIPTYPE::textarea;

    } elsif ($str eq "password") {
      $widgettype = $SNIPTYPE::password;

# XXX not yet implemented
#    } elsif ($str eq "file_upload") {
#      $widgettype = $SNIPTYPE::file_upload;

    } elsif ($str eq "hidden") {
      $widgettype = $SNIPTYPE::hidden;
    }
  }

  if (defined(${$i_set_ref}{"${obj_str}_sniptype_widgetinfo"})) {
    $widgetinfo = $cols[${$i_set_ref}{"${obj_str}_sniptype_widgetinfo"}];

    #
    # for radio and selection boxes, the widgetinfo ought to be the several
    # options, one per line.  Obviously, that won't work if we use the
    # newline as a record delimiter... so we have to translate some other
    # character to newlines.
    #

    if (($widgettype == $SNIPTYPE::radio) || 
        ($widgettype == $SNIPTYPE::selectionbox)) {

      $splitstr = '!';
      $splitcol = ${$i_set_ref}{"${obj_str}_sniptype_widgetinfo_split"};

      if (defined($splitcol) && ($splitcol ne "") &&
          defined($cols[$splitcol]) && 
          ($cols[$splitcol] ne "")) {
        $splitstr = $cols[$splitcol];
      }

      @vals = split(/$splitstr/, $widgetinfo);
      $widgetinfo = join("\n", @vals);
    }

    $widgetinfo = DBLIB::db_string_clean($widgetinfo);
  }

  # label can't be null in the database!
  if ($label eq "") {
    $label = '(none)';
  }

  debug_log("get*row: [$label] [$default] [$widgettype] [$widgetinfo]");

  return ($label, $default, $widgettype, $widgetinfo);
}


#
# =============================================================================
#
# Each of the following match operations behaves in a very specific manner.
# If the operation is the first match to be performed, it operates on 
# the raw database.  That is, if an item match is the first match op
# performed, it will match against all of the items in the database. All
# subsequent match operations will pare down the result set, trimming
# it more and more.
#
# For that reason, object_set has a key called 'first_match_done,' which
# is 0 if it hasn't been done, and 1 if it has.  
#
# XXX that said, let me amend it:  this isn't done yet, so it doesn't
#     do that.  it just matches directly against all of the items in the db.
#

#
# -----------------------------------------------------------------------------
#

sub match_children {
  my($i_set_ref, $o_set_ref, @cols) = @_;

  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping children match...");
    return;
  }

  # remember, both groups and pages can have children.  For that matter,
  # items can have children, too, but I don't think that we really
  # want to worry about that functionality here.

  # all items/pages/groups that are children of the matching item/page/group
  # are added to the object set.  At some point we'll have to allow them
  # to configure which children to select.

  debug_log("matching children");
}

#
# -----------------------------------------------------------------------------
#

sub match_page_attrs {
  my($i_set_ref, $o_set_ref, @cols) = @_;

  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping page match...");
    return;
  }

  debug_log("matching pages");

  gen_obj_match_name(\&PAGE::page_find_by_name, 'page', $i_set_ref, $o_set_ref,
                     @cols);
}

#
# -----------------------------------------------------------------------------
#

sub match_group_attrs {
  my($i_set_ref, $o_set_ref, @cols) = @_;

  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping group match...");
    return;
  }

  debug_log("matching groups");

  gen_obj_match_name(\&GROUP::group_find_by_name, 'group', $i_set_ref, 
                     $o_set_ref, @cols);
}

#
# -----------------------------------------------------------------------------
#

sub match_item_attrs {
  my($i_set_ref, $o_set_ref, @cols) = @_;
  my($results, $results2, $i, $j);

  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping item match...");
    return;
  }

  debug_log("matching items");

  #
  # match items and add to object set
  #

  $results = gen_obj_match_name(\&ITEM::item_find_by_name, 'item', $i_set_ref, 
                                $o_set_ref, @cols);

  #
  # the item ids have all been added; now add the stock cells for each item.
  #

  for ($i=0; $i< $#{$results}+1; $i++) {
    $results2 = ITEM::item_get_all_stock_ids($results->[$i][0]);
    for ($j=0; $j< $#{$results2}+1; $j++) {
      add_obj_to_object_set('stock', $results2->[$j][0], $o_set_ref);
    }
  }
}

#
# -----------------------------------------------------------------------------
#

sub match_variants {
  my($i_set_ref, $o_set_ref, @cols) = @_;
 
  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping variant match...");
    return;
  }

  # note that a variant match results both in the item that contains the
  # variant, and the stock cell that matches the particular variant.

  debug_log("matching variants");
}

#
# -----------------------------------------------------------------------------
#

sub match_stock_attrs {
  my($i_set_ref, $o_set_ref, @cols) = @_;

  if (${$i_set_ref}{'disable_matching'}) {
    debug_log("skipping stock match...");
    return;
  }

  # hmmmm. obviously, this has to match inventory_local, via stock_access...

  debug_log("matching stocks");
}

#
# -----------------------------------------------------------------------------
#
# this executes the specified match function for each column; it adds
# the results to the object set.
#

sub gen_obj_match_name {
  my($mfunc, $type, $i_set_ref, $o_set_ref, @cols) = @_;
  my($results, $i, @m_cols, $command, $col, $tmp);

  debug_log("gen_obj_name_match: $type");

  @m_cols = @{${$i_set_ref}{"m_${type}_cols"}};

  MCOL: foreach $col (@m_cols) {
    if (!defined($cols[$col]) || ($cols[$col] eq "")) {
      next MCOL;
    }

    $command = ${$i_set_ref}{"${col}_matchtype"};
    if (!defined($command) || ($command eq "")) {
      $command = 'e';
    }

    $tmp = $cols[$col];
    $tmp = DBLIB::db_string_clean($tmp);
    $results = &$mfunc($tmp, $command, @{${$o_set_ref}{"${type}_list"}});

    for ($i=0; $i< $#{$results}+1; $i++) {
      add_obj_to_object_set($type, $results->[$i][0], $o_set_ref);
    }

  }

  return($results);
}

#
# =============================================================================
#
# given a set of object on which to operate, and the instructions for
# what to do with those objects, and the data necessary to do it, do it.    :)
#
# this is the other meaty half of the upload system.  once all of the items
# have been matched and or created, they need actual values set.  A name,
# for example, or a description.  This function takes the object set, and
# sets all of the attributes for those objects to the appropriate values.
#

sub update_objects {
  my($o_set_ref, $i_set_ref, @cols) = @_;
  my($itemid, $stockid, $pageid, $groupid, $colnum, @colnums);

  my(%o_set) = %{$o_set_ref};

  #
  # set all item, page, group, and stock attributes
  #


  # set all item level attributes for each item matched

  foreach $itemid (@{${$o_set_ref}{'item_list'}}) {  
    @colnums = @{${$i_set_ref}{'u_item_cols'}};
    foreach $colnum (@colnums) {
      item_set_attr($itemid, $colnum, $i_set_ref, 
                    ${$i_set_ref}{"${colnum}_field"}, $cols[$colnum]);
    }
  }

  # set all stock level attributes for each stock matched

  foreach $stockid (@{${$o_set_ref}{'stock_list'}}) {  
    @colnums = @{${$i_set_ref}{'c_stock_cols'}};
    foreach $colnum (@colnums) {
      stock_set_attr($stockid,${$i_set_ref}{"${colnum}_field"},$cols[$colnum]);
    }
  }

  # set all page level attributes for each page matched

  foreach $pageid (@{${$o_set_ref}{'page_list'}}) {  
    @colnums = @{${$i_set_ref}{'u_page_cols'}};
    foreach $colnum (@colnums) {
      page_set_attr($pageid, $colnum, $i_set_ref, 
                    ${$i_set_ref}{"${colnum}_field"}, $cols[$colnum]);
    }
  }

  # set all group level attributes for each group matched

  foreach $groupid (@{${$o_set_ref}{'group_list'}}) {  
    @colnums = @{${$i_set_ref}{'u_group_cols'}};
    foreach $colnum (@colnums) {
      group_set_attr($groupid, $colnum, $i_set_ref, 
                     ${$i_set_ref}{"${colnum}_field"}, $cols[$colnum]);
    }
  }


  #
  # now put items in pages and groups
  #


  # if groups and items are matched, all items are placed in that group

  foreach $pageid (@{${$o_set_ref}{'page_list'}}) {  
    foreach $itemid (@{${$o_set_ref}{'item_list'}}) {  
      put_item_on_page($pageid, $itemid);
    }
  }

  # if pages and items are matched, all items are placed on that page

  foreach $groupid (@{${$o_set_ref}{'group_list'}}) { 
    foreach $itemid (@{${$o_set_ref}{'item_list'}}) {  
      put_item_in_group($groupid, $itemid);
    }
  }

}

#
# -----------------------------------------------------------------------------
#

sub exporter_hack {
  my($o_set_ref, $i_set_ref, @cols) = @_;

  if (defined(${$i_set_ref}{'cknarkey_col'}) && 
      defined(${$i_set_ref}{'cknarval_col'}) &&
      defined(${$i_set_ref}{'cknargroup_col'})) {
    exporter_hack_knar($o_set_ref, $i_set_ref, @cols);
  }

  if (defined(${$i_set_ref}{'cmatrix_col'}) &&
      defined(${$i_set_ref}{'cdim_col'}) &&
      defined(${$i_set_ref}{'crange_col'})) {
    exporter_hack_m_d_r($o_set_ref, $i_set_ref, @cols);
  }

  if (defined(${$i_set_ref}{'citemtype_name_col'})) {
    exporter_hack_thingtypes($THING::item, 'item', @_);
  }

  if (defined(${$i_set_ref}{'cpagetype_name_col'})) {
    exporter_hack_thingtypes($THING::page, 'page', @_);
  }

  if (defined(${$i_set_ref}{'cgrouptype_name_col'})) {
    exporter_hack_thingtypes($THING::group, 'group', @_);
  }
}

#
# -----------------------------------------------------------------------------
#

sub exporter_hack_knar {
  my($o_set_ref, $i_set_ref, @cols) = @_;
  my($key, $val, $grp);
  my($id, $tmp1, $tmp2);

  $key = $cols[${$i_set_ref}{"cknarkey_col"}];
  $val = $cols[${$i_set_ref}{"cknarval_col"}];
  $grp = $cols[${$i_set_ref}{"cknargroup_col"}];

#  $key = DBLIB::db_string_clean($key);
#  $val = DBLIB::db_string_clean($val);
#  $grp = DBLIB::db_string_clean($grp);

  ($id, $tmp1, $tmp2) = KNAR::knar_get_by_name($key);

  if (defined($id)) {
    KNAR::knar_entry_set($id, $key, $val, $grp);
  } else {
    KNAR::knar_entry_create($key, $val, $grp);
  }

}

sub exporter_hack_m_d_r {
  my($o_set_ref, $i_set_ref, @cols) = @_;
  my($tmp, $matrix_id, $dim_id, $range_id);

  $tmp = $cols[${$i_set_ref}{'cmatrix_col'}];
  if (defined($tmp) && ($tmp ne "")) {
    $matrix_id = exporter_hack_find_or_create_matrix($tmp);

    $tmp = $cols[${$i_set_ref}{'cdim_col'}];
    if (defined($tmp) && ($tmp ne "")) {
      $dim_id = exporter_hack_find_or_create_dimension($tmp, $matrix_id);

      $tmp = $cols[${$i_set_ref}{'crange_col'}];
      if (defined($tmp) && ($tmp ne "")) {
        $range_id = exporter_hack_find_or_create_range($tmp, $dim_id);
      }
    }
  }
}

sub exporter_hack_thingtypes {
  my($type, $typestr, $o_set_ref, $i_set_ref, @cols) = @_;
  my($tmp1, $tmp2);

  $tmp1 = $cols[${$i_set_ref}{"c${typestr}type_name_col"}];

  if (defined(${$i_set_ref}{"c${typestr}type_label_col"})) {
    $tmp2 = $cols[${$i_set_ref}{"c${typestr}type_label_col"}];
  } else {
    $tmp2 = "";
  }

  if (defined($tmp1) && ($tmp1 ne "")) {
    exporter_hack_find_or_create_thingtype($type, $typestr, $tmp1, $tmp2);
  }
}

#
# -----------------------------------------------------------------------------
#

sub exporter_hack_find_or_create_matrix {
  my($matrix_name) = shift;
  my($matrix_cstr, $matrix_id);

  $matrix_cstr = DBLIB::db_string_clean($matrix_name);
  $matrix_id = MATRIX::matrix_find($matrix_cstr);
  if (!defined($matrix_id)) {
    $matrix_id = MATRIX::matrix_create($matrix_cstr);
    $stat_set{'matrices'}++;
  }
  return $matrix_id;
}

sub exporter_hack_find_or_create_dimension {
  my($dim_name, $matrix_id) = @_;
  my($dim_cstr, $dim_id);

  $dim_cstr = DBLIB::db_string_clean($dim_name);
  $dim_id = MATRIX::dim_find_matrix($dim_cstr, $matrix_id);
  if (!defined($dim_id)) {
    $dim_id = MATRIX::dim_create_matrix($dim_cstr, $matrix_id);
    $stat_set{'dimensions'}++;
  }
  return $dim_id;
}

sub exporter_hack_find_or_create_range {
  my($range_name, $dim_id) = @_;
  my($range_cstr, $range_id);

  $range_cstr = DBLIB::db_string_clean($range_name);
  $range_id = MATRIX::range_find_dim($range_cstr, $dim_id);
  if (!defined($range_id)) {
    $range_id = MATRIX::range_create_dim($range_cstr, $dim_id);
    $stat_set{'ranges'}++;
  }
  return $range_id;
}

sub exporter_hack_find_or_create_thingtype {
  my($type, $typestr, $name, $label) = @_;
  my($type_id);

  $name = DBLIB::db_string_clean($name);
  $label = DBLIB::db_string_clean($label);

  $type_id = THINGTYPE::thingtype_get_id($type, $name);

  if (!defined($type_id)) {
    $type_id = THINGTYPE::thingtype_create($type, $name, $label);
    $stat_set{'thingtypes'}++;
  }
  return $type_id;
}

#
# =============================================================================
#
# these functions are where we acutally set the attributes.  some attributes
# are inherent in the object (such as name), and others may be implemented
# as snippets (such as description).  We have to distinguish.
#
# if any of the values is not defined or empty, nothing is done.
#

#
# -----------------------------------------------------------------------------
#
# name, <any snippet>
#

sub item_set_attr {
  my($itemid, $col, $i_set_ref, $key, $val) = @_;
  my($sniptype_id);

  if (!defined($val) || ($val eq "")) {
    return;
  }

#  $key = DBLIB::db_string_clean($key);


  debug_log("setting item $itemid $key => $val");

  $sniptype_id = ${$i_set_ref}{"${col}_sniptype_id"};
  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    $sniptype_id = SNIPTYPE::sniptype_get_id($THING::item, $key);
    ${$i_set_ref}{"${col}_sniptype_id"} = $sniptype_id;
  }

  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    # hmmmm. we couldn't find the sniptype.  That means that it wasn't
    # created via the 'ensure_snippet_types_exist' function, which means
    # we shouldn't create it.  Return.
    return;
  }

  if ($key =~ /^name$/i) {
    $val = DBLIB::db_string_clean($val);
    ITEM::item_set_attr($itemid, ('name' => $val) );
  } else {
    SNIPPET::snip_update($THING::item, $itemid, $sniptype_id, $val);
  }
}

#
# -----------------------------------------------------------------------------
#
# sku, price, weight, volume, atp_qty, reserve_threshold, def_reserve_qty
#

sub stock_set_attr {
  my($stockid, $key, $val) = @_;

  if (!defined($val) || ($val eq "")) {
    return;
  }

  debug_log("setting stock $stockid $key => $val");

  $key =~ tr/A-Z/a-z/;
  $key = DBLIB::db_string_clean($key);
  $val = DBLIB::db_string_clean($val);

  if ($key eq "sku" || $key eq "price" || $key eq "weight" 
      || $key eq "volume" || $key eq "atp_qty" || $key eq "reserve_threshold"
      || $key eq "def_reserve_qty") {
    STOCK::set_attr($stockid, ($key => $val) );
  }
}

#
# -----------------------------------------------------------------------------
#
# name, <any snippet>
# we don't let them change the page's type
#

sub page_set_attr {
  my($pageid, $col, $i_set_ref, $key, $val) = @_;
  my($sniptype_id);

  if (!defined($val) || ($val eq "")) {
    return;
  }

  debug_log("setting page $pageid $key => $val");

  $key =~ tr/A-Z/a-z/;
#  $key = DBLIB::db_string_clean($key);


  $sniptype_id = ${$i_set_ref}{"${col}_sniptype_id"};
  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    $sniptype_id = SNIPTYPE::sniptype_get_id($THING::page, $key);
    ${$i_set_ref}{"${col}_sniptype_id"} = $sniptype_id;
  }

  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    # hmmmm. we couldn't find the sniptype.  That means that it wasn't
    # created via the 'ensure_snippet_types_exist' function, which means
    # we shouldn't create it.  Return.
    return;
  }

  if ($key eq "name") {
    $val = DBLIB::db_string_clean($val);
    PAGE::page_set_attr($pageid, ($key => $val) );
  } else {
    SNIPPET::snip_update($THING::page, $pageid, $sniptype_id, $val);
  }
}

#
# -----------------------------------------------------------------------------
#
# name, <any snippet>
#

sub group_set_attr {
  my($groupid, $col, $i_set_ref, $key, $val) = @_;
  my($sniptype_id);

  if (!defined($val) || ($val eq "")) {
    return;
  }

  debug_log("setting group $groupid $key => $val");

  $key =~ tr/A-Z/a-z/;
#  $key = DBLIB::db_string_clean($key);


  $sniptype_id = ${$i_set_ref}{"${col}_sniptype_id"};
  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    $sniptype_id = SNIPTYPE::sniptype_get_id($THING::group, $key);
    ${$i_set_ref}{"${col}_sniptype_id"} = $sniptype_id;
  }

  if (!defined($sniptype_id) || ($sniptype_id == 0)) {
    # hmmmm. we couldn't find the sniptype.  That means that it wasn't
    # created via the 'ensure_snippet_types_exist' function, which means
    # we shouldn't create it.  Return.
    return;
  }

  if ($key eq "name") {
    $val = DBLIB::db_string_clean($val);
    GROUP::group_set_attr($groupid, ($key => $val) );
  } else {
    SNIPPET::snip_update($THING::group, $groupid, $sniptype_id, $val);
  }
}

#
# -----------------------------------------------------------------------------
#

sub put_item_on_page {
  my($pageid, $itemid) = @_;

  debug_log("putting item $itemid on page $pageid");

  # talk to jason about the conventions established in the regen system.  
  # in other words, what does it mean to be on a page?  We don't know.
}

#
# -----------------------------------------------------------------------------
#

sub put_item_in_group {
  my($groupid, $itemid) = @_;

  debug_log("putting item $itemid on group $groupid");

  # talk to jason about the conventions established in the regen system.
  # in other words, what does it mean to be in a group?  We don't know.
}

#
# -----------------------------------------------------------------------------
#
# Make sure that specified instruction set is valid.  This is typically
# called after a new instruction set has been created.
#
# should make sure that dimensions and ranges don't exist together.
# should make sure that matrix names are specified for range types.
# should make sure that for dimension types, 'matrix,' 'dim,' and 'split' are 
#   specified
# should set default attributes if none are specified???
# check for empty strings where there should be none.
# 
# should we restrict column matching to name / id fields???
#
# make sure there aren't any weird empty definitions in the header.
# (note that the instruction parser won't log an instruction that
# doesn't have a value)
#
# if there was a dimension specified, did you specify a split character?
# not doing so is probably not what you want...
#

sub verify_instruction_set {
  my($i_set_ref) = shift;

  debug_log("verifying instruction set");

  debug_log_iset($i_set_ref);

  return 1;
}

#
# -----------------------------------------------------------------------------
#
# here, we set up some handy arrays corresponding to the instruction set, 
# as a performance optimization feature.
#
# a quick note about *_thingtype_str and *_thingtype_id: by checking to 
# see if it's defined before assigning, we effectively only use the
# first appropriate thingtype as our row-level thingtype.  That's good,
# since we wouldn't know how to deal with multiple specified thingtypes.
#
# The same thing (argh! Jason!) applies to row-level matrix specifications.
#

sub analyze_instruction_set {
  my($i_set_ref) = shift;
  my(%is) = %$i_set_ref;  # just an alias to improve readability

  my($i, @ignore_cols, @clean_cols, $tstr);
  my($attr, $type, $cp, $cg, $ci, $cs, $cr, $cd, $field);
  my($cis, $cps, $cgs, $do_export_hack);

  my(@m_item_cols, @m_stock_cols, @m_group_cols, @m_page_cols);
  my(@c_item_cols, @c_stock_cols, @c_group_cols, @c_page_cols);
  my(@u_item_cols,                @u_group_cols, @u_page_cols);
  my(@s_group_cols, @s_page_cols);
  my(@range_cols, @dimension_cols);
  my(@c_item_obj_cols, @c_group_obj_cols, @c_page_obj_cols);

  @ignore_cols  = ();
  @clean_cols   = ();

  @c_page_obj_cols  = ();
  @c_group_obj_cols = ();
  @c_item_obj_cols  = ();

  @m_page_cols  = ();
  @m_group_cols = ();
  @m_item_cols  = ();
  @m_stock_cols = ();

  @c_page_cols  = ();
  @c_group_cols = ();
  @c_item_cols  = ();
  @c_stock_cols = ();

  @u_page_cols  = ();
  @u_group_cols = ();
  @u_item_cols  = ();

  @s_page_cols  = ();
  @s_group_cols = ();

  @range_cols  = ();
  @dimension_cols = ();

  $i=1;  # remember, column 0 is always the string 'blank'

  $cp = 0;  # create page?
  $cg = 0;  # create group?
  $ci = 0;  # create item?
  $cs = 0;  # create stock?
  $cr = 0;  # create range?
  $cd = 0;  # create dimension?

  $cis = 0;  # create item snippet type?
  $cps = 0;  # create page snippet type?
  $cgs = 0;  # create group snippet type?
  $do_export_hack = 0;  # XXX do we need to process any of the exporter fields?

  debug_log("analyzing instruction set");

  for ( ; $i <= $is{'biggest_field_found'}; $i++) {

    if (!defined($is{"${i}_type"})) {
      next;
    }

    if (defined($is{"${i}_ignorestr"})) {
      push @ignore_cols, $i;
    }

    if (defined($is{"${i}_clean"})) {
      push @clean_cols, $i;
    }

    $attr  = $is{"${i}_attr"};
    $type  = $is{"${i}_type"};
    $field = $is{"${i}_field"};

    $type =~ tr/A-Z/a-z/;

    #
    # check for page, item and group types.  process attribute field.
    #

    if ($type eq "page") {
      if ($attr =~ /m/i) { push @m_page_cols, $i; }
      if ($attr =~ /c/i) { 
        if (($field =~ /^name$/i) || ($field =~ /^id$/i)) {
          push @c_page_obj_cols, $i; 
          $cp=1;
        } else {
          push @c_page_cols, $i; 
        }
      }
      if ($attr =~ /u/i) { push @u_page_cols, $i; }
      if ($attr =~ /s/i) { push @s_page_cols, $i; }
    }

    if ($type eq "group") {
      if ($attr =~ /m/i) { push @m_group_cols, $i; }
      if ($attr =~ /c/i) { 
        if (($field =~ /^name$/i) || ($field =~ /^id$/i)) {
          push @c_group_obj_cols, $i; 
          $cg=1;
        } else {
          push @c_group_cols, $i; 
        }
      }
      if ($attr =~ /u/i) { push @u_group_cols, $i; }
      if ($attr =~ /s/i) { push @s_group_cols, $i; }
    }

    if ($type eq "item") {
      if ($attr =~ /m/i) { push @m_item_cols, $i; }
      if ($attr =~ /c/i) { 
        if (($field =~ /^name$/i) || ($field =~ /^id$/i)) {
          push @c_item_obj_cols, $i; 
          $ci=1;
        } else {
          push @c_item_cols, $i; 
        }
      }
      if ($attr =~ /u/i) { push @u_item_cols, $i; }
    }

    #
    #  check for thingtype fields.  set up name and label columns.
    #

    if ($type eq "item_thingtype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label')) {
        if (!defined($is{"item_thingtype_${tstr}_col"})) {
          $is{"item_thingtype_${tstr}_col"} = $i;
        }
      }
    }

    if ($type eq "page_thingtype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label')) {
        if (!defined($is{"page_thingtype_${tstr}_col"})) {
          $is{"page_thingtype_${tstr}_col"} = $i;
        }
      }
    }

    if ($type eq "group_thingtype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label')) {
        if (!defined($is{"group_thingtype_${tstr}_col"})) {
          $is{"group_thingtype_${tstr}_col"} = $i;
        }
      }
    }

    #
    #  check for sniptype fields.  determine if they should be created.
    #

    if ($type eq "item_sniptype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label') || ($tstr eq 'default') || 
          ($tstr eq 'widgettype') || ($tstr eq 'widgetinfo') || 
          ($tstr eq 'widgetinfo_split')) {

        if (!defined($is{"item_sniptype_${tstr}"})) {
          $is{"item_sniptype_${tstr}"} = $i;
        }
        if ($tstr eq 'name') {
          $cis = 1;
        }
      }
    }

    if ($type eq "page_sniptype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label') || ($tstr eq 'default') || 
          ($tstr eq 'widgettype') || ($tstr eq 'widgetinfo') || 
          ($tstr eq 'widgetinfo_split')) {
        if (!defined($is{"page_sniptype_${tstr}"})) {
          $is{"page_sniptype_${tstr}"} = $i;
        }
        if ($tstr eq 'name') {
          $cps = 1;
        }
      }
    }

    if ($type eq "group_sniptype") {
      $tstr = $field;
      $tstr =~ tr/A-Z/a-z/;
      if (($tstr eq 'name') || ($tstr eq 'label') || ($tstr eq 'default') || 
          ($tstr eq 'widgettype') || ($tstr eq 'widgetinfo') || 
          ($tstr eq 'widgetinfo_split')) {
        if (!defined($is{"group_sniptype_${tstr}"})) {
          $is{"group_sniptype_${tstr}"} = $i;
        }
        if ($tstr eq 'name') {
          $cgs = 1;
        }
      }
    }

    #
    #  ...and process the other miscellaneous fields.
    #

    if ($type eq "stock") {
      if ($attr =~ /m/i) { push @m_stock_cols, $i; }
      if (($attr =~ /c/i) || ($attr =~ /u/i)) { 
        push @c_stock_cols, $i; 
        $cs=1;
      }
    }

    if ($type eq "range") {
      push @range_cols, $i;
      $cr = 1;
    }

    if ($type eq "dim") {
      push @dimension_cols, $i;
      $cd = 1;
    }

    if ($type eq "matrix") {
      if (!defined($is{'matrix_col'})) {
        $is{'matrix_col'} = $i;
      }
    }

    # XXX database exporter stuff.  hack.  fix me please.

    if ( ($type eq 'cknarkey') ||
         ($type eq 'cknarval') ||
         ($type eq 'cknargroup') ||
         ($type eq 'cmatrix') ||
         ($type eq 'cdim') ||
         ($type eq 'crange') ||
         ($type eq 'citemtype_name') ||
         ($type eq 'citemtype_label') ||
         ($type eq 'cpagetype_name') ||
         ($type eq 'cpagetype_label') ||
         ($type eq 'cgrouptype_name') ||
         ($type eq 'cgrouptype_label') ) {
      SEC::untaint_ref(\$type);
      $is{"${type}_col"} = $i;
      $do_export_hack = 1;
    }
  }

  $is{'ignore_cols'} = \@ignore_cols;
  $is{'clean_cols'}  = \@clean_cols;

  $is{'c_page_obj_cols'}  = \@c_page_obj_cols;
  $is{'c_group_obj_cols'} = \@c_group_obj_cols;
  $is{'c_item_obj_cols'}  = \@c_item_obj_cols;

  $is{'m_page_cols'}  = \@m_page_cols;
  $is{'m_group_cols'} = \@m_group_cols;
  $is{'m_item_cols'}  = \@m_item_cols;
  $is{'m_stock_cols'} = \@m_stock_cols;

  $is{'c_page_cols'}  = \@c_page_cols;
  $is{'c_group_cols'} = \@c_group_cols;
  $is{'c_item_cols'}  = \@c_item_cols;
  $is{'c_stock_cols'} = \@c_stock_cols;

  $is{'u_page_cols'}  = \@u_page_cols;
  $is{'u_group_cols'} = \@u_group_cols;
  $is{'u_item_cols'}  = \@u_item_cols;

  $is{'s_page_cols'}  = \@s_page_cols;
  $is{'s_group_cols'} = \@s_group_cols;

  $is{'range_cols'}  = \@range_cols;
  $is{'dimension_cols'} = \@dimension_cols;

  $is{'create_page'}  = $cp;
  $is{'create_group'} = $cg;
  $is{'create_item'}  = $ci;
  $is{'create_stock'} = $cs;
  $is{'create_range'}  = $cr;
  $is{'create_dimension'} = $cd;

  $is{'create_item_sniptype'}  = $cis;
  $is{'create_page_sniptype'}  = $cps;
  $is{'create_group_sniptype'} = $cgs;

  $is{'do_export_hack'} = $do_export_hack;

  %$i_set_ref = %is;

  if (verify_instruction_set(\%is)) {
    return 1;
  } else {
    return 0;
  }
}

#
# -----------------------------------------------------------------------------
#
# takes a string; returns the actual character to use as the column
# separator.  the tab character ('\t') is the default.
#

sub get_csep_given_type {
  my($csep, $csep_t);

  $csep_t = shift;

  $csep = '\t';

  if ($csep_t eq 'tab') {
    $csep = '\t';
  } elsif ($csep_t eq 'comma') {
    $csep = ',';
  } elsif ($csep_t eq 'space') {
    $csep = ' ';
  }

  return($csep);
}

#
# -----------------------------------------------------------------------------
#
# The statistics set is used to keep some simple statistics on the upload.
# Here, we initialize it to useful values.
#

sub initialize_stat_set {
  $stat_set{'warnings'}          = 0;

  $stat_set{'data_lines'}        = 0;
  $stat_set{'ignored_lines'}     = 0;
  $stat_set{'comment_lines'}     = 0;
  $stat_set{'instruction_lines'} = 0;
  $stat_set{'empty_lines'}       = 0;
  $stat_set{'total_lines'}       = 0;

  $stat_set{'items'}             = 0;
  $stat_set{'pages'}             = 0;
  $stat_set{'groups'}            = 0;
  $stat_set{'stocks'}            = 0;

  $stat_set{'matrices'}          = 0;
  $stat_set{'dimensions'}        = 0;
  $stat_set{'ranges'}            = 0;
  $stat_set{'mixes'}             = 0;
  $stat_set{'cells'}             = 0;
  $stat_set{'nodes'}             = 0;
  $stat_set{'thingtypes'}        = 0;
  $stat_set{'snippettypes'}      = 0;
}

#
# -----------------------------------------------------------------------------
#
# Whenever we change an item, either by creating it, adding fields to
# it, converting it, etc., we need to re-analyze its widgets.
# the items_to_be_analyzed hash keeps track of all item_ids that could
# have possibly changed during the course of the upload.  Each is 
# reanalyzed once at the end of the upload.
#

sub analyze_all_items_that_need_to_analyzed {
  my($item_id);

  foreach $item_id (keys(%$items_to_be_analyzed)) {
    ITEM::item_widget_analysis_do($item_id);
  }
}

sub add_items_to_analysis_list {
  my($o_set_ref) = shift;
  my($item_id);

  foreach $item_id (@{${$o_set_ref}{'item_list'}}) {
    $$items_to_be_analyzed{$item_id} = '1';
  }
}

#
# -----------------------------------------------------------------------------
#

sub stat_set_add_warning {
  my($msg) = shift;
  my($wnum);

  # check to see if it's defined, so that warnings that are exactly the
  # same aren't repeated...

  if (!defined($stat_set{"warn_$msg"})) {
    $stat_set{'warnings'}++;
    $wnum = $stat_set{'warnings'};
    $stat_set{"warning_$wnum"} = $msg;
    $stat_set{"warn_$msg"} = 1;
  }

  debug_log("warning: $msg");
}

sub stat_set_add_error {
  my($msg) = shift;
  my($wnum);

  # check to see if it's defined, so that errors that are exactly the
  # same aren't repeated...

  if (!defined($stat_set{"error_$msg"})) {
    $stat_set{'errors'}++;
    $wnum = $stat_set{'errors'};
    $stat_set{"error_$wnum"} = $msg;
    $stat_set{"error_$msg"} = 1;
  }

  debug_log("warning: $msg");
}

#
# =============================================================================
#
# Creates an initial instruction set from form data that the user has
# specified.
#

sub dbupload_create_instruction_set_from_form {
  my(%fdata, $argh_ref, $argh_key);
  my(%i_set, $i, $type, $field, $str, $str2, $str_to_use);

  debug_log_remove();

  $argh_ref = shift;
  foreach $argh_key (keys(%$argh_ref)) {
    $fdata{$argh_key} = ${$argh_ref}{$argh_key};
  }

  # iterate over each field

  if (!defined($fdata{'cols'}) || ($fdata{'cols'} eq "")) {
    stat_set_add_error("Invalid form data: missing fdata{'cols'}!");      
    analyze_instruction_set(\%i_set);
    return(\%i_set);
  }

  for ($i=1; $i<$fdata{'cols'}+1; $i++) {

    #
    # first, determine what this column corresponds to.
    #

    $str = $fdata{"${i}_type"};
    $str2 = $fdata{"${i}_advcorr_type"};

    if (defined($str) && ($str ne "") && ($str ne "advcorr")) {
      if ($str eq 'ignore') {
        next;
      } else {
        $str_to_use = $str;
      }
    } elsif (defined($str2) && ($str2 ne "") && ($str2 ne "ignore")) {
      $str_to_use = $str2;
    } else {
      next;
    }

    ($type, $field) = split(/\+/, $str_to_use, 2);

    #
    # they may want us to create a new snippet type.  check to see.
    #

    if (($fdata{"${i}_new_type"} eq "on") && 
        (defined($fdata{"${i}_new_type_str"})) &&
        ($fdata{"${i}_new_type_str"} ne "") &&
        ( ($fdata{"${i}_new_type_type"} eq 'item') ||
          ($fdata{"${i}_new_type_type"} eq 'page') ||
          ($fdata{"${i}_new_type_type"} eq 'group') ) ) {

      $type = $fdata{"${i}_new_type_type"};
      $field = $fdata{"${i}_new_type_str"};
    }

    if (($type eq "item") ||
        ($type eq "page") ||
        ($type eq "group") ||
        ($type eq "stock") ||
        ($type eq "item_thingtype") ||
        ($type eq "page_thingtype") ||
        ($type eq "group_thingtype") ||
        ($type eq "item_sniptype") ||
        ($type eq "page_sniptype") ||
        ($type eq "group_sniptype") ||
        ($type eq "dim") ||
        ($type eq "range") ||
        ($type eq "matrix")) {

      $i_set{"${i}_type"} = $type;
      if (defined($field) && ($field ne "")) {
        $i_set{"${i}_field"} = $field;
      }

    } else {
      # invalid form option. hmmmmm. warn.
      stat_set_add_warning("invalid form option: $str_to_use");
      next;
    }

    #
    # set up match options
    #

    if ($fdata{"${i}_attr_m"} eq "on") {
      $i_set{"${i}_attr"} = 'm';

      if ($fdata{"${i}_attr_mtype"} eq "s") {
        $i_set{"${i}_matchtype"} = 's';
      } else {
        $i_set{"${i}_matchtype"} = 'e';
      }

      if ($fdata{"${i}_attr_mi"} eq "on") {
        $i_set{"${i}_matchtype"} .= 'i';
      }    
    }    

    if ($fdata{"${i}_attr_c"} eq "on") {
      $i_set{"${i}_attr"} .= 'c';
    }    

    if ($fdata{"${i}_attr_u"} eq "on") {
      $i_set{"${i}_attr"} .= 'u';
    }    

    #
    # set up ignore options
    #

    if (defined($fdata{"${i}_ignore_str"})) {
      $i_set{"${i}_ignorestr"} = $fdata{"${i}_ignore_str"};

      if ($fdata{"${i}_ignore_type"} eq "s") {
        $i_set{"${i}_ignoretype"} = 's';
      } else {
        $i_set{"${i}_ignoretype"} = 'e';
      }

      if ($fdata{"${i}_ignore_ci"} eq "on") {
        $i_set{"${i}_ignoretype"} .= 'i';
      }
    }

    #
    # set up cleaning options
    #

    if ($fdata{"${i}_clean_u"} eq "on") {
      $i_set{"${i}_clean"} .= 'u';
    }
    if ($fdata{"${i}_clean_a"} eq "on") {
      $i_set{"${i}_clean"} .= 'a';
    }
    if ($fdata{"${i}_clean_l"} eq "on") {
      $i_set{"${i}_clean"} .= 'l';
    }
    if ($fdata{"${i}_clean_s"} eq "on") {
      $i_set{"${i}_clean"} .= 's';
    }
    if ($fdata{"${i}_clean_q"} eq "on") {
      $i_set{"${i}_clean"} .= 'q';
    }
    if ($fdata{"${i}_clean_n"} eq "on") {
      $i_set{"${i}_clean"} .= 'n';
    }
    if ($fdata{"${i}_clean_d"} eq "on") {
      $i_set{"${i}_clean"} .= 'd';
    }

    if ($i_set{"${i}_clean"} eq "") {
      delete $i_set{"${i}_clean"};
    }

    #
    # set up variant and misc. options
    #

    foreach $str ('matrix', 'dim', 'split', 'thingtype_name', 
                  'thingtype_label', 'sniptype_label', 'sniptype_default',
                  'sniptype_widgettype', 'sniptype_widgetinfo') {

      if (defined($fdata{"${i}_${str}"}) && 
          $fdata{"${i}_${str}"} ne "") {
        $i_set{"${i}_${str}"} = $fdata{"${i}_${str}"};
      }
    }
  }

  #
  # set up special instructions
  #

  if ($fdata{'ignore_inline_instructions'} eq 'on') {
    $i_set{'ignore_inline_instructions'} = 1;
  }

  if ($fdata{'disable_matching'} eq 'on') {
    $i_set{'disable_matching'} = 1;
  }

  if ($fdata{'show_debug_log'} eq 'on') {
    $i_set{'show_debug_log'} = 1;
  }

  if ($fdata{'dont_unescape_headers'} eq 'on') {
    $i_set{'dont_unescape_headers'} = 1;
  }

  if (defined($fdata{'cols'})) {
    $i_set{'biggest_field_found'} = $fdata{'cols'};
  }

  #
  # and finally, analyze the instruction set
  #

  if (!analyze_instruction_set(\%i_set)) {
    # XXX throw an error condition here.  Might not have verified properly.
  }  

  $stat_set{'in_iset'} = 0;

  return \%i_set;
}

#
# -----------------------------------------------------------------------------
#
# A simple file analyzer;  looks for the first data line, and splits it
# up according to a specified column separator.  Mostly useful for the
# web interface.
#

sub dbupload_get_columns {
  my ($csep_t, $csep, $fname, @cols, $count, $found, $instructions_found);

  $csep_t = shift;
  $csep = get_csep_given_type($csep_t);

  $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.txt";  
  SEC::untaint_ref(\$fname);

  if (!open(FILE,$fname)) {
    $errmsg = "Could not open file $fname: $!";
    return (0,undef);
  }

  #
  # scan through the file until we find the first non-blank, non-comment
  # line (ie the first data line)
  #

  $found = 0;
  $instructions_found = 0;

  LINE: while (!$found) {
    $_ = <FILE>;
    if (!defined($_) || ($_ !~ /\n$/)) {
      close(FILE);
      $errmsg = "Unable to detect rows -- are there newlines in your data?";
      return(0,undef);
    }

    chomp;
    s/\r//g;

    if ($_ =~ /^##/) {
      $instructions_found = 1;
    }

    if ( ($_ eq "") || ($_ =~ /^#/) ) {
      next LINE;
    }

    $found = 1;
  }

  # we use the -1 to ensure that null trailing fields aren't stripped.
  # see the perlfunc man page under 'split' for more info.

  @cols = split(/$csep/, $_, -1);
  unshift @cols, 'blank';
  # we unshift the blank on, because that's the way it'll be done when
  # it's parsed for real.

  $count = 1;

  LINE2: while (<FILE>) { 
    chomp;
    s/\r//g;

    if ( ($_ eq "") || ($_ =~ /^#/) ) { 
      next LINE2; 
    }
    $count++; 
  }

  close(FILE);

  return ($count, $instructions_found, @cols);
}

#
# -----------------------------------------------------------------------------
#
# we want to provide some decent default values for our web-based interface.
# just iterate over all of the columns and set them.
#
# at this point, the default values have to be the same for all of the 
# columns, since we don't know what any of the columns correspond to.
#

sub dbupload_setup_initial_form {
  my(%myfdat, $hohum, $i);

  $hohum = shift;
  
  %myfdat = %$hohum;

  for ($i=0; $i< $myfdat{'cols'}; $i++) {

    # match options

    $myfdat{"${i}_attr_mtype"} = 'e';
    $myfdat{"${i}_attr_u"} = 'on';
    $myfdat{"${i}_attr_c"} = 'on';

    # clean options

    $myfdat{"${i}_clean_a"} = 'on';
    $myfdat{"${i}_clean_l"} = 'on';

    # ignore options

    $myfdat{"${i}_ignore_type"} = 'e';

    # advanced correspondance

    $myfdat{"${i}_new_type_type"} = 'item';
  }

  %$hohum = %myfdat;  
}

#
# -----------------------------------------------------------------------------
#
# this function just parses out the data.  It splits each line as if it
# were actually processing the file, but just returns a huge array.
# this is useful for debugging the contents of an upload file, by 
# examining it for 'holes' -- places where a tab (or other delimiter)
# _should_ have existed, but didn't, and was therefore causing problems.
#
# the zeroth column is the line number.
#

sub dbupload_get_table_o_data {
  my ($csep_t, $csep, $fname, @cols, $col, $line, $ref, $row);

  $csep_t = shift;
  $csep = get_csep_given_type($csep_t);

  $fname = "$ENV{'TALLYMAN_PATH'}/stores/$ENV{'TALLYMAN_SID'}/tmp/dbupload.txt";  
  SEC::untaint_ref(\$fname);

  if (!open(FILE,$fname)) {
    $errmsg = "Could not open file $fname: $!";
    return (0,undef);
  }

  $line = 0;
  $row = 0;
  $ref = [];

  LINE: while (<FILE>) {
    $line++;
    s/\r//g;
    s/\n//g;
    if (/^#/ || ($_ eq "")) {
      next LINE;
    }

    # the -1 is explained elsewhere
    @cols = split(/$csep/, $_, -1);

    $ref->[$row] = [];
    $ref->[$row][0] = $line;
    for ($col=1; $col < $#cols+2; $col++) {
      $ref->[$row][$col] = $cols[$col-1];
    }

    $row++;
  }

  close(FILE);

  return ($ref);
}

#
# -----------------------------------------------------------------------------
#

