#!usr/local/bin/perl # 17 March 1999 Tone Merete Bruvik # # Last changed: 10.11.1999 TMB: Handles input files with filling after # record teminator character 1D. Skip the filling until a number shows up which indicates a # new recordheader or EOF. # # Program reading export file from collections using ISO 2907 formats and translate then # into "readable" MARC format. # Copyright (C) 1999 Tone Merete Bruvik , HIT - The Humanities Information Technologies Research Programme # at the University of Bergen # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # # Made for the MALVINE project, # # Reading each record from the imputfile, translate into line encodes MARC, writing an output file # # Arguments: if arguments are provided them must look like this: # , # # On MacOS: If arguments are not given, the user is asked for the information in the order given above. # On UNIX/ PC: If arguments are not given, an error message is given, andre the script dies. # # If the input file contains lines which the script can't translate, these lines are written at the # end of each component # Some flags $ekskluder = 0; $prompt1 = "Choose input file"; $prompt2 = "Give a name for the output file:"; $format = ''; $coll = ''; $localEncoding = ''; # Some variables $recordSep = ''; # clean the recordsepetrator string $fieldStart = ''; # clean the field start string @theRecord = ''; # clean the record string if (@ARGV < 1) { # Test which platform the program is running on if ($^O =~/MacOS/) { require "StandardFile.pl"; print("MARCcon starts (without arguments): $^O\n"); open(INNFIL,$innf = &StandardFile'GetFile($prompt, ("TEXT"))) || die("Something went wrong in the GetFile dialog\n"); # $format = MacPerl::Pick("Which collection is the file from?", "OLA", "GSA", "NMD"); open(UTFIL,">".($utfilnavn = &StandardFile'PutFile($prompt2, "UtDCD"))) || die("Something went wrong in the PutFile dialog"); &MacPerl'SetFileInfo("MSWD","TEXT",$utfilnavn); } else { #Run on PC or UNIX: the arguments have to be used. die("No arguments given. Run the script with arguments: , , \n"); } } else { print("MARCcon starts (with arguments): $^O\n"); open(INNFIL,$innf = $ARGV[0]) || die("ARGV-input file does not work\n"); open(UTFIL,">".($utfilnavn = $ARGV[1])) || die("ARGV-output file does not work\n"); } $coll = $format; $localEncoding = $format; print("From tape format to line MARC format\n"); # Read in a record from the file, we have to known the record seperator for each type # of imput format &readMARCfile($owner); close(INNFIL); close(UTFIL); print("Done\n"); ############################################ # Sub rutine: From Marc International tapeformat to MARC line encoding # Reads a MARC export for in ISO stand tape formats, and write a file with the MARC encoding # as start of line, with tne data following # TMB 15/3-99 sub readMARCfile { my ($owner) = @_; # Reading each record in the input file $lastLine = $linje; $fieldnr = 0; $/ = chr(30); $theRecord = ''; $i = 0; #Reading the file, if there is only hex20, continue reading until something interesting shows up # Read first header of the file read (INNFIL, $header, 24); while (length ($header) > 0) { $header =~ s/^\s+(.+)/$1/; $totBytes = substr($header,0,5); #Getting the number of bytes in the record $posData = substr($header,12,5) - 24; #Getting position of the first field $fieldLen = substr($header,20,1); #Getting length of the field lenght field $startAdrLen = substr($header,21,1); #Getting length of the adresse field $i++; print ($i, ","); if (%$i == 0) { print ("\n"); }; #print ("header: ", $header, "\n"); #print ("totBytes: ", $totBytes, "\n"); #print ("posData: ", $posData, "\n"); #print ("fieldLen: ", $fieldLen, "\n"); #print ("startAdrLen: ", $startAdrLen, "\n"); if ($posData-1 > 0) { #Find the index part read (INNFIL, $index, $posData); # $index = substr($header,24); #print ("index 1: ", $index, "\n"); #Read the data #print ("totBytes-posData: ", $totBytes-$posData, "\n"); if ($totBytes-$posData -24 > 0) { #print ("Her er jeg: \n"); if (read (INNFIL, $data, ($totBytes-$posData -24))) { # For each index, find the data, and write the code number and the data to the output file $indexNumber= 0; #print ("data: ", $data, "\n"); while ($anIndex = substr($index,$indexNumber * 12,12)) { $indexNumber++; $len = substr($anIndex,3,$fieldLen); $start = substr($anIndex,7,$startAdrLen); #print ("#",substr($anIndex,0,3)); #print (" len: ", $len); #print (" start: ", $start); #print (" verdi: ", substr($data,$start,$len), "\n"); if ($len > 0) { print UTFIL ("\#", substr($anIndex,0,3)," "); # Do not print ending character chr(30) print UTFIL (substr($data,$start,$len), "\n"); } else { print UTFIL ("\n"); } } #while # Reading the next characters, until there is a number showing up, then there # there is another header starting, or it is the end of file. while (read (INNFIL, $aChar, 1)) { if ($aChar =~ /\d/) { last; } } #print ("Ut char: ", $aChar,"\n"); #There is something else than filling, a new header starts read (INNFIL, $header, 23); #print ("1 Ny header: ", $header,"\n"); $header = $aChar.$header; #Add the first character of the header #print ("2 Ny header: ", $header,"\n"); } else { print ("Error: ", $!, "\n"); } } } } }