[armedbear-cvs] r11395 - in branches/open-external-format: . src/org/armedbear/lisp src/org/armedbear/lisp/util

Erik Huelsmann ehuelsmann at common-lisp.net
Sun Nov 23 11:29:10 UTC 2008


Author: ehuelsmann
Date: Sun Nov 23 11:29:10 2008
New Revision: 11395

Log:
Commit in-progress implementation.

Note: This commit still fails ansi test FILE-POSITION.5: it just locks up.

Added:
   branches/open-external-format/src/org/armedbear/lisp/util/
   branches/open-external-format/src/org/armedbear/lisp/util/RandomAccessCharacterFile.java
Modified:
   branches/open-external-format/build.xml
   branches/open-external-format/src/org/armedbear/lisp/FileStream.java
   branches/open-external-format/src/org/armedbear/lisp/open.lisp

Modified: branches/open-external-format/build.xml
==============================================================================
--- branches/open-external-format/build.xml	(original)
+++ branches/open-external-format/build.xml	Sun Nov 23 11:29:10 2008
@@ -100,6 +100,7 @@
 
     <patternset id="abcl.source.java">
       <include name="org/armedbear/lisp/*.java"/>
+      <include name="org/armedbear/lisp/util/*.java"/>
       <include name="org/armedbear/Main.java"/>
     </patternset>
     
@@ -117,6 +118,7 @@
 
     <patternset id="abcl.objects">
       <include name="org/armedbear/lisp/*.class"/>
+      <include name="org/armedbear/lisp/util/*.class"/>
       <include name="org/armedbear/lisp/*.cls"/> 
       <include name="org/armedbear/lisp/*.abcl"/>
       <patternset refid="abcl.source.lisp.dist"/>

Modified: branches/open-external-format/src/org/armedbear/lisp/FileStream.java
==============================================================================
--- branches/open-external-format/src/org/armedbear/lisp/FileStream.java	(original)
+++ branches/open-external-format/src/org/armedbear/lisp/FileStream.java	Sun Nov 23 11:29:10 2008
@@ -37,29 +37,47 @@
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.RandomAccessFile;
+import org.armedbear.lisp.util.RandomAccessCharacterFile;
 
 public final class FileStream extends Stream
 {
-    private static final int BUFSIZE = 4096;
-
-    private final RandomAccessFile raf;
-    private final RandomAccessFile in;
-    private final RandomAccessFile out;
+    private final RandomAccessCharacterFile racf;
+    private final RandomAccessCharacterFile in;
+    private final RandomAccessCharacterFile out;
     private final Pathname pathname;
     private final int bytesPerUnit;
-    private final byte[] inputBuffer;
-    private final byte[] outputBuffer;
-
-    private long inputBufferFilePosition;
-    private int inputBufferOffset;
-    private int inputBufferCount;
-    private int outputBufferOffset;
 
+    public enum EolStyle {
+        CR,
+        CRLF,
+        LF
+    }
+    
+    static final private Symbol keywordCodePage = Packages.internKeyword("CODE-PAGE");
+    
+    private final static EolStyle platformEolStyle = Utilities.isPlatformWindows ? EolStyle.CRLF : EolStyle.LF;
+    
+    private EolStyle eolStyle = platformEolStyle;
+    private char eolChar = 0;
+    
     public FileStream(Pathname pathname, String namestring,
                       LispObject elementType, LispObject direction,
-                      LispObject ifExists)
+                      LispObject ifExists, String encoding, EolStyle eol)
         throws IOException
     {
+        /* externalFormat is a LispObject of which the first element is the
+         * name of a character encoding (such as :UTF-8 or :ISO-8859-1), used
+         * by ABCL as a string designator, unless the name is :CODE-PAGE.
+         * A real string is (thus) also allowed.
+         * 
+         * Then, a property list follows with 3 possible keys:
+         *   :ID (values: code page numbers supported by MS-DOS/IBM-DOS/MS-Windows)
+         *   :EOL-STYLE (values: :CR / :LF / :CRLF [none means native])
+         *   :LITTLE-ENDIAN (values: NIL / T)
+         * 
+         * These definitions have been taken from FLEXI-STREAMS:
+         *    http://www.weitz.de/flexi-streams/#make-external-format
+         */
         final File file = new File(namestring);
         String mode = null;
         if (direction == Keyword.INPUT) {
@@ -73,10 +91,12 @@
             isInputStream = true;
             isOutputStream = true;
         }
+        
         Debug.assertTrue(mode != null);
-        raf = new RandomAccessFile(file, mode);
-        in = isInputStream ? raf : null;
-        out = isOutputStream ? raf : null;
+        RandomAccessFile raf = new RandomAccessFile(file, mode);
+        racf = new RandomAccessCharacterFile(raf, encoding);
+        in = isInputStream ? racf : null;
+        out = isOutputStream ? racf : null;
         // ifExists is ignored unless we have an output stream.
         if (isOutputStream) {
             final long length = file.isFile() ? file.length() : 0;
@@ -105,18 +125,7 @@
             }
             bytesPerUnit = width / 8;
         }
-        if (isBinaryStream && isInputStream && !isOutputStream && bytesPerUnit == 1)
-            inputBuffer = new byte[BUFSIZE];
-        else if (isCharacterStream && isInputStream && !isOutputStream)
-            inputBuffer = new byte[BUFSIZE];
-        else
-            inputBuffer = null;
-        if (isBinaryStream && isOutputStream && !isInputStream && bytesPerUnit == 1)
-            outputBuffer = new byte[BUFSIZE];
-        else if (isCharacterStream && isOutputStream && !isInputStream)
-            outputBuffer = new byte[BUFSIZE];
-        else
-            outputBuffer = null;
+        eolChar = (eol == EolStyle.CR) ? '\r' : '\n';
     }
 
     @Override
@@ -150,7 +159,7 @@
     public LispObject listen() throws ConditionThrowable
     {
         try {
-            return in.getFilePointer() < in.length() ? T : NIL;
+            return in.dataIsAvailableForRead() ? T : NIL;
         }
         catch (NullPointerException e) {
             streamNotInputStream();
@@ -168,7 +177,7 @@
         final long length;
         if (isOpen()) {
             try {
-                length = raf.length();
+                length = racf.length();
             }
             catch (IOException e) {
                 error(new StreamError(this, e));
@@ -190,60 +199,28 @@
         return number(length / bytesPerUnit);
     }
 
-    @Override
-    public LispObject readLine(boolean eofError, LispObject eofValue)
-        throws ConditionThrowable
-    {
-        if (inputBuffer != null) {
-            final LispThread thread = LispThread.currentThread();
-            final FastStringBuffer sb = new FastStringBuffer();
-            while (true) {
-                int n = _readChar();
-                if (n < 0) {
-                    // End of file.
-                    if (sb.length() == 0) {
-                        if (eofError)
-                            return error(new EndOfFile(this));
-                        return thread.setValues(eofValue, T);
-                    }
-                    return thread.setValues(new SimpleString(sb), T);
-                }
-                char c = (char) n;
-                if (c == '\n')
-                    return thread.setValues(new SimpleString(sb), NIL);
-                else
-                    sb.append(c);
-            }
-        } else
-            return super.readLine(eofError, eofValue);
-    }
-
     // Returns -1 at end of file.
     @Override
     protected int _readChar() throws ConditionThrowable
     {
         try {
-            int c = _readByte();
-            if (Utilities.isPlatformWindows) {
+            int c = in.getReader().read();
+            if (eolStyle == EolStyle.CRLF) {
                 if (c == '\r') {
-                    int c2 = _readByte();
+                    long mark = in.position();
+                    int c2 = in.getReader().read();
                     if (c2 == '\n') {
                         ++lineNumber;
                         return c2;
                     }
                     // '\r' was not followed by '\n'
-                    if (inputBuffer != null && inputBufferOffset > 0) {
-                        --inputBufferOffset;
-                    } else {
-                        clearInputBuffer();
-                        long pos = in.getFilePointer();
-                        if (pos > 0)
-                            in.seek(pos - 1);
-                    }
+                    // we cannot assume that a character occupies exactly 1 byte,
+                    // so we need to revert to the last known position.
+                    in.position(mark);
                 }
                 return c;
             }
-            if (c == '\n') {
+            if (c == eolChar) {
                 ++lineNumber;
                 return c;
             }
@@ -262,45 +239,8 @@
     @Override
     protected void _unreadChar(int n) throws ConditionThrowable
     {
-        if (inputBuffer != null && inputBufferOffset > 0) {
-            --inputBufferOffset;
-            if (n != '\n')
-                return;
-            --lineNumber;
-            if (!Utilities.isPlatformWindows)
-                return;
-            // Check for preceding '\r'.
-            if (inputBufferOffset > 0) {
-                if (inputBuffer[--inputBufferOffset] != '\r')
-                    ++inputBufferOffset;
-                return;
-            }
-            // We can't go back far enough in the buffered input. Reset and
-            // fall through...
-            ++inputBufferOffset;
-        }
         try {
-            long pos;
-            if (inputBuffer != null && inputBufferFilePosition >= 0)
-                pos = inputBufferFilePosition + inputBufferOffset;
-            else
-                pos = in.getFilePointer();
-            clearInputBuffer();
-            if (pos > 0)
-                in.seek(pos - 1);
-            if (Utilities.isPlatformWindows && n == '\n') {
-                // Check for preceding '\r'.
-                pos = in.getFilePointer();
-                if (pos > 0) {
-                    in.seek(pos - 1);
-                    n = in.read();
-                    if (n == '\r')
-                        in.seek(pos - 1);
-                }
-            }
-        }
-        catch (NullPointerException e) {
-            streamNotInputStream();
+            in.unreadChar((char)n);
         }
         catch (IOException e) {
             error(new StreamError(this, e));
@@ -316,14 +256,19 @@
     @Override
     public void _writeChar(char c) throws ConditionThrowable
     {
-        if (c == '\n') {
-            if (Utilities.isPlatformWindows)
-                _writeByte((byte)'\r');
-            _writeByte((byte)c);
-            charPos = 0;
-        } else {
-            _writeByte((byte)c);
-            ++charPos;
+        try {
+            if (c == '\n') {
+                if (eolStyle == EolStyle.CRLF)
+                    out.getWriter().write((byte)'\r');
+                out.getWriter().write((byte)eolChar);
+                charPos = 0;
+            } else {
+                out.getWriter().write((byte)c);
+                ++charPos;
+            }
+        }
+        catch (IOException e) {
+            error(new StreamError(this, e));
         }
     }
 
@@ -331,67 +276,51 @@
     public void _writeChars(char[] chars, int start, int end)
         throws ConditionThrowable
     {
-        if (Utilities.isPlatformWindows) {
-            for (int i = start; i < end; i++) {
-                char c = chars[i];
-                if (c == '\n') {
-                    _writeByte((byte)'\r');
-                    _writeByte((byte)c);
-                    charPos = 0;
-                } else {
-                    _writeByte((byte)c);
-                    ++charPos;
+        try {
+            if (eolStyle == EolStyle.CRLF) {
+                for (int i = start; i < end; i++) {
+                    char c = chars[i];
+                    if (c == '\n') {
+                        out.getWriter().write((byte)'\r');
+                        out.getWriter().write((byte)'\n');
+                        charPos = 0;
+                    } else {
+                        out.getWriter().write((byte)c);
+                        ++charPos;
+                    }
+                }
+            } else {
+                for (int i = start; i < end; i++) {
+                    char c = chars[i];
+                    out.getWriter().write((byte)c);
+                    if (c == '\n') {
+                        out.getWriter().write((byte)eolChar);
+                        charPos = 0;
+                    } else {
+                        out.getWriter().write((byte)c);
+                        ++charPos;
+                    }
                 }
-            }
-        } else {
-            // We're not on Windows, so no newline conversion is necessary.
-            for (int i = start; i < end; i++) {
-                char c = chars[i];
-                _writeByte((byte)c);
-                if (c == '\n')
-                    charPos = 0;
-                else
-                    ++charPos;
             }
         }
+        catch (IOException e) {
+            error(new StreamError(this, e));
+        }
     }
 
     @Override
     public void _writeString(String s) throws ConditionThrowable
     {
-        final int length = s.length();
-        if (Utilities.isPlatformWindows) {
-            for (int i = 0; i < length; i++) {
-                char c = s.charAt(i);
-                if (c == '\n') {
-                    _writeByte((byte)'\r');
-                    _writeByte((byte)c);
-                    charPos = 0;
-                } else {
-                    _writeByte((byte)c);
-                    ++charPos;
-                }
-            }
-        } else {
-            // We're not on Windows, so no newline conversion is necessary.
-            for (int i = 0; i < length; i++) {
-                char c = s.charAt(i);
-                _writeByte((byte)c);
-                if (c == '\n')
-                    charPos = 0;
-                else
-                    ++charPos;
-            }
-        }
+        _writeChars(s.toCharArray(), 0, s.length());
     }
 
     @Override
     public void _writeLine(String s) throws ConditionThrowable
     {
         _writeString(s);
-        if (Utilities.isPlatformWindows)
-            _writeByte((byte)'\r');
-        _writeByte((byte)'\n');
+        if (eolStyle == EolStyle.CRLF)
+            _writeChar('\r');
+        _writeChar(eolChar);
         charPos = 0;
     }
 
@@ -399,10 +328,8 @@
     @Override
     public int _readByte() throws ConditionThrowable
     {
-        if (inputBuffer != null)
-            return readByteFromBuffer();
         try {
-            return in.read(); // Reads an 8-bit byte.
+            return in.getInputStream().read(); // Reads an 8-bit byte.
         }
         catch (NullPointerException e) {
             streamNotInputStream();
@@ -418,34 +345,22 @@
     @Override
     public void _writeByte(int n) throws ConditionThrowable
     {
-        if (outputBuffer != null) {
-            writeByteToBuffer((byte)n);
-        } else {
-            try {
-                out.write((byte)n); // Writes an 8-bit byte.
-            }
-            catch (NullPointerException e) {
-                streamNotOutputStream();
-            }
-            catch (IOException e) {
-                error(new StreamError(this, e));
-            }
+        try {
+            out.getOutputStream().write(n); // Writes an 8-bit byte.
+        }
+        catch (NullPointerException e) {
+            streamNotOutputStream();
+        }
+        catch (IOException e) {
+            error(new StreamError(this, e));
         }
-    }
-
-    @Override
-    public void _finishOutput() throws ConditionThrowable
-    {
-        if (outputBuffer != null)
-            flushOutputBuffer();
     }
 
     @Override
     public void _clearInput() throws ConditionThrowable
     {
         try {
-            in.seek(in.length());
-            clearInputBuffer();
+            in.position(in.length());
         }
         catch (NullPointerException e) {
             streamNotInputStream();
@@ -458,14 +373,8 @@
     @Override
     protected long _getFilePosition() throws ConditionThrowable
     {
-        if (inputBuffer != null) {
-            if (inputBufferFilePosition >= 0)
-                return inputBufferFilePosition + inputBufferOffset;
-        }
-        if (outputBuffer != null)
-            flushOutputBuffer();
         try {
-            long pos = raf.getFilePointer();
+            long pos = racf.position();
             return pos / bytesPerUnit;
         }
         catch (IOException e) {
@@ -478,21 +387,17 @@
     @Override
     protected boolean _setFilePosition(LispObject arg) throws ConditionThrowable
     {
-        if (outputBuffer != null)
-            flushOutputBuffer();
-        if (inputBuffer != null)
-            clearInputBuffer();
         try {
             long pos;
             if (arg == Keyword.START)
                 pos = 0;
             else if (arg == Keyword.END)
-                pos = raf.length();
+                pos = racf.length();
             else {
                 long n = Fixnum.getValue(arg); // FIXME arg might be a bignum
                 pos = n * bytesPerUnit;
             }
-            raf.seek(pos);
+            racf.position(pos);
         }
         catch (IOException e) {
             error(new StreamError(this, e));
@@ -503,10 +408,8 @@
     @Override
     public void _close() throws ConditionThrowable
     {
-        if (outputBuffer != null)
-            flushOutputBuffer();
         try {
-            raf.close();
+            racf.close();
             setOpen(false);
         }
         catch (IOException e) {
@@ -514,61 +417,6 @@
         }
     }
 
-    private int readByteFromBuffer() throws ConditionThrowable
-    {
-        if (inputBufferOffset >= inputBufferCount) {
-            fillInputBuffer();
-            if (inputBufferCount < 0)
-                return -1;
-        }
-        return inputBuffer[inputBufferOffset++] & 0xff;
-    }
-
-    private void fillInputBuffer() throws ConditionThrowable
-    {
-        try {
-            inputBufferFilePosition = in.getFilePointer();
-            inputBufferOffset = 0;
-            inputBufferCount = in.read(inputBuffer, 0, BUFSIZE);
-        }
-        catch (NullPointerException e) {
-            streamNotInputStream();
-        }
-        catch (IOException e) {
-            error(new StreamError(this, e));
-        }
-    }
-
-    private void clearInputBuffer()
-    {
-        inputBufferFilePosition = -1;
-        inputBufferOffset = 0;
-        inputBufferCount = 0;
-    }
-
-    private void writeByteToBuffer(byte b) throws ConditionThrowable
-    {
-        if (outputBufferOffset == BUFSIZE)
-            flushOutputBuffer();
-        outputBuffer[outputBufferOffset++] = b;
-    }
-
-    private void flushOutputBuffer() throws ConditionThrowable
-    {
-        if (outputBufferOffset > 0) {
-            try {
-                out.write(outputBuffer, 0, outputBufferOffset);
-                outputBufferOffset = 0;
-            }
-            catch (NullPointerException e) {
-                streamNotOutputStream();
-            }
-            catch (IOException e) {
-                error(new StreamError(this, e));
-            }
-        }
-    }
-
     @Override
     public String writeToString() throws ConditionThrowable
     {
@@ -578,12 +426,12 @@
     // ### make-file-stream pathname namestring element-type direction if-exists => stream
     private static final Primitive MAKE_FILE_STREAM =
         new Primitive("make-file-stream", PACKAGE_SYS, true,
-                      "pathname namestring element-type direction if-exists")
+                      "pathname namestring element-type direction if-exists external-format")
     {
         @Override
         public LispObject execute(LispObject first, LispObject second,
                                   LispObject third, LispObject fourth,
-                                  LispObject fifth)
+                                  LispObject fifth, LispObject sixth)
             throws ConditionThrowable
         {
             final Pathname pathname;
@@ -603,12 +451,29 @@
             LispObject elementType = third;
             LispObject direction = fourth;
             LispObject ifExists = fifth;
+            LispObject externalFormat = sixth;
+            
+            String encoding = "ISO-8859-1";
+            if (externalFormat != NIL) {
+                Symbol enc = (Symbol)externalFormat.car(); //FIXME: class cast exception to be caught
+                if (enc != NIL) {
+                    if (enc != keywordCodePage) {
+                        encoding = enc.getName();
+                    }
+                    //FIXME: the else for the keywordCodePage to be filled in
+                }
+                //FIXME: the else for the == NIL to be filled in: raise an error...
+            }
+        
+            
+            
             if (direction != Keyword.INPUT && direction != Keyword.OUTPUT &&
                 direction != Keyword.IO)
                 error(new LispError("Direction must be :INPUT, :OUTPUT, or :IO."));
             try {
                 return new FileStream(pathname, namestring.getStringValue(),
-                                      elementType, direction, ifExists);
+                                      elementType, direction, ifExists,
+                                      encoding, platformEolStyle);
             }
             catch (FileNotFoundException e) {
                 return NIL;

Modified: branches/open-external-format/src/org/armedbear/lisp/open.lisp
==============================================================================
--- branches/open-external-format/src/org/armedbear/lisp/open.lisp	(original)
+++ branches/open-external-format/src/org/armedbear/lisp/open.lisp	Sun Nov 23 11:29:10 2008
@@ -143,7 +143,7 @@
                    :pathname pathname
                    :format-control "The file ~S does not exist."
                    :format-arguments (list namestring)))))
-       (make-file-stream pathname namestring element-type :input nil))
+       (make-file-stream pathname namestring element-type :input nil nil))
       (:probe
        (case if-does-not-exist
          (:error
@@ -157,7 +157,8 @@
           ;; this abstract pathname if and only if a file with this name does
           ;; not yet exist." See java.io.File.createNewFile().
           (create-new-file namestring)))
-       (let ((stream (make-file-stream pathname namestring element-type :input nil)))
+       (let ((stream (make-file-stream pathname namestring element-type
+                                       :input nil nil)))
          (when stream
            (close stream))
          stream))
@@ -204,7 +205,8 @@
           (error 'simple-error
                  :format-control "Option not supported: ~S."
                  :format-arguments (list if-exists))))
-       (let ((stream (make-file-stream pathname namestring element-type direction if-exists)))
+       (let ((stream (make-file-stream pathname namestring element-type
+                                       direction if-exists nil)))
          (unless stream
            (error 'file-error
                   :pathname pathname

Added: branches/open-external-format/src/org/armedbear/lisp/util/RandomAccessCharacterFile.java
==============================================================================
--- (empty file)
+++ branches/open-external-format/src/org/armedbear/lisp/util/RandomAccessCharacterFile.java	Sun Nov 23 11:29:10 2008
@@ -0,0 +1,446 @@
+/*
+ * RandomAccessCharacterFile.java
+ *
+ * Copyright (C) 2008 Hideo at Yokohama
+ * Copyright (C) 2008 Erik Huelsmann
+ * $Id$
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * As a special exception, the copyright holders of this library give you
+ * permission to link this library with independent modules to produce an
+ * executable, regardless of the license terms of these independent
+ * modules, and to copy and distribute the resulting executable under
+ * terms of your choice, provided that you also meet, for each linked
+ * independent module, the terms and conditions of the license of that
+ * module.  An independent module is a module which is not derived from
+ * or based on this library.  If you modify this library, you may extend
+ * this exception to your version of the library, but you are not
+ * obligated to do so.  If you do not wish to do so, delete this
+ * exception statement from your version.
+ */
+
+package org.armedbear.lisp.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.RandomAccessFile;
+import java.io.Reader;
+import java.io.Writer;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CoderResult;
+
+public class RandomAccessCharacterFile {
+
+        public class RandomAccessInputStream extends InputStream {
+
+                private RandomAccessCharacterFile racf;
+
+                public RandomAccessInputStream(RandomAccessCharacterFile racf) {
+                        this.racf = racf;
+                }
+                private byte[] buf = new byte[1];
+
+                public int read() throws IOException {
+                        int len = read(buf);
+                        if (len == 1) {
+                                // byte is signed, char is unsigned, int is signed.
+                                // buf can hold 0xff, we want it as 0xff in int, not -1.
+                                return 0xff & (int) buf[0];
+                        } else {
+                                return -1;
+                        }
+                }
+                
+                @Override
+                public int read(byte[] b, int off, int len) throws IOException {
+                        return racf.read(b, off, len);
+                }
+        }
+
+        public class RandomAccessOutputStream extends OutputStream {
+
+                private RandomAccessCharacterFile racf;
+
+                public RandomAccessOutputStream(RandomAccessCharacterFile racf) {
+                        this.racf = racf;
+                }
+
+                private byte[] buf = new byte[1];
+                public void write(int b) throws IOException {
+                        buf[0] = (byte)b;
+                        write(buf);
+                }
+
+                @Override
+                public void write(byte[] b, int off, int len) throws IOException {
+                        racf.write(b, off, len);
+                }
+        }
+
+        public class RandomAccessReader extends Reader {
+
+                private RandomAccessCharacterFile racf;
+
+                public RandomAccessReader(
+                                RandomAccessCharacterFile racf) {
+                        this.racf = racf;
+                }
+
+                public void close() throws IOException {
+                        racf.close();
+                }
+
+                public int read(char[] cb, int off, int len) throws IOException {
+                        return racf.read(cb, off, len);
+                }
+        }
+
+        public class RandomAccessWriter extends Writer {
+
+                private RandomAccessCharacterFile racf;
+
+                public RandomAccessWriter(
+                                RandomAccessCharacterFile racf) {
+                        this.racf = racf;
+                }
+
+                public void close() throws IOException {
+                        racf.close();
+                }
+
+                public void flush() throws IOException {
+                        racf.flush();
+                }
+
+                public void write(char[] cb, int off, int len) throws IOException {
+                        racf.write(cb, off, len);
+                }
+
+        }
+
+
+	final static int BUFSIZ = 4*1024; // setting this to a small value like 8 is helpful for testing.
+	
+	private RandomAccessWriter writer;
+	private RandomAccessReader reader;
+	private RandomAccessInputStream inputStream;
+	private RandomAccessOutputStream outputStream;
+	private FileChannel fcn;
+	private long fcnpos; /* where fcn is pointing now. */
+	private long fcnsize; /* the file size */
+	
+	private Charset cset;
+	private CharsetEncoder cenc;
+	private CharsetDecoder cdec;
+	
+	/**
+	 * bbuf is treated as a cache of the file content.
+	 * If it points somewhere in the middle of the file, it holds a copy of the file content,
+	 * even when you are writing a large chunk of data.  If you write in the middle of a file,
+	 * bbuf is first filled with the existing file contents, and only after that is any new
+	 * data written into bbuf.
+	 * The exception is when you are appending data at the end of the file.
+	 */
+	private ByteBuffer bbuf;
+	private boolean bbufIsDirty; /* whether bbuf holds data that must be written. */
+	private long bbufpos; /* where the beginning of bbuf is pointing in the file now. */
+
+	public RandomAccessCharacterFile(RandomAccessFile raf, String encoding) throws IOException {
+		fcn = raf.getChannel();
+		fcnpos = 0; // fcn points at BOF.
+		fcnsize = fcn.size();
+		
+		cset = Charset.forName(encoding);
+		cdec = cset.newDecoder();
+		cenc = cset.newEncoder(); 
+		
+		bbuf = ByteBuffer.allocate(BUFSIZ);
+		
+		// there is no readable data available in the buffers.
+		bbuf.flip();
+		
+		// there is no write pending data in the buffers.
+		bbufIsDirty = false;
+		
+		bbufpos = fcn.position(); // and so does the byte buffer.
+
+		reader = new RandomAccessReader(this);
+		writer = new RandomAccessWriter(this);
+		inputStream = new RandomAccessInputStream(this);
+		outputStream = new RandomAccessOutputStream(this);
+	}
+	
+	public Writer getWriter() {
+		return writer;
+	}
+	
+	public Reader getReader() {
+		return reader;
+	}
+	
+	public InputStream getInputStream() {
+		return inputStream;
+	}
+	
+	public OutputStream getOutputStream() {
+		return outputStream;
+	}
+	
+	public void close() throws IOException {
+		internalFlush(true);
+		fcn.close();
+	}
+	
+	public void flush() throws IOException {
+		internalFlush(false);
+	}
+
+	/**
+	 * Decodes bytes from the file into {@code cb}, starting at the current
+	 * logical position.
+	 *
+	 * @param cb destination array
+	 * @param off index in {@code cb} of the first character to store
+	 * @param len maximum number of characters to store
+	 * @return the number of characters stored, 0 if {@code len} is 0, or -1
+	 *         if no character could be read
+	 * @throws IOException if the channel cannot be read; a
+	 *         CharacterCodingException (an IOException) is thrown when the
+	 *         decoder reports malformed or unmappable input
+	 */
+	public int read(char[] cb, int off, int len) throws IOException {
+		// wrap() validates off/len against the array before anything else happens.
+		CharBuffer cbuf = CharBuffer.wrap(cb, off, len);
+		if (len == 0) {
+			// a zero-length request is not an end-of-file condition.
+			return 0;
+		}
+		boolean decodeWasUnderflow = false;
+		boolean atEof = false;
+		while ((cbuf.remaining() > 0) && dataIsAvailableForRead()
+				&& ! atEof) {
+			if ((bbuf.remaining() == 0) || decodeWasUnderflow) {
+				// need to read from the file.
+				flushBbuf(); // in case bbuf is dirty.
+				// update bbufpos: the consumed bytes leave the buffer window.
+				bbufpos += bbuf.position();
+				int partialBytes = bbuf.remaining(); // partialBytes > 0 happens when decodeWasUnderflow
+				// if reads and writes are mixed, we may need to seek first.
+				if (bbufpos + partialBytes != fcnpos) {
+					fcn.position(bbufpos + partialBytes);
+				}
+				// keep the undecoded tail at the front and append fresh data behind it.
+				bbuf.compact();
+				// a return value of -1 means the channel is at end-of-stream;
+				// remember it so the loop terminates.
+				atEof = (fcn.read(bbuf) == -1);
+				bbuf.flip();
+				fcnpos = bbufpos + bbuf.remaining();
+			}
+			CoderResult r = cdec.decode(bbuf, cbuf, pointingAtEOF() );
+			if (r.isError()) {
+				// The decoder does not consume the offending bytes, so calling
+				// decode() again would return the same error forever.
+				r.throwException();
+			}
+			decodeWasUnderflow = (CoderResult.UNDERFLOW == r);
+		}
+		if (cbuf.remaining() == len) {
+			return -1;
+		} else {
+			return len - cbuf.remaining();
+		}
+	}
+
+	/**
+	 * Tells whether a read can still yield data: either the byte buffer has
+	 * unread bytes, or the channel position lies before the end of the file.
+	 *
+	 * @return true if buffered or on-disk data remains
+	 * @throws IOException if the channel position or size cannot be queried
+	 */
+	public boolean dataIsAvailableForRead() throws IOException {
+		if (bbuf.remaining() > 0) {
+			return true;
+		}
+		return fcn.position() < fcn.size();
+	}
+	
+	/**
+	 * @return true when the byte buffer is exhausted and the tracked channel
+	 *         position equals the tracked file size
+	 */
+	private boolean pointingAtEOF() {
+		if (bbuf.remaining() != 0) {
+			return false;
+		}
+		return fcnpos == fcnsize;
+	}
+
+	/**
+	 * Encodes {@code len} characters of {@code cb}, starting at {@code off},
+	 * into the byte buffer. Nothing is forced out to the channel and the
+	 * encoder is not told that the input has ended.
+	 *
+	 * @throws IOException if the buffer has to be written out and that fails
+	 */
+	public void write(char[] cb, int off, int len) throws IOException {
+		final boolean flush = false;
+		final boolean endOfFile = false;
+		encodeAndWrite(CharBuffer.wrap(cb, off, len), flush, endOfFile);
+	}
+
+	/**
+	 * Writes pending data to the channel.
+	 *
+	 * @param endOfFile when true, the flush goes through the encoder path with
+	 *        an empty character buffer and the end-of-input flag set; when
+	 *        false, only the byte buffer is written out
+	 * @throws IOException if writing to the channel fails
+	 */
+	private void internalFlush(boolean endOfFile) throws IOException {
+		if (!endOfFile) {
+			flushBbuf();
+			return;
+		}
+		CharBuffer nothingLeft = CharBuffer.allocate(0);
+		encodeAndWrite(nothingLeft, true, endOfFile);
+	}
+
+	/**
+	 * Encodes the remaining characters of {@code cbuf} into the byte buffer,
+	 * writing the buffer to the channel whenever it fills up.
+	 *
+	 * @param cbuf characters to encode; consumed completely on normal return
+	 * @param flush when true, a dirty, non-empty buffer is written out before returning
+	 * @param endOfFile passed to the encoder as its end-of-input flag
+	 * @throws IOException if the channel cannot be read or written; a
+	 *         CharacterCodingException (an IOException) is thrown when the
+	 *         encoder reports malformed or unmappable input
+	 */
+	private void encodeAndWrite(CharBuffer cbuf, boolean flush, boolean endOfFile) throws IOException {
+		if (bbufpos == fcnsize) {
+			// the buffer window starts at the end of the file: open it up for appending.
+			bbuf.clear();
+		}
+		// NOTE(review): with an empty cbuf this loop never runs, so the encoder
+		// is never invoked with endOfFile == true from internalFlush().
+		while (cbuf.remaining() > 0) {
+			CoderResult r = cenc.encode(cbuf, bbuf, endOfFile);
+			bbufIsDirty = true;
+			long curpos = bbufpos + bbuf.position();
+			if (curpos > fcnsize) {
+				// the file is extended.
+				fcnsize = curpos;
+			}
+			if (r.isError()) {
+				// The encoder does not consume the offending character, so
+				// looping again would return the same error forever.  Bytes
+				// encoded so far stay in the (dirty) buffer.
+				r.throwException();
+			}
+			if (CoderResult.OVERFLOW == r || bbuf.remaining() == 0) {
+				flushBbuf();
+				bbufpos += bbuf.limit();
+				bbuf.clear();
+				if (fcnpos < fcnsize) {
+					// existing file data follows: load it so it can be overwritten in place.
+					fcn.read(bbuf);
+					bbuf.flip();
+					fcnpos += bbuf.remaining();
+				}
+				// if we are at the end of file, bbuf is simply cleared.
+				// in that case, bbufpos + bbuf.position points to the EOF, not fcnpos. 
+			}
+		}
+		if (bbuf.position() > 0 && bbufIsDirty && flush) {
+			flushBbuf();
+		}
+	}
+
+	/**
+	 * Moves the logical file position to {@code newPosition}.
+	 * Pending data is written out first. A target inside the current buffer
+	 * window only moves the buffer cursor; any other target repositions the
+	 * channel and empties the buffer.
+	 *
+	 * @param newPosition absolute byte offset in the file
+	 * @throws IOException if flushing or repositioning the channel fails
+	 */
+	public void position(long newPosition) throws IOException {
+		flushBbuf();
+		final long windowEnd = bbufpos + bbuf.limit();
+		final boolean outsideWindow = (newPosition < bbufpos) || (newPosition >= windowEnd);
+		if (outsideWindow) {
+			// far seek: drop the buffer contents and restart the window at the target.
+			flushBbuf();
+			fcn.position(newPosition);
+			fcnpos = newPosition;
+			bbuf.clear();
+			bbuf.flip(); // position == limit == 0: nothing useful in the buffer yet.
+			bbufpos = fcnpos;
+			return;
+		}
+		// near seek: the target byte is already in bbuf.
+		int offsetInWindow = (int)(newPosition - bbufpos);
+		bbuf.position(offsetInWindow);
+	}
+	
+	/**
+	 * Writes out pending data, then reports the logical position within the
+	 * file: the start of the buffer window plus the cursor inside the buffer.
+	 *
+	 * @return the logical byte offset in the file
+	 * @throws IOException if flushing fails
+	 */
+	public long position() throws IOException {
+		flushBbuf();
+		final long cursorInBuffer = bbuf.position();
+		return bbufpos + cursorInBuffer;
+	}
+
+        /**
+         * Writes out pending data so the channel reflects every byte written
+         * so far, then asks the channel for its size.
+         *
+         * @return the size of the file as reported by the channel
+         * @throws IOException if flushing or querying the channel fails
+         */
+        public long length() throws IOException {
+            flushBbuf();
+            final long size = fcn.size();
+            return size;
+        }
+        
+	/**
+	 * Writes the contents of the byte buffer back to the channel if it is
+	 * dirty; does nothing otherwise.
+	 *
+	 * The buffer cursor is preserved across the write.  Writing rewinds the
+	 * buffer and drains it up to its limit, which would otherwise leave the
+	 * cursor at the limit and silently move the logical file position
+	 * whenever the buffer holds file data beyond the cursor.
+	 *
+	 * @throws IOException if repositioning or writing the channel fails
+	 */
+	private void flushBbuf() throws IOException {
+		if (bbufIsDirty) {
+			if (fcnpos != bbufpos) {
+				fcn.position(bbufpos);
+			}
+			// remember where the caller was inside the buffer.
+			int savedPosition = bbuf.position();
+			bbuf.position(0);
+			if (bbufpos + bbuf.limit() > fcnsize) {
+				// the buffer is at the end of the file.
+				// area beyond fcnsize does not have data.
+				bbuf.limit((int)(fcnsize - bbufpos));
+			}
+			fcn.write(bbuf);
+			fcnpos = bbufpos + bbuf.limit();
+			// restore the cursor; clamp in case the limit was pulled in above.
+			bbuf.position(Math.min(savedPosition, bbuf.limit()));
+			bbufIsDirty = false;
+		}
+	}
+
+	/**
+	 * Copies raw bytes from the file into {@code b}, starting at the current
+	 * logical position.
+	 *
+	 * @param b destination array
+	 * @param off index in {@code b} of the first byte to store
+	 * @param len maximum number of bytes to store
+	 * @return the number of bytes stored; 0 when no data was available
+	 * @throws IOException if the channel cannot be read or flushed
+	 */
+	public int read(byte[] b, int off, int len) throws IOException {
+		int pos = off;
+		boolean atEof = false;
+		while (pos - off < len && dataIsAvailableForRead()
+				&& ! atEof) {
+			if (bbuf.remaining() == 0) {
+				// need to read from the file.
+				flushBbuf(); // in case bbuf is dirty.
+				// update bbufpos: the whole buffer has been consumed.
+				bbufpos += bbuf.limit();
+				// if reads and writes are mixed, we may need to seek first.
+				if (bbufpos != fcnpos) {
+					fcn.position(bbufpos);
+				}
+				// need to read data from file.
+				bbuf.clear();
+				atEof = (fcn.read(bbuf) == -1);
+				bbuf.flip();
+				fcnpos = bbufpos + bbuf.remaining();
+			}
+			// pos is an absolute index into b (it starts at off), so the number
+			// of bytes still wanted is len minus what has been stored so far.
+			int want = len - (pos - off);
+			if (want > bbuf.remaining()) {
+				want = bbuf.remaining();
+			}
+			bbuf.get(b, pos, want);
+			pos += want;
+		}
+		return pos - off;
+	}
+        
+	// A method corresponding to the good ol' ungetc in C.
+	// This function may fail when using (combined) character codes that use
+	// escape sequences to switch between sub-codes.
+	// ASCII, the ISO-8859 series and any other 8-bit code are OK, all Unicode
+	// variations are OK, but applications of the ISO-2022 encoding framework
+	// can have trouble.
+	// An example of such a code is ISO-2022-JP, which is used in Japanese e-mail.
+	private CharBuffer singleCharBuf; // scratch: the one character being pushed back
+	private ByteBuffer shortByteBuf;  // scratch: receives the encoded form of that character
+
+	/**
+	 * Pushes {@code c} back by moving the logical position backwards by the
+	 * number of bytes {@code c} encodes to.  The bytes in the file are not
+	 * touched; the character is assumed to be the one that was just read.
+	 *
+	 * @param c the character to push back
+	 * @throws IOException if repositioning fails
+	 */
+	public void unreadChar(char c) throws IOException {
+		// algorithm : 
+		//  1. encode c into bytes, to find out how many bytes it corresponds to
+		//  2. move the position backwards that many bytes.
+		//  ** we stop here.  Don't bother to write the bytes to the buffer,
+		//     assuming that it is the same as the original data.
+		//     If we allowed writing back different characters, the buffer would get 'dirty'
+		//     but that would require read/write permissions on files you use unreadChar on,
+		//     even if you are just reading for some tokenizer.
+		//
+		//  So we don't do the following.
+		//  3. write the bytes.
+		//  4. move the position back again.
+		if (singleCharBuf == null) {
+			singleCharBuf = CharBuffer.allocate(1);
+			// maxBytesPerChar() is a float; round up so a fractional bound
+			// cannot under-size the scratch buffer.
+			shortByteBuf = ByteBuffer.allocate((int)Math.ceil(cenc.maxBytesPerChar()));
+		}
+		singleCharBuf.clear();
+		singleCharBuf.append(c);
+		singleCharBuf.flip();
+		shortByteBuf.clear();
+		cenc.encode(singleCharBuf, shortByteBuf, false);
+		// the number of bytes produced is how far to step back.
+		int n = shortByteBuf.position();
+		long pos = position() - n;
+		position(pos);
+	}
+	
+	/**
+	 * Pushes one byte back by moving the logical position one byte towards
+	 * the beginning of the file.  The value of {@code b} is not used and
+	 * nothing is written.
+	 *
+	 * @param b the byte being pushed back (ignored)
+	 * @throws IOException if repositioning fails
+	 */
+	public void unreadByte(byte b) throws IOException {
+		final long previous = position() - 1;
+		position(previous);
+	}
+
+	/**
+	 * Writes {@code len} raw bytes of {@code b}, starting at {@code off}, at
+	 * the current logical position, writing the buffer to the channel
+	 * whenever it fills up.
+	 *
+	 * @param b source array
+	 * @param off index in {@code b} of the first byte to write
+	 * @param len number of bytes to write
+	 * @throws IOException if the channel cannot be read or written
+	 */
+	public void write(byte[] b, int off, int len) throws IOException {
+		int pos = off;
+		while (pos < off + len) {
+			// only the bytes not yet copied are wanted; using len here would
+			// copy past off + len once the first chunk has been written.
+			int want = off + len - pos;
+			if (want > bbuf.remaining()) {
+				want = bbuf.remaining();
+			}
+			bbuf.put(b, pos, want);
+			pos += want;
+			bbufIsDirty = true;
+			long curpos = bbufpos + bbuf.position();
+			if (curpos > fcn.size()) {
+				// the file is extended.
+				fcnsize = curpos;
+			}
+			if (bbuf.remaining() == 0) {
+				flushBbuf();
+				bbufpos += bbuf.limit();
+				bbuf.clear();
+				if (fcn.position() < fcn.size()) {
+					// existing file data follows: load it so it can be overwritten in place.
+					bbufpos = fcn.position();
+					fcn.read(bbuf);
+					bbuf.flip();
+					fcnpos += bbuf.remaining();
+				}
+				// if we are at the end of file, bbuf is simply cleared.
+				// in that case, bbufpos + bbuf.position points to the EOF, not fcnpos. 
+			}
+		}
+	}
+}




More information about the armedbear-cvs mailing list