Logo Search packages:      
Sourcecode: yudit version File versions


/**
 *  Yudit Unicode Editor Source File
 *
 *  GNU Copyright (C) 2003  Gaspar Sinai <gsinai@yudit.org>  
 *  GNU Copyright (C) 2002  Gaspar Sinai <gsinai@yudit.org>  
 *  GNU Copyright (C) 2001  Gaspar Sinai <gsinai@yudit.org>  
 *  GNU Copyright (C) 2000  Gaspar Sinai <gsinai@yudit.org>  
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License, version 2,
 *  dated June 1991. See file COPYING for details.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */

/**
 * Generic map (n to n)
 * @author: Gaspar Sinai <gsinai@yudit.org>
 * @version: 1999-12-04
 */
#ifndef SBMap_H
#define SBMap_H

#include "stoolkit/STypes.h"
#include "stoolkit/SIO.h"
#include "stoolkit/SIOStream.h"
#include "stoolkit/SStringVector.h"

// Format selector taken by the serialize() / serializeUMAP() methods and by
// SBMapItem::writeCodeArea() / writeTextBytes() declared in this header.
// NOTE(review): no enumerators are visible here, although SS_BINARY is used
// as a default argument further down in this file - presumably the
// enumerator list is missing from this copy; confirm against the original.
typedef enum {
} SFileFormat;

/**
 * Growable buffer of fixed-size words, stored through the union `u`.
 *
 * NOTE(review): the braces and access specifiers of this class were missing
 * from this copy of the header. They are restored here with every member
 * public (the most permissive choice, so no caller can break); confirm the
 * intended access levels against the original source.
 */
class SBMapBuffer
{
public:
  SBMapBuffer(int fromWordSize, int toWordSize);

  void reset ();

  // Append `length` units from `buffer`.
  // NOTE(review): presumably the FromNet/ToNet variants convert between
  // network and host byte order while appending - confirm in the .cpp file.
  void append (const void* buffer, int length);
  void appendFromNet (const void* buffer, int length);
  void appendToNet (const void* buffer, int length);
  void ensureCapacity (int length);

  // Shift matched input with size.
  void    shift (int size);

  // The length of data expected in the input.
  // 0 1 2 3
  int  fromWordSize;

  // The length of data in buffer
  // 0 1 2 3
  int  toWordSize;

  // This is the size of one of the buffers below
  int       bufferSize;

  // This is the length of the array. You should do a  toWordSize*length
  int    length;

  // The same storage viewed with different word widths.
  union {
    SS_WORD8  *u8;
    SS_WORD16  *u16;
    SS_WORD32  *u32;
    SS_WORD64  *u64;
  } u;
};


/**
 * Per-conversion state: state machine position, circular-match bookkeeping,
 * and the input/output SBMapBuffer pair. A pointer to it is passed to
 * SBMap::encode() and SBMap::circle().
 *
 * NOTE(review): the braces and access specifiers of this class were missing
 * from this copy of the header. They are restored here with every member
 * public; confirm the intended access levels against the original source.
 */
class SStateModel
{
public:
  SStateModel (int inFromLength, 
    int inToLength,
    int outFromLength,
    int outToLength);
  ~SStateModel ();  

  void    reset ();

  // State Machine. Do not overwrite it.
  // high is used for state machine state.
  int    low;
  int    high;

  int    nextPos;
  int    lastPos;

  int    lastMatch;

  // These are used for circular matches.
  int    circle;
  SS_WORD64  circleResult;
  int    circleCount;
  int    circleSize;
  int    needReset;

  // Declaration order is kept as-is (out before in): it is also the
  // member initialization order.
  SBMapBuffer    out;
  SBMapBuffer    in;
};


// Type of the `_last` argument of SBMapItem::writeCodeArea() and
// SBMapItem::writeTextBytes(); presumably a line-end marker.
// NOTE(review): no enumerators are visible in this copy of the header;
// confirm the enumerator list against the original source.
typedef enum {
} SS_LineEnd;

class SBMapItem
  // These flags are the apper bits of states in state machine.
  enum   SFound {REJECT=0, MORE, MATCH_MORE, MATCH };
  enum   SBMapItemType {SBMapNToN=0, SBMapBumap}; 

  // This will create an SBMapBumap static Map
  // If matrix is null, it will me a straight map.
  SBMapItem (int encode, unsigned int inWordSize, int outWordSize, SS_WORD16 highMin, SS_WORD16 highMax, SS_WORD16 lowMin, SS_WORD16 lowMax, const unsigned char* matrix=0);

  // This will create an SBMapNToN static Map
  SBMapItem (const unsigned char* buffer);

  // This will create an SBMapNToN static Map
  SBMapItem (int _encode, 
    const unsigned char* _name, const unsigned char* _comment, 
    unsigned int _commentSize,
    unsigned int _inWordSize, unsigned int _outWordSize, 
    unsigned int _inLengthSize, unsigned int _outLengthSize);
  ~SBMapItem ();
  unsigned int getDecoderMap (SStringVector* key, SStringVector* value,
        unsigned int size);

  /* For maps with holes */
  unsigned int getLinearPosition (SS_UCS4 key);
  SS_UCS4 getLinearKey (unsigned int position);
  SS_UCS4 getLinearValue (unsigned int position);

  // to is also used to hash out state machine states.
  SFound find(const unsigned char in, unsigned int pos, int* from, int* to);
  const unsigned char* getComment (int arrayIndex, unsigned int* length);
  const unsigned char* getValue (int arrayIndex, unsigned int* length);
  const unsigned char* getKey (int arrayIndex, unsigned int* length,
    unsigned int* matchedLength=0);

  unsigned int getLength(unsigned int arrayIndex);
  void   convertFromBumap();

  // This takes
  int  add (const unsigned char* key, int keySize, int matchSize, 
      const unsigned char* value, int valueSize,
      const unsigned char* comment, int commSize);

  int  addLine (const unsigned char* line, unsigned int length, bool reverse=false);

  void buildStateMachine ();
  int  serialize (SString* fd, SFileFormat format, int last=0);
  int SBMapItem::getSerializeSize ();

  // Get rid of the state machine.
  void  strip ();

  // The comment field 
  // byte size.
  SS_WORD32  commentSize;
  const unsigned char*  comment;

  // Set to one if encode.
  unsigned char encode;

  // The input/output word in bytes
  unsigned char inWordSize;
  unsigned char outWordSize;

  // The size of the length indicator (in bytes) in from of strings
  unsigned char inByteLength;
  unsigned char outByteLength;

  // This is an array of SS_WORD32 s but they may not be aligned.
  // WORD32 - 4 byte word size.
  SS_WORD32    codeSize;
  const unsigned char*  codeMap;

  // This is the buffer where the references refer to in codeMap.
  // byte size
  SS_WORD32    baseSize;
  const unsigned char*   base;

  // 64 byte Word size
  SS_WORD32             stateMachineSize;
  const unsigned char*  stateMachine;

  // 32 bytes
  const unsigned char*   name;

  SBMapItemType    itemType;

  SS_WORD16    highMin;
  SS_WORD16    highMax;
  SS_WORD16    lowMin;
  SS_WORD16    lowMax;

  int       writeCodeArea (SString* _fd, int _index, 
          SFileFormat _format, SS_LineEnd _last);

  int      writeTextBytes (SString* fd, 
          const unsigned char *from,
          int length, int slash, int wordSize,
          SFileFormat _format, SS_LineEnd _last); 
  unsigned int nextSorted (const unsigned char* key, unsigned int keylen);

  unsigned char*     toHex (const unsigned char* in, unsigned int size, unsigned int* len, unsigned int* match);
  unsigned int      stateMachineBufferSize;
  // This is writable
  unsigned char*     stateMachineBuffer;

  unsigned int       codeMapBufferSize;
  unsigned char*     codeMapBuffer;

  unsigned int       baseBufferSize;
  unsigned char*     baseBuffer;

  unsigned int       commentBufferSize;
  unsigned char*     commentBuffer;

  unsigned char*     nameBuffer;

  enum   SType { SS_STATIC=0, SS_DYNAMIC=1 };
  SType    stateMachineType;
  SType    baseType;
  SType    codeMapType;
  SType    commentType;
  SType    nameType;

  // This add one element to state machine, if needed.
  SS_WORD32   addState(SS_WORD32 oldState, 
        const unsigned char in, 
        unsigned int pos, 
        int from, int to);

// This is for low level routines.
// Encode means reverse map should be used.

// Negative sentinel codes.
// NOTE(review): their users are not visible in this header; presumably they
// are accept/reject results of the low level matching routines.
// The replacement lists are parenthesized so the unary minus cannot combine
// with surrounding tokens when the macro is expanded inside an expression.
#define SS_ACCEPT (-1)
#define SS_REJECT (-2)

 * This is really what you should use
00275 class SBMap 
  // This is actually reverse = encode logic.
  enum   SBMapType {SBMap_DECODE=0, SBMap_ENCODE};

  SBMap ();

  void setType (int mapType);
  void setName (const unsigned char* name);
  void setComment (const unsigned char* comment, int commentSize);

  ~SBMap ();

  /* for bumaps only - don't call this otherwise ! */
  SS_UCS4 decode (SS_UCS2 in);
  SS_UCS2 encode (SS_UCS4 in);

  unsigned int getDecoderMap (SStringVector* key, SStringVector* value,
        unsigned int size);

  /* For maps with holes */
  unsigned int getLinearPosition (unsigned int _index, SS_UCS4 key);
  SS_UCS4 getLinearKey (unsigned int _index, unsigned int position);
  SS_UCS4 getLinearValue (unsigned int _index, unsigned int position);

  // This item will be owned by SBMap.
  bool add (SBMapItem* item, int position=-1);

  bool  setFileImage (const SFileImage& image);

  bool  setArray (unsigned char* buffer, int size);

  inline bool getStatus() { return status; }

  inline unsigned int getInWordSize (int mapIndex)
    return (unsigned int) maps[mapIndex]->inWordSize;
  inline unsigned int getOutWordSize (int mapIndex)
    return (unsigned int) maps[mapIndex]->outWordSize;

  bool SBMap::makeUnicodeMap ();
  bool SBMap::makeStraightMap ();

  // Return max index
  int getSize (int index=-1);

  // Return SBMap_ENCODE or SBMap_DECODE
  SBMapType getType (int index);

  int  encode (int mapIndex, const void* in, int in_size,
    SStateModel *stateModel, int more=0);

  // Same as encode, but put it in the circle
  int  circle (SBMapType type, const void* in, int in_size,
    SStateModel *stateModel, int more=0);

  // Return the name field into an array if called with no args return
   // The name of the whole map. Return the size, but null teminate as well
  int  getName (char* line, int len, int mapIndex=-1);

  // Read 'between the lines' and return the comment.
  // Return the size, bu null terminate too.
  int  getComment (char* line, int len, int mapIndex=-1);

  // Has state machine ?
  const unsigned char*  getStateMachine (int mapIndex);

  void buildStateMachine (int mapIndex=-1);

  // Add data to buffer...
  int serialize (SOutputStream& fd, SFileFormat format=SS_BINARY);

  inline SBMapItem*  getItem (int _index)
    return maps[_index];

  // Get rid of state machines.
  void strip ();
  int    mapType;
  bool   isUMap();

  // For humap, cumap
  int serializeUMAP (SString* fd, SFileFormat format=SS_BINARY);

  int  packString (char* line, int len, const unsigned char* input, int maxlen);
  enum  Type { SBMap_MMAP, SBMap_ARRAY, SBMap_DYNAMIC };
  void   setOutput(SBMapItem* map, SStateModel* stateModel);

  bool  status;

  void clear ();
  bool processBuffer ();
  bool processSBMapBuffer ();
  bool processBMBuffer ();

  SFileImage      image;      

  unsigned char*    buffer;
  unsigned int      bufferSize;
  Type                bufferType;

  // 32 bytes
  const unsigned char*   name;
  unsigned char*  nameBuffer;

  Type    nameType;

  const unsigned char*  comment;
  SS_WORD32  commentSize;

  unsigned char*  commentBuffer;
  Type    commentType;

  const unsigned char*  base;

  // From buffer. checkBuffer sets them, clear clear them.

  // Points to beginning of tables.
  int    mapSize;
  SBMapItem**  maps;

#endif /* SBMap_H */

Generated by  Doxygen 1.6.0   Back to index