Repository: mgholam/RaptorDB-Document
Branch: master
Commit: 4c1c2af5825e
Files: 150
Total size: 1.2 MB
Directory structure:
gitextract_wcou_a2z/
├── BuildVersion.cs
├── LICENSE
├── README.md
├── RaptorDB/
│ ├── AssemblyInfo.cs
│ ├── DataTypes/
│ │ └── DataTypes.cs
│ ├── Global.cs
│ ├── Helper/
│ │ ├── Container.cs
│ │ ├── MGRB.cs
│ │ └── WAHBitarray2.cs
│ ├── Indexes/
│ │ ├── BitmapIndex.cs
│ │ ├── Cache.cs
│ │ ├── Hoot.cs
│ │ ├── IIndex.cs
│ │ ├── ITokenizer.cs
│ │ ├── IndexFile.cs
│ │ ├── Indexes.cs
│ │ ├── MGIndex.cs
│ │ └── tokenizer.cs
│ ├── Properties/
│ │ ├── Resources.Designer.cs
│ │ └── Resources.resx
│ ├── REST/
│ │ ├── aWebServer.cs
│ │ └── rdbRest.cs
│ ├── RaptorDB.cs
│ ├── RaptorDB.csproj
│ ├── RaptorDBServer.cs
│ ├── Replication/
│ │ ├── Configuration.cs
│ │ ├── Packets.cs
│ │ ├── Readme.txt
│ │ ├── ReplicationClient.cs
│ │ ├── ReplicationServer.cs
│ │ └── msg.txt
│ ├── Storage/
│ │ ├── KeyStore.cs
│ │ ├── KeyStoreHF.cs
│ │ ├── StorageFile.cs
│ │ ├── StorageFileHF.cs
│ │ └── StringHF.cs
│ ├── Views/
│ │ ├── Dynamic.cs
│ │ ├── LINQQuery.cs
│ │ ├── TaskQueue.cs
│ │ ├── ViewHandler.cs
│ │ ├── ViewManager.cs
│ │ └── apimapper.cs
│ ├── WEB/
│ │ ├── bundle.css
│ │ ├── bundle.js
│ │ ├── global.css
│ │ └── index.html
│ └── cron/
│ ├── CronDaemon.cs
│ ├── CronJob.cs
│ └── CronSchedule.cs
├── RaptorDB.Common/
│ ├── DataTypes.cs
│ ├── FieldDescriptor.cs
│ ├── IRaptorDB.cs
│ ├── Interfaces.cs
│ ├── LINQString.cs
│ ├── Logger.cs
│ ├── MiniLZO.cs
│ ├── MurMurHash2.cs
│ ├── NetworkClient.cs
│ ├── Packets.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ ├── RaptorDB.Common.csproj
│ ├── RaptorDBClient.cs
│ ├── SafeDictionary.cs
│ ├── View.cs
│ ├── ZipStorer.cs
│ ├── fastBinaryJSON/
│ │ ├── BJSON.cs
│ │ ├── BJsonParser.cs
│ │ ├── BJsonSerializer.cs
│ │ ├── Helper.cs
│ │ └── dynamic.cs
│ └── fastJSON/
│ ├── Formatter.cs
│ ├── Getters.cs
│ ├── Helper.cs
│ ├── JSON.cs
│ ├── JsonParser.cs
│ ├── JsonSerializer.cs
│ ├── Reflection.cs
│ └── dynamic.cs
├── RaptorDBCore/
│ ├── RaptorDB/
│ │ └── RaptorDB.csproj
│ ├── RaptorDb.Common/
│ │ └── RaptorDb.Common.csproj
│ └── test/
│ ├── sample.cs
│ └── test.csproj
├── RaptorDBServer/
│ ├── Installer.cs
│ ├── Program.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ ├── RaptorDBServer.csproj
│ ├── Service1.Designer.cs
│ ├── Service1.cs
│ └── Service1.resx
├── RaptorDBTest.sln
├── RaptorDB_Doc.nuspec
├── RaptorDbCore.sln
├── Tools/
│ └── buildversion.ncs
├── Views/
│ ├── Class1.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ ├── ServerSide.cs
│ └── Views.csproj
├── WebStudio/
│ ├── README.md
│ ├── build.cmd
│ ├── deploy.cmd
│ ├── package.json
│ ├── rollup.config.js
│ └── src/
│ ├── App.svelte
│ ├── UI/
│ │ ├── Button.svelte
│ │ ├── Modal.svelte
│ │ ├── datatable.svelte
│ │ └── nav.svelte
│ ├── debug.js
│ ├── global.css
│ ├── index.html
│ ├── main.js
│ └── pages/
│ ├── dochistory.svelte
│ ├── docsearch.svelte
│ ├── docview.svelte
│ ├── help.svelte
│ ├── hfbrowser.svelte
│ ├── query.svelte
│ ├── schema.svelte
│ ├── sysconfig.svelte
│ └── sysinfo.svelte
├── build.cmd
├── datagridbinding/
│ ├── Form1.resources
│ ├── Program.cs
│ ├── Properties/
│ │ └── AssemblyInfo.cs
│ ├── datagridbinding.csproj
│ ├── frmMain.Designer.cs
│ ├── frmMain.cs
│ ├── frmMain.resx
│ ├── frmStartup.Designer.cs
│ ├── frmStartup.cs
│ └── frmStartup.resx
├── history.txt
├── raptordb.snk
├── test script/
│ ├── run.cmd
│ └── sample.cs
├── testing/
│ ├── AssemblyInfo.cs
│ ├── Class1.cs
│ ├── program.cs
│ └── tests.csproj
├── testing.view
└── vbTestConsole/
├── App.config
├── Module1.vb
├── My Project/
│ ├── Application.Designer.vb
│ ├── Application.myapp
│ ├── AssemblyInfo.vb
│ ├── Resources.Designer.vb
│ ├── Resources.resx
│ ├── Settings.Designer.vb
│ └── Settings.settings
└── vbtestconsole.vbproj
================================================
FILE CONTENTS
================================================
================================================
FILE: BuildVersion.cs
================================================
using System.Reflection;
// build number = 606
// build version = 4.0.10
[assembly: AssemblyVersion("4.0.0.0")]
[assembly: AssemblyFileVersion("4.0.10.606")]
================================================
FILE: LICENSE
================================================
The MIT License (MIT)
Copyright (c) 2014 Mehdi Gholam
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
================================================
FILE: README.md
================================================
# RaptorDB Document Store
NoSql, JSON based, Document store database with compiled .net map functions and automatic hybrid bitmap indexing and LINQ query filters (now with standalone Server mode, Backup and Active Restore, Transactions, Server side queries, MonoDroid support, HQ-Branch Replication)
See the article here: [http://www.codeproject.com/Articles/375413/RaptorDB-the-Document-Store](http://www.codeproject.com/Articles/375413/RaptorDB-the-Document-Store)
## Quick Start
First compile the source, then you can easily run any c# file like this:
```
# run any cs file
c:\rdb\test script> ..\tools\nscript.exe sample.cs
# or just run the batch file
c:\rdb\test script> run.cmd
```
The `sample.cs` file now contains a comment section at the top for specifying the references it uses, which tells `nscript.exe` where to find the dll files:
```
// ref : ..\output\raptordb.dll
// ref : ..\output\raptordb.common.dll
// ref : ..\faker.dll
using System;
using System.Collections.Generic;
...
```
================================================
FILE: RaptorDB/AssemblyInfo.cs
================================================
using System.Reflection;
using System.Security;
[assembly: AssemblyTitle("RaptorDB Document Store")]
[assembly: AssemblyDescription("NoSql, JSON based, Document store database with compiled .net map functions and automatic hybrid bitmap indexing and LINQ query filters (now with standalone Server mode, Backup and Active Restore, Transactions, Server side queries, MonoDroid support, HQ-Branch Replication)")]
[assembly: AssemblyProduct("RaptorDB Document Store")]
================================================
FILE: RaptorDB/DataTypes/DataTypes.cs
================================================
using System;
using RaptorDB.Common;
namespace RaptorDB
{
/// <summary>
/// Used to track ViewDelete usage for view rebuilds
/// </summary>
internal class View_delete
{
// Identifier of this record; a new Guid is generated for every instance.
public Guid ID = Guid.NewGuid();
// Name of the view involved (presumably the view the delete targets — confirm against callers).
public string Viewname;
// Filter text associated with the delete; its format is not visible in this file.
public string Filter;
}
// Record describing a row object together with a view name.
// NOTE(review): by analogy with View_delete this presumably tracks view inserts for rebuilds — confirm.
internal class View_insert
{
// Identifier of this record; a new Guid is generated for every instance.
public Guid ID = Guid.NewGuid();
// Name of the view involved.
public string Viewname;
// The row payload; typed as object, the concrete type is decided by callers.
public object RowObject;
}
// Empty marker type with no members.
// NOTE(review): presumably used as a type tag to select full text indexing — confirm against the index code.
internal class FullTextString
{
}
// Empty marker type with no members.
// NOTE(review): presumably used as a type tag to disable indexing for a column — confirm against the index code.
internal class NoIndexing
{
}
// Contract for objects that populate a row from an array of values.
public interface IRowFiller
{
// Takes a row object and the values to apply, and returns a row object.
// NOTE(review): whether the same instance is returned or a new one is up to the implementer — confirm.
object FillRow(object row, object[] data);
}
// Contract for converting a key value of type T to bytes and back.
// NOTE(review): the type parameter list is not visible in this view of the file
// (presumably IGetBytes<T>) — confirm against the repository.
internal interface IGetBytes
{
// Encodes obj into a byte array.
byte[] GetBytes(T obj);
// Decodes a value from buffer starting at offset; count is the number of bytes available
// to the implementation (the implementations in this file ignore it, except the string handler).
T GetObject(byte[] buffer, int offset, int count);
}
// Static helpers selected by the key type T: handler lookup, key byte size and empty value.
// NOTE(review): the type parameter list is not visible in this view of the file — confirm.
internal class RDBDataType
{
    // Returns the byte handler matching T, or null when T has no handler.
    public static IGetBytes ByteHandler()
    {
        Type keyType = typeof(T);

        if (keyType == typeof(int)) return (IGetBytes)new int_handler();
        if (keyType == typeof(uint)) return (IGetBytes)new uint_handler();
        if (keyType == typeof(long)) return (IGetBytes)new long_handler();
        if (keyType == typeof(Guid)) return (IGetBytes)new guid_handler();
        if (keyType == typeof(string)) return (IGetBytes)new string_handler();
        if (keyType == typeof(DateTime)) return (IGetBytes)new datetime_handler();
        if (keyType == typeof(decimal)) return (IGetBytes)new decimal_handler();
        if (keyType == typeof(short)) return (IGetBytes)new short_handler();
        if (keyType == typeof(ushort)) return (IGetBytes)new ushort_handler();
        if (keyType == typeof(float)) return (IGetBytes)new float_handler();
        if (keyType == typeof(byte)) return (IGetBytes)new byte_handler();
        if (keyType == typeof(double)) return (IGetBytes)new double_handler();

        return null;
    }

    // Returns the number of bytes used for a key of type T.
    // Strings use the supplied keysize; every type not listed below falls back to 4.
    public static byte GetByteSize(byte keysize)
    {
        Type keyType = typeof(T);

        if (keyType == typeof(string))
            return keysize;
        if (keyType == typeof(byte))
            return 1;
        if (keyType == typeof(short))
            return 2;
        if (keyType == typeof(long) || keyType == typeof(DateTime) || keyType == typeof(double))
            return 8;
        if (keyType == typeof(Guid) || keyType == typeof(decimal))
            return 16;

        // int, uint, float and anything unlisted
        return 4;
    }

    // Returns "" for string keys and default(T) for everything else.
    internal static object GetEmpty()
    {
        if (typeof(T) == typeof(string))
        {
            return "";
        }
        return default(T);
    }
}
#region [ handlers ]
// Byte conversion for double values, delegating to BitConverter.
internal class double_handler : IGetBytes
{
    // Encodes the value with BitConverter.GetBytes.
    public byte[] GetBytes(double obj) => BitConverter.GetBytes(obj);

    // Decodes a double starting at offset; count is not used.
    public double GetObject(byte[] buffer, int offset, int count) => BitConverter.ToDouble(buffer, offset);
}
// Byte conversion for single byte values.
internal class byte_handler : IGetBytes
{
    // Wraps the value in a one element array.
    public byte[] GetBytes(byte obj) => new[] { obj };

    // Returns the byte found at offset; count is not used.
    public byte GetObject(byte[] buffer, int offset, int count) => buffer[offset];
}
// Byte conversion for float values, delegating to BitConverter.
internal class float_handler : IGetBytes
{
    // Encodes the value with BitConverter.GetBytes.
    public byte[] GetBytes(float obj) => BitConverter.GetBytes(obj);

    // Decodes a float starting at offset; count is not used.
    public float GetObject(byte[] buffer, int offset, int count) => BitConverter.ToSingle(buffer, offset);
}
// Byte conversion for decimal values: the four ints from decimal.GetBits are stored back to back (16 bytes).
internal class decimal_handler : IGetBytes
{
    // Encodes each of the four bit parts with Helper.GetBytes and packs them into a 16 byte array.
    public byte[] GetBytes(decimal obj)
    {
        int[] parts = decimal.GetBits(obj);
        byte[] packed = new byte[16];
        for (int n = 0; n < parts.Length; n++)
        {
            byte[] encoded = Helper.GetBytes(parts[n], false);
            Buffer.BlockCopy(encoded, 0, packed, n * 4, 4);
        }
        return packed;
    }

    // Reads four consecutive ints starting at offset and rebuilds the decimal; count is not used.
    public decimal GetObject(byte[] buffer, int offset, int count)
    {
        int[] parts = new int[4];
        for (int n = 0; n < parts.Length; n++)
        {
            parts[n] = Helper.ToInt32(buffer, offset + n * 4);
        }
        return new decimal(parts);
    }
}
// Byte conversion for ushort values through the project Helper routines.
internal class ushort_handler : IGetBytes
{
    // Encodes the value with Helper.GetBytes (second argument false, as in every numeric handler here).
    public byte[] GetBytes(ushort obj) => Helper.GetBytes(obj, false);

    // Reads a 16 bit value at offset and reinterprets it as ushort; count is not used.
    public ushort GetObject(byte[] buffer, int offset, int count) => (ushort)Helper.ToInt16(buffer, offset);
}
// Byte conversion for short values through the project Helper routines.
internal class short_handler : IGetBytes
{
    // Encodes the value with Helper.GetBytes.
    public byte[] GetBytes(short obj) => Helper.GetBytes(obj, false);

    // Reads a 16 bit value at offset; count is not used.
    public short GetObject(byte[] buffer, int offset, int count) => Helper.ToInt16(buffer, offset);
}
// Byte conversion for string values through the project Helper routines.
internal class string_handler : IGetBytes
{
    // Encodes the string with Helper.GetBytes.
    public byte[] GetBytes(string obj) => Helper.GetBytes(obj);

    // Decodes count bytes starting at offset; count is narrowed to short before the call.
    public string GetObject(byte[] buffer, int offset, int count) => Helper.GetString(buffer, offset, (short)count);
}
// Byte conversion for int values through the project Helper routines.
internal class int_handler : IGetBytes
{
    // Encodes the value with Helper.GetBytes.
    public byte[] GetBytes(int obj) => Helper.GetBytes(obj, false);

    // Reads a 32 bit value at offset; count is not used.
    public int GetObject(byte[] buffer, int offset, int count) => Helper.ToInt32(buffer, offset);
}
// Byte conversion for uint values through the project Helper routines.
internal class uint_handler : IGetBytes
{
    // Encodes the value with Helper.GetBytes.
    public byte[] GetBytes(uint obj) => Helper.GetBytes(obj, false);

    // Reads a 32 bit value at offset and reinterprets it as uint; count is not used.
    public uint GetObject(byte[] buffer, int offset, int count) => (uint)Helper.ToInt32(buffer, offset);
}
// Byte conversion for long values through the project Helper routines.
internal class long_handler : IGetBytes
{
    // Encodes the value with Helper.GetBytes.
    public byte[] GetBytes(long obj) => Helper.GetBytes(obj, false);

    // Reads a 64 bit value at offset; count is not used.
    public long GetObject(byte[] buffer, int offset, int count) => Helper.ToInt64(buffer, offset);
}
// Byte conversion for Guid values using the Guid's own 16 byte representation.
internal class guid_handler : IGetBytes
{
    // Returns Guid.ToByteArray().
    public byte[] GetBytes(Guid obj) => obj.ToByteArray();

    // Copies 16 bytes starting at offset and builds a Guid from them; count is not used.
    public Guid GetObject(byte[] buffer, int offset, int count)
    {
        var raw = new byte[16];
        Buffer.BlockCopy(buffer, offset, raw, 0, raw.Length);
        return new Guid(raw);
    }
}
// Byte conversion for DateTime values, stored as their Ticks (a long).
internal class datetime_handler : IGetBytes
{
    // Encodes obj.Ticks with Helper.GetBytes.
    public byte[] GetBytes(DateTime obj) => Helper.GetBytes(obj.Ticks, false);

    // Reads a 64 bit tick count at offset and builds a DateTime from it; count is not used.
    public DateTime GetObject(byte[] buffer, int offset, int count) => new DateTime(Helper.ToInt64(buffer, offset));
}
#endregion
}
================================================
FILE: RaptorDB/Global.cs
================================================
namespace RaptorDB
{
// Process-wide configuration values exposed as public static fields.
public class Global
{
/// <summary>
/// Store bitmap as int offsets then switch over to bitarray
/// </summary>
public static int BitmapOffsetSwitchOverCount = 10;
/// <summary>
/// True = Save to other views in process , False = background save to other views
/// NOTE(review): the field name suggests the opposite mapping (true = background save) — confirm against the code that reads this flag.
/// </summary>
public static bool BackgroundSaveToOtherViews = true;
/// <summary>
/// Default maximum string key size for indexes
/// </summary>
public static byte DefaultStringKeySize = 60;
/// <summary>
/// Free bitmap index memory on save
/// </summary>
public static bool FreeBitmapMemoryOnSave = false;
/// <summary>
/// Number of items in each index page (default = 10000) [Expert only, do not change]
/// </summary>
public static ushort PageItemCount = 10000;
/// <summary>
/// KeyStore save to disk timer
/// </summary>
public static int SaveIndexToDiskTimerSeconds = 1800;
/// <summary>
/// Flush the StorageFile stream immediately
/// </summary>
public static bool FlushStorageFileImmediately = false;
/// <summary>
/// Save doc as binary json
/// </summary>
public static bool SaveAsBinaryJSON = true;
/// <summary>
/// Remove completed tasks timer
/// </summary>
public static int TaskCleanupTimerSeconds = 3;
/// <summary>
/// Save to other views timer seconds if enabled
/// </summary>
public static int BackgroundSaveViewTimer = 1;
/// <summary>
/// How many items to process in a background view save event
/// </summary>
public static int BackgroundViewSaveBatchSize = 1000000;
/////
///// Check the restore folder for new backup files to restore
/////
//public static int RestoreTimerSeconds = 10; // TODO : implement this
/// <summary>
/// Timer for full text indexing of original documents (default = 15 sec)
/// </summary>
public static int FullTextTimerSeconds = 15;
/// <summary>
/// How many documents to full text index in a batch
/// </summary>
public static int BackgroundFullTextIndexBatchSize = 10000;
/// <summary>
/// Free memory checking timer (default = 300 sec ~ 5 min)
/// </summary>
public static int FreeMemoryTimerSeconds = 5 * 60;// 1800;
/// <summary>
/// Memory usage limit for internal caching (default = 100 Mb) [using GC.GetTotalMemory()]
/// </summary>
public static long MemoryLimit = 100;
/// <summary>
/// Backup cron schedule (default = "0 * * * *" [every hour])
/// </summary>
public static string BackupCronSchedule = "0 * * * *";
/// <summary>
/// Require primary view to be defined for save, false = key/value store (default = true)
/// </summary>
public static bool RequirePrimaryView = true;
/// <summary>
/// Maximum documents in each package for replication
/// </summary>
public static int PackageSizeItemCountLimit = 10000;
/// <summary>
/// Process inbox timer (default = 60 sec)
/// </summary>
public static int ProcessInboxTimerSeconds = 60;
/// <summary>
/// Split the data storage files in MegaBytes (default 0 = off) [500 = 500mb]
/// - You can set and unset this value anytime and it will operate from that point on.
/// - If you unset (0) the value previous split files will remain and all the data will go to the last file.
/// </summary>
public static ushort SplitStorageFilesMegaBytes = 0;
/// <summary>
/// Compress the documents in the storage file if it is over this size (default = 100 Kilobytes)
/// - You will be trading CPU for disk IO
/// </summary>
public static ushort CompressDocumentOverKiloBytes = 100;
/// <summary>
/// Disk block size for high frequency KV storage file (default = 2048)
/// * Do not use anything under 512 with large string keys
/// </summary>
public static ushort HighFrequencyKVDiskBlockSize = 2048;
/// <summary>
/// String key MGIndex that stores keys in an external file for smaller index files
/// </summary>
public static bool EnableOptimizedStringIndex = true;
/// <summary>
/// Enable the Web Studio interface
/// </summary>
public static bool EnableWebStudio = false;
/// <summary>
/// Web Studio port (default = 91)
/// </summary>
public static short WebStudioPort = 91;
/// <summary>
/// Local machine access only Web Studio - no network access (default = true)
/// </summary>
public static bool LocalOnlyWebStudio = true;
/// <summary>
/// If True -> less memory use SafeSortedList and slower
/// False -> more memory use SafeDictionary and faster
/// </summary>
public static bool UseLessMemoryStructures = false;
//public static bool useSortedList = false;
// NOTE(review): undocumented in the source; presumably toggles compression of serialized bitmap bytes — confirm against the bitmap index code.
public static bool CompressBitmapBytes = false;
// NOTE(review): undocumented in the source; presumably skips storing documents when inserting directly into a view — confirm against the view handler.
public static bool SkipDocsOnViewInsert = false;
}
}
================================================
FILE: RaptorDB/Helper/Container.cs
================================================
using System.Collections.Generic;
namespace RaptorDB
{
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
class BitmapContainer : Container
{
// Creates a container without touching any field; every member keeps its declared default.
public BitmapContainer() { }
public BitmapContainer(int OneCount)
{
if (OneCount > Container.BSize)
throw new System.Exception("OneCount > 65536");
else if (OneCount == Container.BSize)
ALLONE = true;
else
{
List l = new List();
while(OneCount>0)
{
if (OneCount > 64)
l.Add(ulong.MaxValue);
else
l.Add(ulong.MaxValue< 0)
return _onecount;
lock (_lock)
{
long c = 0;
foreach (var l in _values)
c += BitCount(l);
_onecount = c;
return c;
}
}
// Number of clear bits in the container: 0 when ALLONE is set, otherwise BSize minus the set bits.
public override long CountZeros()
{
    return ALLONE ? 0 : BSize - CountOnes();
}
// Enumerates the positions of all set bits in ascending order, as ushort values.
// Bit 0 of the container is the highest order bit of the first word.
// The lock on _lock is taken inside the iterator body, so it is held while the caller enumerates.
public override IEnumerable GetBitIndexes()
{
    lock (_lock)
    {
        if (ALLONE)
        {
            for (int i = 0; i < BSize; i++)
                yield return (ushort)i;
            // fix: stop here. Falling through into the word loop either dereferenced a
            // null _values or reported the same positions a second time.
            yield break;
        }
        // _values can be null (Set() and Copy() both guard against it): nothing is set.
        if (_values == null)
            yield break;
        // int base so the counter cannot wrap after the last of 1024 words
        int wordBase = 0;
        foreach (var word in _values)
        {
            for (int i = 0; i < 64; i++)
            {
                ulong mask = (ulong)1 << (63 - i); // high order bit get
                if ((word & mask) != 0)
                    yield return (ushort)(wordBase + i);
            }
            wordBase += 64;
        }
    }
}
// Returns whether the bit at offset is set.
// offset is truncated to 16 bits to pick the word; bit 0 is the highest order bit of a word.
// Positions beyond the stored words, or any position when no words are stored, read as false.
public override bool Get(long offset)
{
    lock (_lock)
    {
        if (ALLONE)
            return true;
        int pos = (ushort)offset >> 6;
        // fix: _values may be null (Set() and Copy() guard against it); treat that as "no bits set"
        if (_values == null || pos >= _values.Length) // out of range
            return false;
        int off = (int)(offset % 64);
        ulong mask = (ulong)1 << (63 - off); // high order bit get
        return (_values[pos] & mask) != 0;
    }
}
// Sets or clears the bit at offset.
// An ALLONE container is first expanded into 1024 fully set words when a bit has to be cleared.
// The word array grows on demand, the cached one-count is invalidated and Size becomes
// the number of stored words times 64.
public override void Set(long offset, bool val)
{
    lock (_lock)
    {
        if (ALLONE)
        {
            // already all ones: setting a bit changes nothing
            if (val)
                return;

            // change to bits
            ALLONE = false;
            _values = new ulong[1024];
            for (int n = 0; n < _values.Length; n++)
                _values[n] = ulong.MaxValue;
        }

        int wordIndex = (ushort)offset >> 6;
        int bitInWord = (int)(offset % 64);
        _onecount = -1;

        if (_values == null)
            _values = new ulong[0];

        if (wordIndex >= _values.Length) // out of range
        {
            // resize
            var grown = new ulong[wordIndex + 1];
            _values.CopyTo(grown, 0);
            _values = grown;
        }

        ulong mask = (ulong)1 << (63 - bitInWord); // high order bit get
        if (val)
            _values[wordIndex] |= mask;
        else
            _values[wordIndex] &= ~mask;

        Size = _values.Length * 64;
    }
}
// Tells whether Change() would pick a different representation:
// never for ALLONE; yes when every bit is set, when fewer than CHGOVER bits are clear,
// or when the word array uses more bytes than two bytes per set bit.
public override bool ChangeRequired()
{
    if (ALLONE)
        return false;

    if (CountOnes() == BSize)   // -> all ones container
        return true;

    if (CountZeros() < CHGOVER) // -> inverted container
        return true;

    var offsetBytes = CountOnes() << 1;   // *2
    var bitmapBytes = _values.Length << 3; // *8
    return bitmapBytes > offsetBytes;     // -> offset container
}
// This container is already a bitmap, so the bitmap form is simply a copy of it.
public override Container ToBitmap() => Copy();
// Builds the replacement representation for this container:
// an all-ones bitmap when every bit is set, an InvertedContainer when fewer than
// CHGOVER bits are clear, otherwise an OffsetContainer holding the set positions.
public override Container Change()
{
    if (ALLONE || CountOnes() == BSize)
        return new BitmapContainer(true);

    // create inverted
    if (CountZeros() < CHGOVER)
        return new InvertedContainer(Not().GetBitIndexes());

    Container offsets = new OffsetContainer();
    foreach (var position in GetBitIndexes())
    {
        offsets.Set(position, true);
    }
    return offsets;
}
// Returns a new BitmapContainer with the same content:
// all-ones stays all-ones, stored words are passed through Values(), and a container
// without words yields an empty one.
public override Container Copy()
{
    if (ALLONE)
        return new BitmapContainer(true);

    bool hasWords = _values != null && _values.Length > 0;
    return hasWords ? new BitmapContainer(Values()) : new BitmapContainer();
}
// Returns the complement of this container as a new BitmapContainer.
// ALLONE becomes an empty container; otherwise the result always has 1024 words,
// with positions beyond the stored words complemented to all ones.
public override Container Not()
{
    lock (_lock)
    {
        if (ALLONE)
            return new BitmapContainer();
        // fix: _values may be null (Set() and Copy() guard against it); an absent word
        // array means no bit is set, so every word of the complement is all ones.
        int stored = _values == null ? 0 : _values.Length;
        var vals = new ulong[1024]; // TODO : upto Size ??
        for (int i = 0; i < vals.Length; i++)
            vals[i] = i < stored ? ~_values[i] : ulong.MaxValue;
        return new BitmapContainer(vals);
    }
}
}
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
// Container that stores the positions of its set bits as a sorted list of offsets.
// Lookups use BinarySearch and inserts keep the list ordered.
// NOTE(review): generic type arguments are not visible in this view of the file; the list
// presumably holds ushort values (see Values() and the (ushort) casts) — confirm.
class OffsetContainer : Container
{
    // Creates an empty container; Size keeps its default of -1.
    public OffsetContainer()
    {
    }

    // Creates a container from existing offsets.
    // NOTE(review): the values are assumed to arrive in ascending order, since Get/Set
    // rely on BinarySearch — confirm against callers.
    public OffsetContainer(IEnumerable vals)
    {
        _values = new List(vals);
        // fix: an empty sequence used to throw on _values[-1]; report -1 like Set() does
        // for an empty list.
        if (_values.Count > 0)
            Size = _values[_values.Count - 1];
        else
            Size = -1;
    }

    List _values = new List();
    private object _lock = new object();

    // Returns a snapshot array of the stored offsets.
    public ushort[] Values()
    {
        lock (_lock)
            return _values.ToArray();
    }

    // Builds a BitmapContainer with the same bits set.
    public override Container ToBitmap()
    {
        lock (_lock)
        {
            var c = new BitmapContainer();
            foreach (var i in _values)
                c.Set(i, true);
            return c;
        }
    }

    // Builds the replacement representation: all-ones bitmap when full, inverted container
    // when fewer than CHGOVER bits are clear, otherwise a plain bitmap.
    public override Container Change()
    {
        if (CountOnes() == Container.BSize)
            return new BitmapContainer(true);
        if (CountZeros() < CHGOVER)
            return new InvertedContainer(ToBitmap().Not().GetBitIndexes());
        return ToBitmap();
    }

    // True when more than CHGOVER offsets are stored or fewer than CHGOVER bits are clear.
    public override bool ChangeRequired()
    {
        if (_values.Count > CHGOVER)
            return true;
        if (CountZeros() < CHGOVER)
            return true;
        return false;
    }

    // Returns a new OffsetContainer holding the same offsets.
    public override Container Copy()
    {
        lock (_lock)
        {
            if (_values != null && _values.Count > 0)
                return new OffsetContainer(_values.ToArray());
            else
                return new OffsetContainer();
        }
    }

    // Number of set bits, i.e. the number of stored offsets.
    public override long CountOnes()
    {
        return _values.Count;
    }

    // Number of clear bits out of BSize.
    public override long CountZeros()
    {
        return BSize - CountOnes();
    }

    // Returns whether offset (truncated to ushort) is stored.
    public override bool Get(long offset)
    {
        lock (_lock)
        {
            return _values.BinarySearch((ushort)offset) >= 0;
        }
    }

    // Enumerates the stored offsets in list order; the lock is held while enumerating.
    public override IEnumerable GetBitIndexes()
    {
        lock (_lock)
            foreach (var i in _values)
                yield return i;
    }

    // Adds offset when val is true and it is missing, removes it when val is false and present.
    // Size is then set to the largest stored offset, or -1 when the list is empty.
    public override void Set(long offset, bool val)
    {
        lock (_lock)
        {
            var i = _values.BinarySearch((ushort)offset);
            if (val == true)
            {
                if (i < 0) // not in array -> add
                {
                    // ~i is the insertion point that keeps the list sorted
                    var c = ~i;
                    if (c < _values.Count)
                        _values.Insert(c, (ushort)offset);
                    else
                        _values.Add((ushort)offset);
                }
            }
            else if (i >= 0)
            {
                // remove from array
                _values.RemoveAt(i);
            }
            if (_values.Count > 0)
                Size = _values[_values.Count - 1];
            else
                Size = -1;
        }
    }

    // Complement, computed through the bitmap form.
    public override Container Not()
    {
        return ToBitmap().Not();
    }
}
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
// Container that stores the positions of its CLEAR bits as a sorted list; every position
// not in the list reads as set.
// NOTE(review): generic type arguments are not visible in this view of the file; the list
// presumably holds ushort values (see Values() and the (ushort) casts) — confirm.
class InvertedContainer : Container
{
    // Creates a container with no recorded zeros.
    public InvertedContainer()
    {
    }

    // list of zeros
    // Size becomes the last listed value, or BSize when the list is empty.
    public InvertedContainer(IEnumerable vals)
    {
        _values = new List(vals);
        int count = _values.Count;
        if (count == 0)
            Size = Container.BSize;
        else
            Size = _values[count - 1];
    }

    List _values = new List();
    private object _lock = new object();

    // Builds a BitmapContainer by setting every position reported by GetBitIndexes().
    public override Container ToBitmap()
    {
        lock (_lock)
        {
            var bitmap = new BitmapContainer();
            foreach (var position in GetBitIndexes())
                bitmap.Set(position, true);
            return bitmap;
        }
    }

    // Switches to a bitmap once more than CHGOVER zeros are recorded, otherwise returns a copy.
    public override Container Change()
    {
        return CountZeros() > CHGOVER ? ToBitmap() : Copy();
    }

    // True when more than CHGOVER zeros are recorded.
    public override bool ChangeRequired()
    {
        return CountZeros() > CHGOVER;
    }

    // Returns a new InvertedContainer holding the same zero positions.
    public override Container Copy()
    {
        lock (_lock)
        {
            bool hasZeros = _values != null && _values.Count > 0;
            if (!hasZeros)
                return new InvertedContainer();
            return new InvertedContainer(_values.ToArray());
        }
    }

    // Number of set bits out of BSize.
    public override long CountOnes()
    {
        return BSize - CountZeros();
    }

    // Number of clear bits, i.e. the number of recorded zeros.
    public override long CountZeros()
    {
        return _values.Count;
    }

    // A position is set unless it appears in the list of zeros.
    public override bool Get(long offset)
    {
        lock (_lock)
        {
            int found = _values.BinarySearch((ushort)offset);
            return found < 0;
        }
    }

    // Enumerates every position in [0, BSize) that is not in the list of zeros;
    // the lock is held while enumerating.
    public override IEnumerable GetBitIndexes()
    {
        lock (_lock)
        {
            for (int position = 0; position < BSize; position++)
            {
                bool isZero = _values.BinarySearch((ushort)position) >= 0;
                if (!isZero)
                    yield return (ushort)position;
            }
        }
    }

    // Complement, computed through the bitmap form.
    public override Container Not()
    {
        return ToBitmap().Not();
    }

    // Clearing a bit records its position as a zero; setting a bit removes it from the zeros.
    // Size is then set to the largest recorded zero, or -1 when none remain.
    public override void Set(long offset, bool val)
    {
        lock (_lock)
        {
            ushort position = (ushort)offset;
            int found = _values.BinarySearch(position);

            if (!val)
            {
                if (found < 0) // not in array -> add
                {
                    // ~found is the sorted insertion point; inserting at Count appends
                    _values.Insert(~found, position);
                }
            }
            else if (found >= 0)
            {
                // remove from array
                _values.RemoveAt(found);
            }

            // fix : return correct size
            int count = _values.Count;
            Size = count > 0 ? _values[count - 1] : -1;
        }
    }

    // Returns a snapshot array of the recorded zero positions.
    public ushort[] Values()
    {
        lock (_lock)
            return _values.ToArray();
    }
}
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------------------------------
// Base class for the bit containers in this file; each container covers BSize positions.
public abstract class Container
{
    // Number of bit positions covered by one container.
    internal const int BSize = 65536;
    // Threshold used by the subclasses when deciding to switch representation.
    internal const int CHGOVER = 4096;

    // Sets or clears the bit at offset.
    public abstract void Set(long offset, bool val);
    // Returns whether the bit at offset is set.
    public abstract bool Get(long offset);
    // Number of set bits.
    public abstract long CountOnes();
    // Number of clear bits.
    public abstract long CountZeros();
    // Enumerates the positions of the set bits.
    public abstract IEnumerable GetBitIndexes();
    // Tells whether Change() should be called to switch representation.
    public abstract bool ChangeRequired();
    // Returns the container in its replacement representation.
    public abstract Container Change();
    // Returns the content as a bitmap based container.
    public abstract Container ToBitmap();
    // Returns a copy of the container.
    public abstract Container Copy();
    // Returns the complement of the container.
    public abstract Container Not();

    // Size value maintained by the subclasses; -1 until one of them assigns it.
    public int Size = -1;

    //[MethodImpl(MethodImplOptions.AggressiveInlining)]
    // Counts the set bits of x with the parallel (SWAR) summing technique.
    public static int BitCount(ulong x)
    {
        const ulong everyOtherBit = 0x5555555555555555UL;
        const ulong everyOtherPair = 0x3333333333333333UL;
        const ulong lowNibbles = 0x0F0F0F0F0F0F0F0FUL;
        const ulong byteOnes = 0x0101010101010101UL;

        // count of each 2 bits stored in those 2 bits
        ulong acc = x - ((x >> 1) & everyOtherBit);
        // count of each 4 bits stored in those 4 bits
        acc = (acc & everyOtherPair) + ((acc >> 2) & everyOtherPair);
        // count of each 8 bits stored in those 8 bits
        acc = (acc + (acc >> 4)) & lowNibbles;
        // the multiplication sums all byte counts into the top byte
        return (int)((acc * byteOnes) >> 56);
    }
}
// Kind tag stored with each serialized container (see CData.t).
// The numeric values follow the member order, so members must not be reordered.
public enum CTYPE
{
    ALLONES,
    BITMAP,
    OFFSET,
    INV
    //,OFFSETSL
}
// Serialized form of a single container, as produced by MGRB.Serialize().
public class CData
{
// Container key, narrowed to ushort by the serializer.
public ushort i;
// Container kind.
public CTYPE t;
// Container payload bytes; left unassigned for the ALLONES kind.
public byte[] d;
}
// Serialized form of a whole MGRB bitmap: the list of its serialized containers.
// NOTE(review): the list's element type is not visible in this view (presumably CData) — confirm.
public class MGRBData
{
// One entry per container, in the order MGRB.Serialize() visited them.
public List c = new List();
}
}
================================================
FILE: RaptorDB/Helper/MGRB.cs
================================================
using RaptorDB.Common;
using System;
using System.Collections.Generic;
namespace RaptorDB
{
public class MGRB
{
// Creates an empty bitmap; all fields keep their declared initial values.
public MGRB() { }
// Wraps an existing container list; passing -1 makes the other constructor derive the size.
internal MGRB(SafeSortedList containers)
    : this(containers, -1)
{
}
// Wraps an existing container list with the given size.
// When size is zero or negative the size is derived from the last container:
// its key shifted left by 16 plus that container's Size, or 0 when there are no containers.
internal MGRB(SafeSortedList containers, long size)
{
    _containers = containers;
    var k = _containers.Keys();
    _size = size;
    if (size <= 0)//== -1)
    {
        _size = 0;
        var l = k.Length - 1;
        if (l >= 0)
        {
            // fix: widen before shifting so the sum is computed in 64 bits; the int
            // expression could overflow before being stored in the long _size.
            _size = ((long)k[l] << 16) + _containers.GetValue(l).Size;
        }
    }
}
// Containers keyed by the upper bits of a position (position >> 16).
// NOTE(review): generic type arguments are not visible in this view of the file — confirm.
private SafeSortedList _containers = new SafeSortedList();
// Value reported by Length; Set() raises it to the highest position set to true.
private long _size;
// Mask selecting the low 16 bits of a position, i.e. the offset inside a container.
private ushort _MASK = 0xffff;
// Guards Set(), Get() and Optimize().
private object _lock = new object();
// Set to true by Set(); nothing in the visible code resets it.
public bool isDirty = false;
// Current size value of the bitmap.
public long Length { get { return _size; } }
// Sets or clears the bit at position and marks the bitmap dirty.
// The container for the position is created (as an OffsetContainer) when missing,
// and the size grows when a bit beyond it is set to true.
public void Set(long position, bool val)
{
    lock (_lock)
    {
        isDirty = true;

        if (val && _size < position)
            _size = position;

        int containerKey = (int)(position >> 16);
        Container target;
        if (!_containers.TryGetValue(containerKey, out target))
        {
            // add container
            target = new OffsetContainer();
            _containers.Add(containerKey, target);
        }

        target.Set(position & _MASK, val);
    }
}
// Returns whether the bit at position is set; positions without a container read as false.
public bool Get(long position)
{
    lock (_lock)
    {
        int containerKey = (int)(position >> 16);
        Container holder;
        if (!_containers.TryGetValue(containerKey, out holder))
            return false;
        return holder.Get(position & _MASK);
    }
}
// Returns a new bitmap holding the intersection of this bitmap and B.
// Only container keys present on both sides contribute; the result length is the smaller length.
public MGRB And(MGRB B)
{
    var combined = new SafeSortedList();
    long length = Math.Min(_size, B.Length);
    int lastShared = Math.Min(LastContainerIdx(), B.LastContainerIdx());

    for (int key = 0; key <= lastShared; key++)
    {
        Container mine;
        Container theirs;
        _containers.TryGetValue(key, out mine);
        B._containers.TryGetValue(key, out theirs);

        if (mine == null || theirs == null)
            continue;

        combined.Add(key, containerAND(mine, theirs));
    }

    return new MGRB(combined, length);
}
// Returns a new bitmap holding the union of this bitmap and B.
// A container present on one side only is copied; containers present on both sides are OR-ed.
// The result length is the larger length.
public MGRB Or(MGRB B)
{
    var combined = new SafeSortedList();
    long length = Math.Max(_size, B.Length);
    int lastKey = Math.Max(LastContainerIdx(), B.LastContainerIdx());

    for (int key = 0; key <= lastKey; key++)
    {
        Container mine;
        Container theirs;
        _containers.TryGetValue(key, out mine);
        B._containers.TryGetValue(key, out theirs);

        if (mine != null && theirs != null)
            combined.Add(key, containerOR(mine, theirs));
        else if (mine != null)
            combined.Add(key, mine.Copy());
        else if (theirs != null)
            combined.Add(key, theirs.Copy());
    }

    return new MGRB(combined, length);
}
// Returns this bitmap AND the complement of b, where the complement spans the larger
// of the two sizes.
public MGRB AndNot(MGRB b)
{
    long span = Math.Max(_size, b._size);
    return And(b.Not(span));
}
// Returns a new bitmap in which every existing container is complemented.
// Keys without a container are not added; the size is carried over unchanged.
public MGRB Not()
{
    var inverted = new SafeSortedList();
    foreach (var entry in _containers)
        inverted.Add(entry.Key, entry.Value.Not());

    return new MGRB(inverted, _size);
}
// Returns the complement over container keys 0 .. (count >> 16).
// A missing container counts as all zeros, so its complement is an all-ones container.
public MGRB Not(long count)
{
    var inverted = new SafeSortedList();
    long lastKey = count >> 16;

    for (int key = 0; key <= lastKey; key++)
    {
        Container existing;
        _containers.TryGetValue(key, out existing);

        if (existing != null)
            inverted.Add(key, existing.Not());
        else
            inverted.Add(key, new BitmapContainer(true));
    }

    return new MGRB(inverted, count);
}
// Creates a bitmap with the first count bits set.
// Full blocks become all-ones containers; the final block is built from the remaining bit count.
// A count of 0 returns an empty bitmap.
public static MGRB Fill(long count)
{
    if (count == 0)
        return new MGRB();

    var filled = new SafeSortedList();
    long remaining = count;

    for (int key = 0; remaining > 0; key++)
    {
        Container block;
        if (remaining > Container.BSize)
            block = new BitmapContainer(true);
        else
            block = new BitmapContainer((int)remaining);

        filled.Add(key, block);
        remaining -= Container.BSize;
    }

    return new MGRB(filled, count);
}
// Total number of set bits across all containers; 0 when the size is not positive.
public long CountOnes()
{
    if (_size <= 0)
        return 0;

    long total = 0;
    foreach (var entry in _containers)
        total += entry.Value.CountOnes();
    return total;
}
// Number of clear bits, computed as the size minus the number of set bits.
public long CountZeros()
{
    long ones = CountOnes();
    long zeros = _size - ones;
    return zeros;
}
// Enumerates the positions of all set bits: each container's key shifted left by 16
// plus the offsets that container reports.
public IEnumerable GetBitIndexes()
{
    foreach (var entry in _containers)
    {
        int containerBase = entry.Key << 16;
        foreach (var offset in entry.Value.GetBitIndexes())
        {
            yield return containerBase + offset;
        }
    }
}
// Compacts the bitmap in place and returns it:
// containers without set bits are removed, and containers reporting ChangeRequired()
// are replaced by their Change() result.
public MGRB Optimize()
{
    lock (_lock)
    {
        var keys = _containers.Keys();
        // removals are deferred until the scan over the keys has finished
        var emptyKeys = new List();

        foreach (var key in keys)
        {
            var current = _containers[key];
            if (current.CountOnes() == 0)
            {
                emptyKeys.Add(key);
                continue;
            }
            if (current.ChangeRequired())
                _containers[key] = current.Change();
        }

        foreach (var key in emptyKeys)
            _containers.Remove(key);

        return this;
    }
}
// Converts the bitmap into its serializable form: one CData per container with the key
// (narrowed to ushort), the container kind and, except for all-ones bitmaps, the payload bytes.
public MGRBData Serialize()
{
    var result = new MGRBData();

    foreach (var entry in _containers)
    {
        var item = new CData();
        item.i = (ushort)entry.Key;

        switch (entry.Value)
        {
            case BitmapContainer full when full.ALLONE:
                // all ones: the kind alone describes the content
                item.t = CTYPE.ALLONES;
                break;

            case BitmapContainer bitmap:
                item.t = CTYPE.BITMAP;
                // get data
                item.d = ToByteArray(bitmap.Values());
                break;

            case OffsetContainer offsets:
                item.t = CTYPE.OFFSET;
                item.d = ToByteArray(offsets.Values());
                break;

            case InvertedContainer inverted:
                item.t = CTYPE.INV;
                item.d = ToByteArray(inverted.Values());
                break;
        }

        result.c.Add(item);
    }

    return result;
}
// Rebuilds containers from their serialized form and adds them to this bitmap,
// then derives the size from the last container (key shifted left by 16 plus its Size).
// Entries of an unrecognised kind are added with a null container, as before.
public void Deserialize(MGRBData input)
{
    foreach (var c in input.c)
    {
        Container con = null;
        if (c.t == CTYPE.ALLONES)
        {
            con = new BitmapContainer(true);
        }
        else if (c.t == CTYPE.BITMAP)
        {
            // one 64 bit word per 8 payload bytes
            var words = new ulong[(c.d.Length + 7) / 8];
            for (int w = 0; w < words.Length; w++)
                words[w] = ToULong(c.d, w * 8);
            con = new BitmapContainer(words);
        }
        else if (c.t == CTYPE.OFFSET)
        {
            var offsets = ReadUShortArray(c.d);
            // fix: an emptied OffsetContainer serializes to zero bytes; build it with the
            // default constructor because the list constructor indexes the last element.
            if (offsets.Length == 0)
                con = new OffsetContainer();
            else
                con = new OffsetContainer(offsets);
        }
        else if (c.t == CTYPE.INV)
        {
            con = new InvertedContainer(ReadUShortArray(c.d));
        }
        _containers.Add(c.i, con);
    }
    var k = _containers.Keys();
    var l = k.Length - 1;
    if (l >= 0)
    {
        // fix: widen before shifting so the sum is computed in 64 bits; the int
        // expression could overflow before being stored in the long _size.
        _size = ((long)k[l] << 16) + _containers.GetValue(l).Size;
    }
}

// Decodes a payload of consecutive 2 byte values into an array using ToUShort.
private ushort[] ReadUShortArray(byte[] data)
{
    var result = new ushort[(data.Length + 1) / 2];
    for (int n = 0; n < result.Length; n++)
        result[n] = ToUShort(data, n * 2);
    return result;
}
/// <summary>
/// Returns an independent copy of this bitmap, built by a Serialize / Deserialize round trip.
/// A bitmap without containers yields a fresh empty instance.
/// </summary>
public MGRB Copy()
{
    if (_containers.Count() <= 0)
        return new MGRB();

    var snapshot = Serialize();
    var clone = new MGRB();
    clone.Deserialize(snapshot);
    return clone;
}
/// <summary>
/// Returns the first value produced by <c>GetBitIndexes()</c>, or 0 when it produces nothing.
/// </summary>
public int GetFirst()
{
    foreach (var bit in GetBitIndexes())
        return bit;

    return 0;
}
/// <summary>
/// Returns the last entry of <c>_containers.Keys()</c>, or 0 when there are no containers.
/// </summary>
private int LastContainerIdx()
{
    if (_containers.Count() <= 0)
        return 0;

    return _containers.Keys()[_containers.Count() - 1];
}
/// <summary>
/// Word-wise AND of two containers. Non-bitmap containers are converted with <c>ToBitmap()</c> first.
/// A bitmap whose <c>Values()</c> is null is treated as 1024 words of all ones.
/// </summary>
/// <returns>A new <c>BitmapContainer</c> as long as the shorter operand.</returns>
private static Container containerAND(Container ca, Container cb)
{
    BitmapContainer left = ca is BitmapContainer ? (BitmapContainer)ca : (BitmapContainer)ca.ToBitmap();
    BitmapContainer right = cb is BitmapContainer ? (BitmapContainer)cb : (BitmapContainer)cb.ToBitmap();

    var leftWords = left.Values();
    var rightWords = right.Values();

    int leftLen = leftWords != null ? leftWords.Length : 1024;
    int rightLen = rightWords != null ? rightWords.Length : 1024;
    int count = rightLen < leftLen ? rightLen : leftLen;

    var result = new ulong[count];
    for (int w = 0; w < count; w++)
    {
        // a missing value array stands for a word of all ones
        ulong x = leftWords != null ? leftWords[w] : ulong.MaxValue;
        ulong y = rightWords != null ? rightWords[w] : ulong.MaxValue;
        result[w] = x & y;
    }
    return new BitmapContainer(result);
}
/// <summary>
/// Word-wise OR of two containers. Non-bitmap containers are converted with <c>ToBitmap()</c> first.
/// </summary>
/// <remarks>
/// A bitmap whose <c>Values()</c> is null is treated as 1024 words of all ones, matching
/// <c>containerAND</c>. Previously such an operand contributed zeros, so OR with an
/// all-ones container returned only the other operand's bits.
/// </remarks>
/// <returns>A new <c>BitmapContainer</c> as long as the longer operand.</returns>
private static Container containerOR(Container ca, Container cb)
{
    BitmapContainer a = ca is BitmapContainer ? (BitmapContainer)ca : (BitmapContainer)ca.ToBitmap();
    BitmapContainer b = cb is BitmapContainer ? (BitmapContainer)cb : (BitmapContainer)cb.ToBitmap();

    var av = a.Values();
    var bv = b.Values();
    var la = av != null ? av.Length : 1024;
    var lb = bv != null ? bv.Length : 1024;
    var max = la > lb ? la : lb;

    var vals = new ulong[max];
    for (int i = 0; i < max; i++)
    {
        // null values => all ones; past the end of a shorter operand => zeros
        ulong ua = av == null ? ulong.MaxValue : (i < la ? av[i] : 0UL);
        ulong ub = bv == null ? ulong.MaxValue : (i < lb ? bv[i] : 0UL);
        vals[i] = ua | ub;
    }
    return new BitmapContainer(vals);
}
/// <summary>
/// Returns the two bytes of <paramref name="num"/> in the platform's native byte order,
/// or in the opposite order when <paramref name="reverse"/> is true.
/// </summary>
private static unsafe byte[] GetBytes(ushort num, bool reverse)
{
    byte[] bytes = BitConverter.GetBytes(num);
    if (reverse)
        Array.Reverse(bytes);
    return bytes;
}
/// <summary>
/// Reads an unsigned 64-bit value from <paramref name="value"/> at <paramref name="startIndex"/>
/// in the platform's native byte order.
/// </summary>
/// <remarks>
/// Uses <c>BitConverter</c> so that fewer than 8 remaining bytes raise an exception
/// instead of reading past the end of the array through a raw pointer.
/// </remarks>
private static unsafe ulong ToULong(byte[] value, int startIndex)
{
    return BitConverter.ToUInt64(value, startIndex);
}
/// <summary>
/// Reads an unsigned 16-bit value from <paramref name="value"/> at <paramref name="startIndex"/>
/// in the platform's native byte order.
/// </summary>
/// <remarks>
/// Uses <c>BitConverter</c> so that a single trailing byte raises an exception
/// instead of reading past the end of the array through a raw pointer.
/// </remarks>
private static unsafe ushort ToUShort(byte[] value, int startIndex)
{
    return BitConverter.ToUInt16(value, startIndex);
}
/// <summary>
/// Copies the raw memory of a <c>ulong[]</c> into a new byte array (8 bytes per element,
/// native byte order).
/// </summary>
/// <param name="data">Source words; must not be null.</param>
/// <returns>A byte array of length <c>8 * data.Length</c>.</returns>
private static unsafe byte[] ToByteArray(ulong[] data)
{
    byte[] byteArray = new byte[8 * data.Length];
    // Buffer.BlockCopy counts BYTES, not elements, so pass the destination length
    Buffer.BlockCopy(data, 0, byteArray, 0, byteArray.Length);
    return byteArray;
}
/// <summary>
/// Copies the raw memory of a <c>ushort[]</c> into a new byte array (2 bytes per element,
/// native byte order).
/// </summary>
/// <param name="data">Source values; must not be null.</param>
/// <returns>A byte array of length <c>2 * data.Length</c>.</returns>
private static unsafe byte[] ToByteArray(ushort[] data)
{
    byte[] byteArray = new byte[2 * data.Length];
    // Buffer.BlockCopy counts BYTES, not elements, so pass the destination length
    Buffer.BlockCopy(data, 0, byteArray, 0, byteArray.Length);
    return byteArray;
}
}
}
================================================
FILE: RaptorDB/Helper/WAHBitarray2.cs
================================================
using RaptorDB.Common;
using System;
using System.Collections.Generic;
namespace RaptorDB
{
public class WAHBitArray
{
/// <summary>Internal representation currently held by a <c>WAHBitArray</c>.</summary>
public enum TYPE
{
WAH = 1, // compressed words (see Compress / Uncompress)
Bitarray = 0, // plain uint[] bitmap
Indexes = 2 // explicit collection of set bit offsets
}
/// <summary>Creates an empty bit array in <c>TYPE.Indexes</c> state.</summary>
public WAHBitArray()
{
_state = TYPE.Indexes;
// the offset store implementation is selected by the global memory setting
if (Global.UseLessMemoryStructures)
_offsets = new SafeSortedList();
else
_offsets = new SafeDictionary();
}
/// <summary>
/// Creates a bit array from previously stored data.
/// </summary>
/// <param name="type">How <paramref name="ints"/> is to be interpreted.</param>
/// <param name="ints">
/// Compressed words (<c>TYPE.WAH</c>, uncompressed immediately), the bitmap words themselves
/// (<c>TYPE.Bitarray</c>, the array is used as-is, not copied), or the set bit offsets (<c>TYPE.Indexes</c>).
/// </param>
public WAHBitArray(TYPE type, uint[] ints)
{
    _state = type;
    switch (type)
    {
        case TYPE.WAH:
            _compressed = ints;
            Uncompress();
            _state = TYPE.Bitarray;
            _compressed = null;
            break;
        case TYPE.Bitarray:
            _uncompressed = ints;
            break;
        case TYPE.Indexes:
            if (Global.UseLessMemoryStructures)
                _offsets = new SafeSortedList();
            else
                _offsets = new SafeDictionary();
            foreach (var i in ints)
            {
                _offsets.Add(i, true);
                // keep the highest offset up to date, as Set() does, so that
                // ChangeTypeIfNeeded() compares against the real bitmap size
                if (i > _curMax)
                    _curMax = i;
            }
            break;
    }
}
private uint[] _compressed;
private uint[] _uncompressed;
//private Dictionary _offsets = new Dictionary();
private IKV _offsets = null;// new SafeSortedList();
private uint _curMax = 0;
private TYPE _state;
public bool isDirty = false;
/// <summary>Returns a new bit array in Bitarray state built from a copy of the current bits.</summary>
public WAHBitArray Copy()
{
    lock (_lock)
        return new WAHBitArray(TYPE.Bitarray, GetBitArray());
}
/// <summary>Returns whether the bit at <paramref name="index"/> is set.</summary>
/// <remarks>Outside Indexes state the backing array is first resized to cover <paramref name="index"/>.</remarks>
public bool Get(int index)
{
    lock (_lock)
    {
        if (_state != TYPE.Indexes)
        {
            CheckBitArray();
            Resize(index);
            return internalGet(index);
        }

        bool stored = false;
        return _offsets.TryGetValue((uint)index, out stored) ? stored : false;
    }
}
private object _lock = new object();
/// <summary>Sets or clears the bit at <paramref name="index"/>.</summary>
/// <remarks>
/// In Indexes state the offset is added to or removed from the offset store, the highest
/// offset seen is tracked, and the representation may be switched by ChangeTypeIfNeeded().
/// </remarks>
public void Set(int index, bool val)
{
    lock (_lock)
    {
        if (_state != TYPE.Indexes)
        {
            CheckBitArray();
            Resize(index);
            internalSet(index, val);
            return;
        }

        isDirty = true;
        if (val)
        {
            _offsets.Add((uint)index, true);
            if (index > _curMax)
                _curMax = (uint)index; // remember the highest offset
        }
        else
        {
            _offsets.Remove((uint)index);
        }
        ChangeTypeIfNeeded();
    }
}
/// <summary>
/// Bit length. In Indexes state the getter returns the highest stored offset (0 when empty)
/// and the setter is ignored; otherwise the getter returns 32 * word count and the setter
/// only ever grows the backing array.
/// </summary>
public int Length
{
    set
    {
        lock (_lock)
        {
            if (_state == TYPE.Indexes)
                return; // ignored in Indexes state

            CheckBitArray();
            int words = (value >> 5) + 1;
            if (words <= _uncompressed.Length)
                return;

            uint[] grown = new uint[words];
            _uncompressed.CopyTo(grown, 0);
            _uncompressed = grown;
        }
    }
    get
    {
        if (_state != TYPE.Indexes)
        {
            CheckBitArray();
            return _uncompressed.Length << 5;
        }

        if (_offsets.Count() == 0) return 0;
        uint[] sorted = GetOffsets();
        return (int)sorted[sorted.Length - 1];
    }
}
#region [ B I T O P E R T A I O N S ]
/// <summary>Returns a new bit array holding <c>this AND op</c>; neither operand is modified.</summary>
public WAHBitArray And(WAHBitArray op)
{
    lock (_lock)
    {
        uint[] lhs, rhs;
        prelogic(op, out lhs, out rhs); // equal-length copies of both operands
        int words = lhs.Length;
        for (int w = 0; w < words; w++)
            lhs[w] = lhs[w] & rhs[w];
        return new WAHBitArray(TYPE.Bitarray, lhs);
    }
}
/// <summary>Returns a new bit array holding <c>this AND (NOT op)</c>; neither operand is modified.</summary>
public WAHBitArray AndNot(WAHBitArray op)
{
    lock (_lock)
    {
        uint[] lhs, rhs;
        prelogic(op, out lhs, out rhs);
        int words = lhs.Length;
        for (int w = 0; w < words; w++)
            lhs[w] = lhs[w] & ~rhs[w];
        return new WAHBitArray(TYPE.Bitarray, lhs);
    }
}
/// <summary>Returns a new bit array holding <c>this OR op</c>; neither operand is modified.</summary>
public WAHBitArray Or(WAHBitArray op)
{
    lock (_lock)
    {
        uint[] lhs, rhs;
        prelogic(op, out lhs, out rhs);
        int words = lhs.Length;
        for (int w = 0; w < words; w++)
            lhs[w] = lhs[w] | rhs[w];
        return new WAHBitArray(TYPE.Bitarray, lhs);
    }
}
/// <summary>
/// Returns a new bit array with every word inverted. The copy is first extended to cover
/// <paramref name="size"/> bits (rounded up to whole 32-bit words) when it is shorter.
/// </summary>
public WAHBitArray Not(int size)
{
    lock (_lock)
    {
        this.CheckBitArray();
        uint[] bits = this.GetBitArray();

        int wanted = size >> 5;
        if (size - (wanted << 5) > 0)
            wanted++; // include remainder

        if (wanted > bits.Length)
        {
            var grown = new uint[wanted];
            Array.Copy(bits, 0, grown, 0, bits.Length);
            bits = grown;
        }

        for (int w = 0; w < bits.Length; w++)
            bits[w] = ~bits[w];

        return new WAHBitArray(TYPE.Bitarray, bits);
    }
}
/// <summary>Returns a new bit array holding <c>this XOR op</c>; neither operand is modified.</summary>
public WAHBitArray Xor(WAHBitArray op)
{
    lock (_lock)
    {
        uint[] lhs, rhs;
        prelogic(op, out lhs, out rhs); // equal-length copies of both operands
        int words = lhs.Length;
        for (int w = 0; w < words; w++)
            lhs[w] = lhs[w] ^ rhs[w];
        return new WAHBitArray(TYPE.Bitarray, lhs);
    }
}
#endregion
/// <summary>Counts the set bits of a 32-bit value (SWAR reduction).</summary>
private static int BitCount(uint n)
{
    uint pairs = n - ((n >> 1) & 0x55555555);                           // 2-bit sums
    uint nibbles = ((pairs >> 2) & 0x33333333) + (pairs & 0x33333333);  // 4-bit sums
    uint bytes = ((nibbles >> 4) + nibbles) & 0x0f0f0f0f;               // 8-bit sums
    return (int)((bytes * 0x01010101) >> 24);                           // add the four bytes
}
/// <summary>Returns the number of set bits (the offset count in Indexes state).</summary>
public long CountOnes()
{
    if (_state == TYPE.Indexes)
        return _offsets.Count();

    CheckBitArray();
    long total = 0;
    uint[] words = _uncompressed;
    for (int w = 0; w < words.Length; w++)
        total += BitCount(words[w]);
    return total;
}
/// <summary>
/// Returns the number of clear bits. In Indexes state this is the highest stored offset
/// minus the number of offsets; otherwise 32 * word count minus <see cref="CountOnes"/>.
/// </summary>
/// <remarks>
/// An empty offset store now yields 0 (as the <c>Length</c> getter does) instead of
/// failing with an index error on the empty offsets array.
/// </remarks>
public long CountZeros()
{
    if (_state == TYPE.Indexes)
    {
        long ones = _offsets.Count();
        uint[] k = GetOffsets();
        if (k.Length == 0)
            return 0; // nothing stored: no highest offset to measure against
        long l = k[k.Length - 1];
        return l - ones;
    }
    CheckBitArray();
    int count = _uncompressed.Length << 5;
    long cc = CountOnes();
    return count - cc;
}
/// <summary>
/// When in Bitarray state with data present, compresses the bitmap, drops the
/// uncompressed array and switches to WAH state. Does nothing otherwise.
/// </summary>
public void FreeMemory()
{
    if (_state != TYPE.Bitarray || _uncompressed == null)
        return;

    lock (_lock)
    {
        _compressed = Compress(_uncompressed);
        _uncompressed = null;
        _state = TYPE.WAH;
    }
}
/// <summary>Returns the data to persist together with the representation it is in.</summary>
/// <param name="type">
/// <c>TYPE.Indexes</c> when the sorted offsets are returned, <c>TYPE.WAH</c> otherwise.
/// </param>
/// <returns>
/// Sorted offsets, the compressed bitmap, or a single zero word when there is no
/// uncompressed data.
/// </returns>
public uint[] GetCompressed(out TYPE type)
{
    type = TYPE.WAH;
    ChangeTypeIfNeeded();

    if (_state == TYPE.Indexes)
    {
        type = TYPE.Indexes;
        return GetOffsets();
    }

    if (_uncompressed == null)
        return new uint[] { 0 };

    return Compress(_uncompressed);
}
/// <summary>
/// Lazily enumerates the positions of all set bits in ascending order.
/// </summary>
public IEnumerable GetBitIndexes()
{
    if (_state == TYPE.Indexes)
    {
        foreach (int offset in GetOffsets())
            yield return offset;
        yield break;
    }

    CheckBitArray();
    int words = _uncompressed.Length;
    for (int w = 0; w < words; w++)
    {
        if (_uncompressed[w] > 0)
        {
            // at least one bit is set in this word: test each of its 32 positions
            int first = w << 5;
            for (int bit = 0; bit < 32; bit++)
            {
                if (internalGet(first + bit))
                    yield return first + bit;
            }
        }
    }
}
#region [ P R I V A T E ]
/// <summary>Returns a sorted copy of the keys held in <c>_offsets</c>.</summary>
private uint[] GetOffsets()
{
uint[] k;
lock (_lock)
{
// snapshot the keys under the lock; sorting is done outside it
k = new uint[_offsets.Count()];
_offsets.Keys().CopyTo(k, 0);
}
Array.Sort(k);
return k;
}
/// <summary>
/// Prepares both operands of a binary bit operation: obtains a bitmap copy of each and
/// zero-extends the shorter one so that both arrays have the same length.
/// </summary>
private void prelogic(WAHBitArray op, out uint[] left, out uint[] right)
{
    this.CheckBitArray();
    left = this.GetBitArray();
    right = op.GetBitArray();

    int leftLen = left.Length;
    int rightLen = right.Length;
    if (leftLen == rightLen)
        return;

    if (leftLen > rightLen)
    {
        uint[] padded = new uint[leftLen];
        right.CopyTo(padded, 0);
        right = padded;
    }
    else
    {
        uint[] padded = new uint[rightLen];
        left.CopyTo(padded, 0);
        left = padded;
    }
}
/// <summary>
/// Returns the bits as a freshly allocated uint[]: built from the offsets in Indexes
/// state, otherwise a copy of the uncompressed bitmap.
/// </summary>
internal uint[] GetBitArray()
{
    lock (_lock)
    {
        if (_state == TYPE.Indexes)
            return UnpackOffsets();

        this.CheckBitArray();
        return (uint[])_uncompressed.Clone();
    }
}
/// <summary>
/// Builds a bitmap from the stored offsets. The array is just long enough to hold the
/// highest offset; bit 0 of each word is its most significant bit.
/// </summary>
private uint[] UnpackOffsets()
{
    if (_offsets.Count() == 0)
        return new uint[0];

    uint[] sorted = GetOffsets();
    uint highest = sorted[sorted.Length - 1];
    uint[] words = new uint[(highest >> 5) + 1];

    foreach (int index in sorted)
    {
        int word = index >> 5;
        int shift = 31 - (index % 32); // high order bit first
        words[word] |= (uint)1 << shift;
    }
    return words;
}
/// <summary>
/// Switches from Indexes to Bitarray state once the number of stored offsets exceeds both
/// the number of 32-bit words needed for the highest offset and
/// <c>Global.BitmapOffsetSwitchOverCount</c>.
/// </summary>
private void ChangeTypeIfNeeded()
{
if (_state != TYPE.Indexes)
return;
// T = number of uint words a bitmap covering _curMax would need
uint T = (_curMax >> 5) + 1;
int c = _offsets.Count();
if (c > T && c > Global.BitmapOffsetSwitchOverCount)
{
// change type to WAH
// (the state is switched first, so the Set() calls below take the bitmap path)
_state = TYPE.Bitarray;
_uncompressed = new uint[0];
// create bitmap
foreach (var i in _offsets.Keys())
Set((int)i, true);
// clear list
if (Global.UseLessMemoryStructures)
_offsets = new SafeSortedList();
else
_offsets = new SafeDictionary();
//new Dictionary();
}
}
/// <summary>
/// Ensures the uncompressed bitmap has a word for bit <paramref name="index"/>,
/// allocating or growing it as required. No-op in Indexes state.
/// </summary>
private void Resize(int index)
{
    if (_state == TYPE.Indexes)
        return;

    int needed = (index >> 5) + 1;
    if (_uncompressed == null)
    {
        _uncompressed = new uint[needed];
        return;
    }

    if (needed <= _uncompressed.Length)
        return;

    uint[] grown = new uint[needed];
    _uncompressed.CopyTo(grown, 0);
    _uncompressed = grown;
}
/// <summary>Appends zero words to <paramref name="list"/> until it holds at least <c>index >> 5</c> entries.</summary>
private static void ResizeAsNeeded(List list, int index)
{
int count = index >> 5;
while (list.Count < count)
list.Add(0);
}
/// <summary>
/// Sets or clears one bit of the uncompressed bitmap and marks the array dirty.
/// The caller must have made the array large enough.
/// </summary>
private void internalSet(int index, bool val)
{
    isDirty = true;
    int word = index >> 5;
    uint bit = (uint)1 << (31 - (index % 32)); // high order bit first
    if (val)
        _uncompressed[word] = _uncompressed[word] | bit;
    else
        _uncompressed[word] = _uncompressed[word] & ~bit;
}
/// <summary>
/// Reads one bit of the uncompressed bitmap; positions beyond the end of the array read as false.
/// </summary>
private bool internalGet(int index)
{
    int word = index >> 5;
    if (word >= _uncompressed.Length)
        return false;

    uint bit = (uint)1 << (31 - (index % 32)); // high order bit first
    return (_uncompressed[word] & bit) != 0;
}
/// <summary>
/// Expands compressed data into the uncompressed bitmap when in WAH state and switches to
/// Bitarray state. Does nothing in any other state.
/// </summary>
private void CheckBitArray()
{
    if (_state != TYPE.WAH)
        return;

    _uncompressed = new uint[0];
    Uncompress();
    _state = TYPE.Bitarray;
    _compressed = null;
}
#region compress / uncompress
/// <summary>
/// Extracts the 31 bits starting at bit position <paramref name="index"/> (most significant
/// bit first) from <paramref name="data"/>. Bits beyond the end of the array read as zero.
/// </summary>
private static uint Take31Bits(uint[] data, int index)
{
    int word = index >> 5;
    int bitOffset = index % 32;

    ulong high = data[word];
    ulong low = (word + 1 < data.Length) ? data[word + 1] : 0UL;

    // 64-bit window over the two words, then shift the wanted 31 bits down
    ulong window = (high << 32) + low;
    return (uint)((window >> (33 - bitOffset)) & 0x7fffffff);
}
/// <summary>
/// Compresses a bitmap by scanning it in 31-bit groups: runs of all-zero or all-one groups
/// are accumulated and written as a single counter word, any other group is written as a literal.
/// </summary>
private static uint[] Compress(uint[] data)
{
List compressed = new List();
// pending run lengths, counted in bits (each group adds 31)
uint zeros = 0;
uint ones = 0;
int count = data.Length << 5;
int i = 0;
while (i < count)//for (int i = 0; i < count;)
{
uint num = Take31Bits(data, i);
i += 31;
if (num == 0) // all zero
{
zeros += 31;
// a zero group ends any pending run of ones
FlushOnes(compressed, ref ones);
}
else if (num == 0x7fffffff) // all ones
{
ones += 31;
// a ones group ends any pending run of zeros
FlushZeros(compressed, ref zeros);
}
else // literal
{
FlushOnes(compressed, ref ones);
FlushZeros(compressed, ref zeros);
compressed.Add(num);
}
}
// write whatever run is still pending
FlushOnes(compressed, ref ones);
FlushZeros(compressed, ref zeros);
return compressed.ToArray();
}
/// <summary>
/// Writes a pending run of ones, if any, as one word (0xc0000000 + run length) and resets the counter.
/// </summary>
private static void FlushOnes(List compressed, ref uint ones)
{
if (ones > 0)
{
uint n = 0xc0000000 + ones;
ones = 0;
compressed.Add(n);
}
}
/// <summary>
/// Writes a pending run of zeros, if any, as one word (0x80000000 + run length) and resets the counter.
/// </summary>
private static void FlushZeros(List compressed, ref uint zeros)
{
if (zeros > 0)
{
uint n = 0x80000000 + zeros;
zeros = 0;
compressed.Add(n);
}
}
/// <summary>
/// ORs the 31-bit literal <paramref name="val"/> into <paramref name="list"/> at bit position
/// <paramref name="index"/>, growing the list as needed.
/// </summary>
private static void Write31Bits(List list, int index, uint val)
{
ResizeAsNeeded(list, index + 32);
int off = (index % 32);
int pointer = index >> 5;
// make sure list[pointer + 1] exists
if (pointer >= list.Count - 1)
list.Add(0);
// combine the two affected words, OR the value in at the right offset, split again
ulong l = ((ulong)list[pointer] << 32) + list[pointer + 1];
l |= (ulong)val << (33 - off);
list[pointer] = (uint)(l >> 32);
list[pointer + 1] = (uint)(l & 0xffffffff);
}
/// <summary>
/// Writes a run of <paramref name="count"/> one-bits into <paramref name="list"/> starting at
/// bit position <paramref name="index"/>: first the remainder of the current word, then whole
/// words, then the leftover bits.
/// </summary>
private void WriteOnes(List list, int index, uint count)
{
ResizeAsNeeded(list, index);
int off = index % 32;
int pointer = index >> 5;
int ccount = (int)count;
int indx = index;
// x = bits left in the current word from `off` onwards
int x = 32 - off;
if (pointer >= list.Count)
list.Add(0);
if (ccount > x)//|| x == 32) //current pointer
{
// the run extends past this word: fill it from `off` to its end
list[pointer] |= (uint)((0xffffffff >> off));
ccount -= x;
indx += x;
}
else
{
// the whole run fits in this word
list[pointer] |= (uint)((0xffffffff << (32 - ccount)) >> off);
ccount = 0;
}
bool checklast = true;
while (ccount >= 32)//full ints
{
// only on the first pass: drop a trailing zero word before appending full words
if (checklast && list[list.Count - 1] == 0)
{
list.RemoveAt(list.Count - 1);
checklast = false;
}
list.Add(0xffffffff);
ccount -= 32;
indx += 32;
}
int p = indx >> 5;
off = indx % 32;
if (ccount > 0)
{
// leftover bits, left-aligned in a word
uint i = 0xffffffff << (32 - ccount);
if (p > (list.Count - 1)) //remaining
list.Add(i);
else
list[p] |= (uint)(i >> off);
}
}
/// <summary>
/// Expands <c>_compressed</c> into <c>_uncompressed</c>. Does nothing when there is no compressed data.
/// </summary>
/// <remarks>
/// Word layout as read here: top bit clear = 31-bit literal; top bit set = run whose length
/// is the low 30 bits, a run of ones when bit 30 is set and of zeros otherwise.
/// </remarks>
private void Uncompress()
{
// index = current bit position in the output
int index = 0;
List list = new List();
if (_compressed == null)
return;
foreach (uint ci in _compressed)
{
if ((ci & 0x80000000) == 0) // literal
{
Write31Bits(list, index, ci);
index += 31;
}
else
{
uint count = ci & 0x3fffffff;
if ((ci & 0x40000000) != 0) // ones count
WriteOnes(list, index, count);
// a run of zeros only advances the position
index += (int)count;
}
}
ResizeAsNeeded(list, index);
_uncompressed = list.ToArray();
}
#endregion
#endregion
/// <summary>
/// Creates a bit array whose first <paramref name="count"/> bits are set.
/// </summary>
/// <param name="count">Number of leading bits to set; values &lt;= 0 yield an empty array.</param>
/// <remarks>
/// Bits are stored most significant first, so a partial last word keeps its top
/// <c>count % 32</c> bits. The shift used to be <c>31 - r</c>, which set one bit too many
/// whenever <paramref name="count"/> was not a multiple of 32.
/// </remarks>
internal static WAHBitArray Fill(int count)
{
    if (count <= 0)
        return new WAHBitArray();

    int c = count >> 5;
    int r = count % 32;
    if (r > 0)
        c++;

    uint[] ints = new uint[c];
    for (int i = 0; i < c; i++)
        ints[i] = 0xffffffff;

    if (r > 0)
        ints[c - 1] = 0xffffffff << (32 - r); // keep exactly r leading bits

    return new WAHBitArray(TYPE.Bitarray, ints);
}
/// <summary>Returns the position of the first set bit, or 0 when no bit is set.</summary>
internal int GetFirst()
{
    int first = 0;
    foreach (var i in GetBitIndexes())
    {
        first = i;
        break;
    }
    return first;
}
}
}
================================================
FILE: RaptorDB/Indexes/BitmapIndex.cs
================================================
using RaptorDB.Common;
using System;
using System.Collections.Generic;
using System.IO;
using System.Threading;
namespace RaptorDB
{
internal class BitmapIndex
{
/// <summary>
/// Creates the index over the record and bitmap files named after <paramref name="filename"/>
/// (extension removed) in <paramref name="path"/>, and opens them.
/// </summary>
public BitmapIndex(string path, string filename)
{
// the cache implementation is selected by the global memory setting
if (Global.UseLessMemoryStructures)
_cache = new SafeSortedList();
else
_cache = new SafeDictionary();
_FileName = Path.GetFileNameWithoutExtension(filename);
_Path = path;
// make sure the path ends with a directory separator
if (_Path.EndsWith(Path.DirectorySeparatorChar.ToString()) == false)
_Path += Path.DirectorySeparatorChar.ToString();
Initialize();
}
/// <summary>
/// Scope guard for one index operation: construction calls CheckInternalOP() on the owner,
/// disposal calls Done(). Intended for use in a <c>using</c> statement.
/// </summary>
class L : IDisposable
{
    private readonly BitmapIndex _owner;

    public L(BitmapIndex sc)
    {
        sc.CheckInternalOP();
        _owner = sc;
    }

    void IDisposable.Dispose()
    {
        _owner.Done();
    }
}
private string _recExt = ".mgbmr";
private string _bmpExt = ".mgbmp";
private string _FileName = "";
private string _Path = "";
private FileStream _bitmapFileWriteOrg;
private BufferedStream _bitmapFileWrite;
private FileStream _bitmapFileRead;
private FileStream _recordFileRead;
private FileStream _recordFileWriteOrg;
private BufferedStream _recordFileWrite;
private long _lastBitmapOffset = 0;
private int _lastRecordNumber = 0;
//private SafeDictionary _cache = new SafeDictionary();
private IKV _cache = null;// new SafeSortedList();
private ILog log = LogManager.GetLogger(typeof(BitmapIndex));
private bool _stopOperations = false;
private bool _shutdownDone = false;
private int _workingCount = 0;
private bool _isDirty = false;
#region
/// <summary>Flushes and closes all index files (see InternalShutdown).</summary>
public void Shutdown()
{
using (new L(this))
{
log.Debug("Shutdown BitmapIndex");
InternalShutdown();
}
}
/// <summary>
/// Reserves the next record number, registers an empty bitmap for it in the cache and returns it.
/// </summary>
public int GetFreeRecordNumber()
{
    using (new L(this))
    {
        int recno = _lastRecordNumber;
        _lastRecordNumber = recno + 1;
        _cache.Add(recno, new MGRB());
        return recno;
    }
}
/// <summary>
/// Writes every dirty cached bitmap to disk (in ascending record number order) and flushes the files.
/// Does nothing when the index is not dirty.
/// </summary>
/// <param name="freeMemory">When true the cache is replaced by a new empty one after writing.</param>
public void Commit(bool freeMemory)
{
if (_isDirty == false)
return;
using (new L(this))
{
log.Debug("writing " + _FileName);
int[] keys = _cache.Keys();
Array.Sort(keys);
foreach (int k in keys)
{
MGRB bmp = null;
if (_cache.TryGetValue(k, out bmp) && bmp.isDirty)
{
// optimize the bitmap before it is serialized
bmp.Optimize();
SaveBitmap(k, bmp);
bmp.isDirty = false;
}
}
Flush();
if (freeMemory)
{
if (Global.UseLessMemoryStructures)
_cache = new SafeSortedList();
else
_cache = new SafeDictionary();
log.Debug(" freeing cache");
}
_isDirty = false;
}
}
/// <summary>
/// Sets bit <paramref name="record"/> in the bitmap stored under <paramref name="bitmaprecno"/>
/// and marks the index dirty.
/// </summary>
public void SetDuplicate(int bitmaprecno, int record)
{
    using (new L(this))
    {
        internalGetBitmap(bitmaprecno).Set(record, true);
        _isDirty = true;
    }
}
/// <summary>
/// Returns the bitmap for <paramref name="recno"/>, from the cache or loaded from disk
/// (see internalGetBitmap).
/// </summary>
public MGRB GetBitmap(int recno)
{
using (new L(this))
{
return internalGetBitmap(recno);
}
}
private object _oplock = new object();
/// <summary>
/// Rewrites the record and bitmap files so that they contain only the bitmap currently
/// referenced by each record, then swaps the rewritten files in and reopens the index.
/// </summary>
/// <remarks>
/// Other operations are held off while this runs. The temporary files are always closed
/// and <c>_stopOperations</c> is always reset, even when the rewrite fails; previously a
/// failure could leave the temporary streams open and the flag set.
/// </remarks>
/// <exception cref="Exception">A stored bitmap does not start with the expected header.</exception>
public void Optimize()
{
    lock (_oplock)
        lock (_readlock)
            lock (_writelock)
            {
                _stopOperations = true;
                try
                {
                    // wait for operations already in flight
                    while (_workingCount > 0) Thread.SpinWait(1);
                    Flush();

                    string tmpBmp = _Path + _FileName + "$" + _bmpExt;
                    string tmpRec = _Path + _FileName + "$" + _recExt;
                    if (File.Exists(tmpBmp))
                        File.Delete(tmpBmp);
                    if (File.Exists(tmpRec))
                        File.Delete(tmpRec);

                    using (Stream newrec = new FileStream(tmpRec, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite))
                    using (Stream newbmp = new FileStream(tmpBmp, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite))
                    {
                        long newoffset = 0;
                        int c = (int)(_recordFileRead.Length / 8);
                        for (int i = 0; i < c; i++)
                        {
                            long offset = ReadRecordOffset(i);
                            byte[] b = ReadBMPDataForOptimize(offset);
                            if (b == null)
                                throw new Exception("bitmap index file is corrupted");

                            // record i now points at the position its bitmap gets in the new file
                            newrec.Write(Helper.GetBytes(newoffset, false), 0, 8);
                            newoffset += b.Length;
                            newbmp.Write(b, 0, b.Length);
                        }
                        newbmp.Flush();
                        newrec.Flush();
                    }

                    // replace the live files with the rewritten ones and reopen
                    InternalShutdown();
                    File.Delete(_Path + _FileName + _bmpExt);
                    File.Delete(_Path + _FileName + _recExt);
                    File.Move(tmpBmp, _Path + _FileName + _bmpExt);
                    File.Move(tmpRec, _Path + _FileName + _recExt);
                    Initialize();
                }
                finally
                {
                    _stopOperations = false;
                }
            }
}
/// <summary>
/// Removes every bitmap that is not dirty from the cache. Any exception is logged and swallowed.
/// </summary>
internal void FreeMemory()
{
try
{
// collect the keys first, remove afterwards
List free = new List();
foreach (var k in _cache.Keys())
{
var val = _cache.GetValue(k);
if (val.isDirty == false)
free.Add(k);
}
log.Info("releasing bmp count = " + free.Count + " out of " + _cache.Count());
foreach (int i in free)
_cache.Remove(i);
}
catch (Exception ex)
{
log.Error(ex);
}
}
#endregion
#region [ P R I V A T E ]
/// <summary>
/// Reads the 8-byte entry for record <paramref name="recnum"/> from the record file
/// (entry position = recnum * 8) and returns it as a long.
/// </summary>
private long ReadRecordOffset(int recnum)
{
    _recordFileRead.Seek(8L * recnum, SeekOrigin.Begin);

    byte[] entry = new byte[8];
    _recordFileRead.Read(entry, 0, 8);
    return Helper.ToInt64(entry, 0);
}
/// <summary>
/// Opens (creating if necessary) separate read and write streams on the record file and the
/// bitmap file, positions the bitmap writer at the end, and derives the next bitmap offset
/// and record number from the file lengths.
/// </summary>
private void Initialize()
{
    string recordPath = _Path + _FileName + _recExt;
    string bitmapPath = _Path + _FileName + _bmpExt;

    _recordFileRead = new FileStream(recordPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
    _recordFileWriteOrg = new FileStream(recordPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
    _recordFileWrite = new BufferedStream(_recordFileWriteOrg);

    _bitmapFileRead = new FileStream(bitmapPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
    _bitmapFileWriteOrg = new FileStream(bitmapPath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.ReadWrite);
    _bitmapFileWrite = new BufferedStream(_bitmapFileWriteOrg);

    // new bitmaps are appended
    _bitmapFileWrite.Seek(0L, SeekOrigin.End);
    _lastBitmapOffset = _bitmapFileWrite.Length;
    // one 8-byte entry per record
    _lastRecordNumber = (int)(_recordFileRead.Length / 8);
    _shutdownDone = false;
}
/// <summary>
/// Flushes and closes all six file streams, deletes the record and/or bitmap file when it is
/// empty, and marks the index as shut down. Does nothing when already shut down.
/// </summary>
/// <remarks>
/// The buffered writers are closed before the file streams they wrap, and all six stream
/// fields are cleared. Previously two fields were assigned null twice while the two
/// <c>...WriteOrg</c> fields kept references to closed streams.
/// </remarks>
private void InternalShutdown()
{
    if (_shutdownDone)
        return;

    Flush();
    // empty files are removed once everything is closed
    bool deleteRecordFile = _recordFileWrite.Length == 0;
    bool deleteBitmapFile = _bitmapFileWrite.Length == 0;

    _recordFileRead.Close();
    _bitmapFileRead.Close();
    _recordFileWrite.Close();
    _bitmapFileWrite.Close();
    _recordFileWriteOrg.Close();
    _bitmapFileWriteOrg.Close();

    if (deleteRecordFile)
        File.Delete(_Path + _FileName + _recExt);
    if (deleteBitmapFile)
        File.Delete(_Path + _FileName + _bmpExt);

    _recordFileRead = null;
    _recordFileWrite = null;
    _recordFileWriteOrg = null;
    _bitmapFileRead = null;
    _bitmapFileWrite = null;
    _bitmapFileWriteOrg = null;
    _shutdownDone = true;
}
/// <summary>Flushes every open stream of the index. No-op after shutdown.</summary>
private void Flush()
{
    if (_shutdownDone)
        return;

    // same order as before: buffered writers, readers, then the raw write streams
    Stream[] streams = new Stream[]
    {
        _recordFileWrite,
        _bitmapFileWrite,
        _recordFileRead,
        _bitmapFileRead,
        _bitmapFileWriteOrg,
        _recordFileWriteOrg
    };
    foreach (Stream stream in streams)
    {
        if (stream != null)
            stream.Flush();
    }
}
private object _readlock = new object();
/// <summary>
/// Returns the bitmap for <paramref name="recno"/>: a new empty bitmap for -1, the cached
/// instance when present, otherwise the bitmap loaded from disk (which is then cached).
/// </summary>
private MGRB internalGetBitmap(int recno)
{
    lock (_readlock)
    {
        if (recno == -1)
            return new MGRB();

        MGRB bitmap;
        if (_cache.TryGetValue(recno, out bitmap))
            return bitmap;

        bitmap = LoadBitmap(ReadRecordOffset(recno));
        _cache.Add(recno, bitmap);
        return bitmap;
    }
}
private object _writelock = new object();
/// <summary>
/// Appends <paramref name="bmp"/> to the bitmap file and stores the offset it was written
/// at in the record file entry for <paramref name="recno"/> (entry position = recno * 8).
/// </summary>
private void SaveBitmap(int recno, MGRB bmp)
{
    lock (_writelock)
    {
        long offset = SaveBitmapToFile(bmp);

        _recordFileWrite.Seek(((long)recno) * 8, SeekOrigin.Begin);
        byte[] entry = Helper.GetBytes(offset, false);
        _recordFileWrite.Write(entry, 0, 8);
    }
}
//-----------------------------------------------------------------
// new format
// 0 : b
// 1 : m
// 2 : type 0 = uncompressed, 1 = compressed
// 3 : data size (int)
// 7 : data bytes (the header is 2 + 1 + 4 = 7 bytes long, see _hdrlen)
private byte _hdrlen = 2 + 4 + 1;
/// <summary>
/// Serializes <paramref name="bmp"/> (compressed when <c>Global.CompressBitmapBytes</c> is set)
/// and writes it, preceded by the 'b' 'm' header, at the current end offset of the bitmap file.
/// </summary>
/// <returns>The offset at which the header was written.</returns>
private long SaveBitmapToFile(MGRB bmp)
{
    long start = _lastBitmapOffset;

    var serialized = bmp.Serialize();
    var header = new byte[_hdrlen];
    var payload = fastBinaryJSON.BJSON.ToBJSON(serialized, new fastBinaryJSON.BJSONParameters { UseExtensions = false });

    header[0] = (byte)'b';
    header[1] = (byte)'m';
    if (Global.CompressBitmapBytes)
    {
        header[2] = 1; // compressed
        payload = MiniLZO.Compress(payload);
    }
    else
    {
        header[2] = 0; // uncompressed
    }

    // payload length goes into header bytes 3..6
    Buffer.BlockCopy(Helper.GetBytes(payload.Length, false), 0, header, 3, 4);

    _bitmapFileWrite.Write(header, 0, header.Length);
    _lastBitmapOffset += header.Length;
    _bitmapFileWrite.Write(payload, 0, payload.Length);
    _lastBitmapOffset += payload.Length;
    return start;
}
/// <summary>
/// Reads one stored bitmap (header plus data, exactly as stored) from the bitmap file.
/// </summary>
/// <returns>The raw bytes, or null when the header does not start with 'b' 'm'.</returns>
private byte[] ReadBMPDataForOptimize(long offset)
{
    _bitmapFileRead.Seek(offset, SeekOrigin.Begin);

    byte[] header = new byte[_hdrlen];
    _bitmapFileRead.Read(header, 0, _hdrlen);

    bool recognized = header[0] == (byte)'b' && header[1] == (byte)'m';
    if (!recognized)
        return null;

    int size = Helper.ToInt32(header, 3);
    var record = new byte[size + _hdrlen];
    Buffer.BlockCopy(header, 0, record, 0, _hdrlen);
    _bitmapFileRead.Read(record, _hdrlen, size);
    return record;
}
/// <summary>
/// Loads the bitmap stored at <paramref name="offset"/> in the bitmap file.
/// </summary>
/// <returns>
/// The loaded bitmap; an empty bitmap when <paramref name="offset"/> is -1 or when the
/// header is not recognized (the latter is logged as an error).
/// </returns>
private MGRB LoadBitmap(long offset)
{
MGRB bc = new MGRB();
if (offset == -1)
return bc;
FileStream bmp = _bitmapFileRead;
bmp.Seek(offset, SeekOrigin.Begin);
var hdr = new byte[_hdrlen];
bmp.Read(hdr, 0, hdr.Length);
if (hdr[0] == (byte)'b' && hdr[1] == (byte)'m')
{
// header bytes 3..6 hold the data length
int c = Helper.ToInt32(hdr, 3);
var b = new byte[c];
bmp.Read(b, 0, c);
// header byte 2 == 1 marks compressed data
if (hdr[2] == 1)
b = MiniLZO.Decompress(b);
bc.Deserialize(fastBinaryJSON.BJSON.ToObject(b));
}
else
log.Error("bitmap not recognized");
return bc;
}
#pragma warning disable 642
/// <summary>
/// Called at the start of every operation (see class L): when <c>_stopOperations</c> is set
/// it waits until <c>_oplock</c> can be acquired, then counts the operation as in progress.
/// </summary>
private void CheckInternalOP()
{
// the empty lock body is intentional: acquiring and releasing _oplock blocks
// until the holder (Optimize) has released it
if (_stopOperations)
lock (_oplock) { } // yes! this is good
Interlocked.Increment(ref _workingCount);
}
#pragma warning restore 642
/// <summary>Marks one operation started by CheckInternalOP() as finished.</summary>
private void Done()
{
Interlocked.Decrement(ref _workingCount);
}
#endregion
}
}
================================================
FILE: RaptorDB/Indexes/Cache.cs
================================================
using System;
using System.Xml.Serialization;
namespace RaptorDB
{
/// <summary>Bit operation used to combine the bitmap of a query term with the running result.</summary>
public enum OPERATION
{
AND,
OR,
ANDNOT
}
/// <summary>
/// A document handed to the full-text index: file metadata plus the text to index.
/// </summary>
public class Document
{
public string FullName;
public DateTime Created;
public DateTime Modified;
public long Length;
public string Extension;
/// <summary>Creates a document that has no document number yet (<c>DocNumber</c> = -1).</summary>
public Document()
{
DocNumber = -1;
}
/// <summary>Creates a document with the given file name and text and no document number yet.</summary>
public Document(string filename, string text)
{
FileName = filename;
Text = text;
DocNumber = -1;
}
// -1 means "not assigned"
public int DocNumber { get; set; }
// excluded from XML serialization
[XmlIgnore]
public string Text { get; set; }
public string FileName { get; set; }
public string Abstract { get; set; }
/// <summary>Returns <c>FileName</c>.</summary>
public override string ToString()
{
return FileName;
}
}
}
================================================
FILE: RaptorDB/Indexes/Hoot.cs
================================================
using System;
using System.Collections.Generic;
using System.Text;
using System.IO;
using System.Text.RegularExpressions;
using RaptorDB.Common;
namespace RaptorDB
{
public class Hoot
{
/// <summary>Creates the index using a default <c>tokenizer</c> instance.</summary>
public Hoot(string IndexPath, string FileName, bool DocMode) : this(IndexPath, FileName, DocMode, new tokenizer())
{
}
/// <summary>
/// Opens (or creates) the full-text index stored in <paramref name="IndexPath"/>.
/// </summary>
/// <param name="IndexPath">Storage folder; created when missing.</param>
/// <param name="FileName">Base name of the word and bitmap files.</param>
/// <param name="DocMode">When true the document store and the deleted-documents index are opened as well.</param>
/// <param name="tokenizer">Tokenizer to use; a default <c>tokenizer</c> is used when null.</param>
public Hoot(string IndexPath, string FileName, bool DocMode, ITokenizer tokenizer)
{
    _tokenizer = tokenizer ?? new tokenizer();

    _Path = IndexPath;
    _FileName = FileName;
    _docMode = DocMode;

    string separator = Path.DirectorySeparatorChar.ToString();
    if (!_Path.EndsWith(separator))
        _Path += Path.DirectorySeparatorChar;
    Directory.CreateDirectory(IndexPath);

    _log.Debug("Starting hOOt....");
    _log.Debug("Storage Folder = " + _Path);

    if (DocMode)
    {
        _docs = new KeyStoreString(_Path + "files.docs", false);
        // read deleted
        _deleted = new BoolIndex(_Path, "_deleted", ".hoot");
        _lastDocNum = (int)_docs.Count();
    }

    _bitmaps = new BitmapIndex(_Path, _FileName + "_hoot.bmp");
    // read words
    LoadWords();
}
private ITokenizer _tokenizer;
private SafeDictionary _words = new SafeDictionary();
//private SafeSortedList _words = new SafeSortedList();
private BitmapIndex _bitmaps;
private BoolIndex _deleted;
private ILog _log = LogManager.GetLogger(typeof(Hoot));
private int _lastDocNum = 0;
private string _FileName = "words";
private string _Path = "";
private KeyStoreString _docs;
private bool _docMode = false;
private bool _wordschanged = true;
private bool _shutdowndone = false;
private object _lock = new object();
/// <summary>All indexed words (the keys of the word dictionary).</summary>
public string[] Words
{
get { checkloaded(); return _words.Keys(); }
}
/// <summary>Number of indexed words.</summary>
public int WordCount
{
get { checkloaded(); return _words.Count(); }
}
/// <summary>Last document number minus the number of documents flagged as deleted.</summary>
// NOTE(review): _deleted is only created in the constructor when DocMode is true - confirm this is never read otherwise
public int DocumentCount
{
get { checkloaded(); return _lastDocNum - (int)_deleted.GetBits().CountOnes(); }
}
/// <summary>Storage folder of the index.</summary>
public string IndexPath { get { return _Path; } }
/// <summary>Persists the index (see InternalSave) while holding the index lock.</summary>
public void Save()
{
lock (_lock)
InternalSave();
}
/// <summary>Indexes <paramref name="text"/> under record number <paramref name="recordnumber"/>.</summary>
public void Index(int recordnumber, string text)
{
checkloaded();
AddtoIndex(recordnumber, text);
}
/// <summary>Runs <paramref name="filter"/> against the index and returns the matching record bitmap.</summary>
/// <param name="filter">Query text, interpreted by ExecutionPlan.</param>
/// <param name="maxsize">Passed through to ExecutionPlan.</param>
public MGRB Query(string filter, int maxsize)
{
checkloaded();
return ExecutionPlan(filter, maxsize);
}
/// <summary>
/// Stores <paramref name="doc"/> in the document store (keyed by its lower-cased file name)
/// and indexes its text.
/// </summary>
/// <param name="doc">Document to index; its <c>DocNumber</c> is assigned here when it is -1 or when <paramref name="deleteold"/> is true.</param>
/// <param name="deleteold">When true and the document already has a number, that number is flagged as deleted and a new one is assigned.</param>
/// <returns>The value of <c>_lastDocNum</c> after indexing.</returns>
// NOTE(review): the return value is the next free number, not doc.DocNumber - confirm callers expect this
public int Index(Document doc, bool deleteold)
{
checkloaded();
_log.Info("indexing doc : " + doc.FileName);
DateTime dt = FastDateTime.Now;
if (deleteold && doc.DocNumber > -1)
_deleted.Set(true, doc.DocNumber);
if (deleteold == true || doc.DocNumber == -1)
doc.DocNumber = _lastDocNum++;
// save doc to disk
string dstr = fastJSON.JSON.ToJSON(doc, new fastJSON.JSONParameters { UseExtensions = false });
_docs.Set(doc.FileName.ToLower(), fastJSON.Reflection.UnicodeGetBytes(dstr));
_log.Info("writing doc to disk (ms) = " + FastDateTime.Now.Subtract(dt).TotalMilliseconds);
dt = FastDateTime.Now;
// index doc
AddtoIndex(doc.DocNumber, doc.Text);
_log.Info("indexing time (ms) = " + FastDateTime.Now.Subtract(dt).TotalMilliseconds);
return _lastDocNum;
}
/// <summary>Returns the record numbers matching <paramref name="filter"/>.</summary>
public IEnumerable FindRows(string filter)
{
checkloaded();
MGRB bits = ExecutionPlan(filter, _docs.RecordCount());
// enumerate records
return bits.GetBitIndexes();
}
/// <summary>
/// Lazily returns the stored documents matching <paramref name="filter"/>, deserialized from
/// their stored JSON.
/// </summary>
public IEnumerable FindDocuments(string filter)
{
checkloaded();
MGRB bits = ExecutionPlan(filter, _docs.RecordCount());
// enumerate documents
foreach (int i in bits.GetBitIndexes())
{
// stop at the first bit beyond the last assigned document number
if (i > _lastDocNum - 1)
break;
string b = _docs.ReadData(i);
T d = fastJSON.JSON.ToObject(b, new fastJSON.JSONParameters { ParametricConstructorOverride = true });
yield return d;
}
}
/// <summary>
/// Lazily returns the "FileName" value of every stored document matching <paramref name="filter"/>.
/// </summary>
public IEnumerable FindDocumentFileNames(string filter)
{
checkloaded();
MGRB bits = ExecutionPlan(filter, _docs.RecordCount());
// enumerate documents
foreach (int i in bits.GetBitIndexes())
{
// stop at the first bit beyond the last assigned document number
if (i > _lastDocNum - 1)
break;
string b = _docs.ReadData(i);
// parse only, no typed deserialization
var d = (Dictionary)fastJSON.JSON.Parse(b);
yield return d["FileName"].ToString();
}
}
/// <summary>Flags document <paramref name="number"/> as deleted.</summary>
public void RemoveDocument(int number)
{
// add number to deleted bitmap
_deleted.Set(true, number);
}
/// <summary>Flags the document stored under <paramref name="filename"/> (compared lower-cased) as deleted.</summary>
/// <returns>True when the document was found, false otherwise.</returns>
public bool RemoveDocument(string filename)
{
// remove doc based on filename
byte[] b;
if (_docs.Get(filename.ToLower(), out b))
{
Document d = fastJSON.JSON.ToObject(fastJSON.Reflection.UnicodeGetString(b));
RemoveDocument(d.DocNumber);
return true;
}
return false;
}
/// <summary>Returns whether a document is stored under <paramref name="filename"/> (compared lower-cased).</summary>
public bool IsIndexed(string filename)
{
byte[] b;
return _docs.Get(filename.ToLower(), out b);
}
/// <summary>Saves the index and then rewrites the bitmap files (BitmapIndex.Optimize).</summary>
public void OptimizeIndex()
{
lock (_lock)
{
InternalSave();
//_bitmaps.Commit(false);
_bitmaps.Optimize();
}
}
#region [ P R I V A T E M E T H O D S ]
/// <summary>Reloads the word list when <c>_wordschanged</c> is false.</summary>
// NOTE(review): InternalSave sets _wordschanged to false after writing and LoadWords sets it
// back to true, so this reloads the words after each save - confirm that is the intent
private void checkloaded()
{
if (_wordschanged == false)
{
LoadWords();
}
}
/// <summary>
/// Evaluates a query: the filter is split on spaces and the bitmap of every term is combined
/// into the result from left to right.
/// </summary>
/// <param name="filter">
/// Space separated terms. A leading '+' ORs the term, a leading '-' excludes it, otherwise
/// terms are ANDed. '*' and '?' in a term act as wildcards.
/// </param>
/// <param name="maxsize">Size used when the whole set has to be filled or inverted.</param>
/// <returns>Matching records; in document mode deleted documents are removed.</returns>
/// <remarks>
/// Wildcard terms are escaped before being turned into a regular expression, so characters
/// such as '.', '(' or '[' in user input are matched literally instead of being interpreted
/// as regex syntax or making the pattern invalid.
/// </remarks>
private MGRB ExecutionPlan(string filter, int maxsize)
{
    // query indexes
    string[] words = filter.Split(' ');
    MGRB found = null;

    foreach (string s in words)
    {
        int c;
        bool not = false;
        string word = s;
        if (s == "") continue;

        OPERATION op = OPERATION.AND;

        if (word.StartsWith("+"))
        {
            op = OPERATION.OR;
            word = s.Replace("+", "");
        }

        if (word.StartsWith("-"))
        {
            op = OPERATION.ANDNOT;
            word = s.Replace("-", "");
            not = true;
            if (found == null) // leading with - -> "-oak hill"
            {
                found = MGRB.Fill(maxsize);
            }
        }

        if (word.Contains("*") || word.Contains("?"))
        {
            MGRB wildbits = new MGRB();

            // do wildcard search: escape everything, then turn the escaped wildcards into regex
            string pattern = "^" + Regex.Escape(word).Replace("\\*", ".*").Replace("\\?", ".") + "$";
            Regex reg = new Regex(pattern, RegexOptions.IgnoreCase);
            foreach (string key in _words.Keys())
            {
                if (reg.IsMatch(key))
                {
                    _words.TryGetValue(key, out c);
                    MGRB ba = _bitmaps.GetBitmap(c);

                    wildbits = DoBitOperation(wildbits, ba, OPERATION.OR, maxsize);
                }
            }
            if (found == null)
                found = wildbits;
            else
            {
                if (not) // "-oak -*l"
                    found = found.AndNot(wildbits);
                else if (op == OPERATION.AND)
                    found = found.And(wildbits);
                else
                    found = found.Or(wildbits);
            }
        }
        else if (_words.TryGetValue(word.ToLowerInvariant(), out c))
        {
            // bits logic
            MGRB ba = _bitmaps.GetBitmap(c);
            found = DoBitOperation(found, ba, op, maxsize);
        }
        else if (op == OPERATION.AND)
        {
            // an ANDed word that is not in the index empties the result
            found = new MGRB();
        }
    }

    if (found == null)
        return new MGRB();

    // remove deleted docs
    MGRB ret;
    if (_docMode)
        ret = found.AndNot(_deleted.GetBits());
    else
        ret = found;

    return ret;
}
/// <summary>
/// Combines the running result <paramref name="bits"/> with <paramref name="c"/> using
/// <paramref name="op"/>. When there is no running result yet, <paramref name="c"/> is returned as is.
/// </summary>
private static MGRB DoBitOperation(MGRB bits, MGRB c, OPERATION op, int maxsize)
{
    if (bits == null)
        return c;

    switch (op)
    {
        case OPERATION.AND:
            return bits.And(c);
        case OPERATION.OR:
            return bits.Or(c);
        case OPERATION.ANDNOT:
            return bits.And(c.Not(maxsize));
        default:
            return bits;
    }
}
/// <summary>
/// Persists the deleted-documents index, the document store (document mode only), the bitmaps
/// and - when flagged as changed - the word list, logging the elapsed time.
/// </summary>
private void InternalSave()
{
    _log.Info("saving index...");
    DateTime started = FastDateTime.Now;

    // save deleted
    if (_deleted != null)
        _deleted.SaveIndex();

    // save docs
    if (_docMode)
        _docs.SaveIndex();

    if (_bitmaps != null)
        _bitmaps.Commit(true);

    if (_words != null && _wordschanged)
    {
        // word file: repeated (word, bitmap record number) pairs
        using (FileStream words = new FileStream(_Path + _FileName + ".words", FileMode.Create))
        using (BinaryWriter bw = new BinaryWriter(words, Encoding.UTF8))
        {
            foreach (string key in _words.Keys())
            {
                bw.Write(key);
                bw.Write(_words[key]);
            }
        }
        _wordschanged = false;
    }

    _log.Info("save time (ms) = " + FastDateTime.Now.Subtract(started).TotalMilliseconds);
}
// Loads the word dictionary from the ".words" file (the (string key, int value) pairs written
// by InternalSave). A missing or empty file leaves the dictionary as it is.
private void LoadWords()
{
    lock (_lock)
    {
        if (_words == null)
            _words = new SafeDictionary();

        string wordsFile = _Path + _FileName + ".words";
        if (File.Exists(wordsFile) == false)
            return;

        using (FileStream words = new FileStream(wordsFile, FileMode.Open))
        {
            if (words.Length == 0)
                return;
            using (BinaryReader br = new BinaryReader(words, Encoding.UTF8))
            {
                // Detect the end of the data from the stream position rather than by waiting
                // for ReadString to throw; only a genuinely truncated record is tolerated.
                while (words.Position < words.Length)
                {
                    try
                    {
                        string s = br.ReadString();
                        if (s == "")
                            break; // an empty key ends the list
                        int off = br.ReadInt32();
                        _words.Add(s, off);
                    }
                    catch (EndOfStreamException)
                    {
                        _log.Info("words file is truncated, ignoring the incomplete last entry : " + wordsFile);
                        break;
                    }
                }
            }
        }
        _log.Debug("Word Count = " + _words.Count());
        _wordschanged = true;
    }
}
// Marks record `recnum` in the bitmap of every word of `text` (lower-cased first).
// In doc mode the words come from the tokenizer, otherwise from splitting on spaces.
private void AddtoIndex(int recnum, string text)
{
    if (text == null || text == "")
        return;

    string lowered = text.ToLowerInvariant(); // lowercase index
    string[] keys;
    if (!_docMode)
    {
        keys = lowered.Split(' ');
    }
    else
    {
        var wordfreq = _tokenizer.GenerateWordFreq(lowered);
        var kk = wordfreq.Keys;
        keys = new string[kk.Count];
        kk.CopyTo(keys, 0);
    }

    for (int i = 0; i < keys.Length; i++)
    {
        string key = keys[i];
        if (key == "")
            continue;

        int bmp;
        if (!_words.TryGetValue(key, out bmp))
        {
            // first time this word is seen: take a free bitmap record and register the word
            bmp = _bitmaps.GetFreeRecordNumber();
            _bitmaps.SetDuplicate(bmp, recnum);
            _words.Add(key, bmp);
        }
        else
        {
            _bitmaps.GetBitmap(bmp).Set(recnum, true);
        }
    }
    _wordschanged = true;
}
#endregion
// Saves the index and shuts down the deleted index, the bitmap index and (doc mode) the
// document store. Runs once: later calls return as soon as _shutdowndone is seen.
public void Shutdown()
{
    lock (_lock)
    {
        if (_shutdowndone != true)
        {
            InternalSave();

            if (_deleted != null)
            {
                _deleted.SaveIndex();
                _deleted.Shutdown();
                _deleted = null;
            }

            if (_bitmaps != null)
            {
                _bitmaps.Commit(Global.FreeBitmapMemoryOnSave);
                _bitmaps.Shutdown();
                _bitmaps = null;
            }

            if (_docMode)
                _docs.Shutdown();

            _shutdowndone = true;
        }
    }
}
// Saves the index, then asks each backing store that exists to release its memory.
// The word dictionary itself is kept.
public void FreeMemory()
{
    lock (_lock)
    {
        InternalSave();

        if (_deleted != null)
        {
            _deleted.FreeMemory();
        }
        if (_bitmaps != null)
        {
            _bitmaps.FreeMemory();
        }
        if (_docs != null)
        {
            _docs.FreeMemory();
        }
    }
}
// Reads the stored text for document number `docnum` from _docs and passes it to
// fastJSON.JSON.ToObject to rebuild the object.
// NOTE(review): _docs is used without a null / _docMode check — presumably only called in doc mode; confirm.
public T Fetch(int docnum)
{
string b = _docs.ReadData(docnum);
return fastJSON.JSON.ToObject(b);
}
}
}
================================================
FILE: RaptorDB/Indexes/IIndex.cs
================================================
namespace RaptorDB
{
// Comparison kinds passed to IIndex.Query(RDBExpression, object, int).
internal enum RDBExpression
{
Equal,
Greater,
GreaterEqual,
Less,
LessEqual,
NotEqual,
Between,
Contains
}
// Operations shared by the index implementations.
internal interface IIndex
{
// Indexes `key` for record number `recnum`.
void Set(object key, int recnum);
// Range query between `fromkey` and `tokey`; returns the matching records as a bitmap.
MGRB Query(object fromkey, object tokey, int maxsize);
// Single-value query: compares against `from` using `ex`; returns the matching records as a bitmap.
MGRB Query(RDBExpression ex, object from , int maxsize);
// Releases memory held by the index.
void FreeMemory();
// Shuts the index down.
void Shutdown();
// Saves the index.
void SaveIndex();
// Returns the keys known to the index.
object[] GetKeys();
}
}
================================================
FILE: RaptorDB/Indexes/ITokenizer.cs
================================================
using System.Collections.Generic;
namespace RaptorDB
{
// Pluggable text tokenizer.
public interface ITokenizer
{
// Breaks `text` into words and returns them as the keys of a dictionary.
// NOTE(review): the value is presumably the word's frequency (method name) — confirm against an implementation.
Dictionary GenerateWordFreq(string text);
}
}
================================================
FILE: RaptorDB/Indexes/IndexFile.cs
================================================
using System;
using System.Collections.Generic;
using System.IO;
using RaptorDB.Common;
using System.Threading;
using fastBinaryJSON;
namespace RaptorDB
{
internal class IndexFile
{
// the open index file
FileStream _file = null;
// file header template; the byte offsets below are positions inside this array
private byte[] _FileHeader = new byte[] {
(byte)'M', (byte)'G', (byte)'I',
0, // 3 = [keysize] max 255
0,0, // 4 = [node size] max 65536
0,0,0,0, // 6 = [root page num]
0, // 10 = Index file type : 0=mgindex 1=mgindex+strings (key = firstallocblock)
0,0,0,0 // 11 = last record number indexed
};
// page header template; each page on disk starts with these bytes
private byte[] _BlockHeader = new byte[] {
(byte)'P',(byte)'A',(byte)'G',(byte)'E',
0, // 4 = [Flag] = 0=page 1=page list
0,0, // 5 = [item count]
0,0,0,0, // 7 = reserved
0,0,0,0 // 11 = [right page number] / [next page number]
};
// bytes reserved for the key in every row
internal byte _maxKeySize;
// rows per page (the constructor replaces this with Global.PageItemCount)
internal ushort _PageNodeCount = 5000;
private int _LastPageNumber = 1; // 0 = page list
// size of one page on disk: block header + _rowSize * _PageNodeCount
private int _PageLength;
// size of one row: 1 size byte + _maxKeySize key bytes + two 4-byte ints
private int _rowSize;
private bool _allowDups = true;
ILog log = LogManager.GetLogger(typeof(IndexFile));
// bitmap store used for duplicate record numbers (created when _allowDups is set)
private BitmapIndex _bitmap;
// converts keys to and from bytes
IGetBytes _T = null;
// guards access to _file
private object _fileLock = new object();
// external key store, used only when _externalStrings is set
private StringHF _strings;
private bool _externalStrings = false;
//private List _pagelistalllocblock = null;
// the file name up to (not including) its last '.'
private string _FileName = "";
// Opens the index file `filename`, creating it (and its directory) when it does not exist.
// `maxKeySize` is the number of key bytes stored per row; it is replaced by 4 when T is string
// and Global.EnableOptimizedStringIndex is set, because the row then holds an int block number
// and the keys themselves are saved through the ".strings" store.
public IndexFile(string filename, byte maxKeySize)//, ushort pageNodeCount)
{
_T = RDBDataType.ByteHandler();
if (typeof(T) == typeof(string) && Global.EnableOptimizedStringIndex)
{
_externalStrings = true;
_maxKeySize = 4;// blocknum:int
}
else
_maxKeySize = maxKeySize;
_PageNodeCount = Global.PageItemCount;// pageNodeCount;
// row = key size byte + key bytes + two 4-byte ints
_rowSize = (_maxKeySize + 1 + 4 + 4);
// NOTE(review): LastIndexOf returns -1 when filename has no '.', which makes Substring throw
_FileName = filename.Substring(0, filename.LastIndexOf('.'));
string path = Path.GetDirectoryName(filename);
Directory.CreateDirectory(path);
if (File.Exists(filename))
{
// if file exists open and read header
_file = File.Open(filename, FileMode.Open, FileAccess.ReadWrite, FileShare.ReadWrite);
// may replace _maxKeySize / _PageNodeCount with the stored values and clear _externalStrings
ReadFileHeader();
if (_externalStrings == false)// if the file says different
{
_rowSize = (_maxKeySize + 1 + 4 + 4);
}
// compute last page number from file length
_PageLength = (_BlockHeader.Length + _rowSize * (_PageNodeCount));
_LastPageNumber = (int)((_file.Length - _FileHeader.Length) / _PageLength);
}
else
{
// else create new file
_file = File.Open(filename, FileMode.Create, FileAccess.ReadWrite, FileShare.ReadWrite);
_PageLength = (_BlockHeader.Length + _rowSize * (_PageNodeCount));
// writes the header followed by an empty page-list page (page zero)
CreateFileHeader(0);
_LastPageNumber = (int)((_file.Length - _FileHeader.Length) / _PageLength);
}
if (_externalStrings)
{
_strings = new StringHF(path, Path.GetFileNameWithoutExtension(filename) + ".strings");
}
// page 0 is the page list, so data page numbering never starts below 1
if (_LastPageNumber == 0)
_LastPageNumber = 1;
// bitmap duplicates
if (_allowDups)
_bitmap = new BitmapIndex(Path.GetDirectoryName(filename), Path.GetFileNameWithoutExtension(filename));
}
#region [ C o m m o n ]
// Forwards to the duplicates bitmap store: records `rec` under bitmap record `bitmaprec`.
public void SetBitmapDuplicate(int bitmaprec, int rec)
{
_bitmap.SetDuplicate(bitmaprec, rec);
}
// Asks the duplicates bitmap store for a free bitmap record number.
public int GetBitmapDuplaicateFreeRecordNumber()
{
    int freeRecord = _bitmap.GetFreeRecordNumber();
    return freeRecord;
}
// Returns the bit indexes set in duplicate bitmap `recno`.
public IEnumerable GetDuplicatesRecordNumbers(int recno)
{
return GetDuplicateBitmap(recno).GetBitIndexes();
}
// Fetches duplicate bitmap `recno` from the bitmap store.
public MGRB GetDuplicateBitmap(int recno)
{
    MGRB duplicates = _bitmap.GetBitmap(recno);
    return duplicates;
}
// Builds a page header from the _BlockHeader template:
// byte 4 = type flag, bytes 5-6 = item count, bytes 11-14 = right / next page number.
private byte[] CreateBlockHeader(byte type, ushort itemcount, int rightpagenumber)
{
    byte[] header = (byte[])_BlockHeader.Clone();
    header[4] = type;

    byte[] countBytes = Helper.GetBytes(itemcount, false);
    Buffer.BlockCopy(countBytes, 0, header, 5, 2);

    byte[] rightBytes = Helper.GetBytes(rightpagenumber, false);
    Buffer.BlockCopy(rightBytes, 0, header, 11, 4);

    return header;
}
// Rewrites the file header at offset 0 with the current key size, page node count, index type
// flag and `rowsindexed` (the last record number indexed). When `rowsindexed` is 0 an empty
// page-list page is written right after the header.
// NOTE(review): rowsindexed == 0 also arrives through SaveLastRecordNumber(0), which would
// re-blank page zero of an existing file — confirm this is intended.
private void CreateFileHeader(int rowsindexed)
{
    lock (_fileLock)
    {
        // byte 3 : max key size
        byte[] keySizeBytes = Helper.GetBytes(_maxKeySize, false);
        Buffer.BlockCopy(keySizeBytes, 0, _FileHeader, 3, 1);

        // bytes 4-5 : page node count
        byte[] nodeCountBytes = Helper.GetBytes(_PageNodeCount, false);
        Buffer.BlockCopy(nodeCountBytes, 0, _FileHeader, 4, 2);

        // bytes 11-14 : last record number indexed
        byte[] rowsBytes = Helper.GetBytes(rowsindexed, false);
        Buffer.BlockCopy(rowsBytes, 0, _FileHeader, 11, 4);

        // byte 10 : index file type
        if (_externalStrings)
            _FileHeader[10] = 1;

        _file.Seek(0L, SeekOrigin.Begin);
        _file.Write(_FileHeader, 0, _FileHeader.Length);

        if (rowsindexed == 0)
        {
            // empty page list : flag 1, no items, no next page
            byte[] pagezero = new byte[_PageLength];
            byte[] listHeader = CreateBlockHeader(1, 0, -1);
            Buffer.BlockCopy(listHeader, 0, pagezero, 0, listHeader.Length);
            _file.Write(pagezero, 0, _PageLength);
        }

        _file.Flush();
    }
}
// Reads the file header at offset 0. When it starts with the "MGI" signature, the stored max
// key size, page node count and header bytes replace the current ones, and external strings
// are switched off if the file was not created with them.
// Returns true when a complete header with a matching signature was read, false otherwise.
private bool ReadFileHeader()
{
    _file.Seek(0L, SeekOrigin.Begin);
    byte[] b = new byte[_FileHeader.Length];

    // Stream.Read may deliver fewer bytes than requested : keep reading until full or EOF
    int read = 0;
    while (read < b.Length)
    {
        int n = _file.Read(b, read, b.Length - read);
        if (n <= 0)
            break;
        read += n;
    }
    if (read < b.Length)
        return false; // file is shorter than a header

    bool signatureOk = b[0] == _FileHeader[0] && b[1] == _FileHeader[1] && b[2] == _FileHeader[2];
    if (!signatureOk)
        return false;

    _maxKeySize = b[3];
    _PageNodeCount = (ushort)Helper.ToInt16(b, 4);
    _FileHeader = b;
    if (b[10] == 0)
        _externalStrings = false;
    return true;
}
// Atomically increments the last page number and returns the new value.
public int GetNewPageNumber()
{
    int allocated = Interlocked.Increment(ref _LastPageNumber); //_LastPageNumber++;
    return allocated;
}
// Positions the file at the start of page `pnum` (file header length + pnum * page length).
// CreateBlankPages is called first when that offset lies beyond the current end of the file.
private void SeekPage(int pnum)
{
    long offset = _FileHeader.Length + (long)pnum * _PageLength;
    bool beyondEnd = offset > _file.Length;
    if (beyondEnd)
        CreateBlankPages(pnum);
    _file.Seek(offset, SeekOrigin.Begin);
}
// Grows the file with zero bytes until it reaches the start of page `pnum`, so the caller can
// seek there and write the page.
private void CreateBlankPages(int pnum)
{
    long target = _FileHeader.Length + (long)pnum * _PageLength;

    // Always append at the END of the file : writing at the current position (wherever the
    // previous read or write stopped) could overwrite pages that are already stored.
    long pos = _file.Seek(0L, SeekOrigin.End);

    byte[] blank = new byte[_PageLength];
    while (pos < target && blank.Length > 0)
    {
        int chunk = (int)Math.Min((long)blank.Length, target - pos);
        _file.Write(blank, 0, chunk);
        pos += chunk;
    }
    _file.Flush();
}
// Releases the duplicates bitmap memory; does nothing when duplicates are not allowed.
public void FreeMemory()
{
    if (!_allowDups)
        return;
    _bitmap.FreeMemory();
}
// Shuts down the external string store (if used), flushes and closes the index file, then
// commits and shuts down the duplicates bitmap store (if used).
public void Shutdown()
{
    log.Debug("Shutdown IndexFile");

    if (_externalStrings)
        _strings.Shutdown();

    if (_file != null)
    {
        _file.Flush();
        _file.Close();
    }
    _file = null;

    if (!_allowDups)
        return;

    _bitmap.Commit(Global.FreeBitmapMemoryOnSave);
    _bitmap.Shutdown();
}
#endregion
#region [ P a g e s ]
// Loads the whole page list by following the chain of page-list pages that starts at page 0.
// PageListDiskPages receives the disk page number of every page-list page in chain order,
// PageList receives the entries, lastIndexedRow the value stored in the file header.
public void GetPageList(List PageListDiskPages, SafeSortedList PageList, out int lastIndexedRow)
{
    lastIndexedRow = Helper.ToInt32(_FileHeader, 11);
    // load page list
    PageListDiskPages.Add(0); // first page list
    int nextpage = LoadPageListData(0, PageList);
    while (nextpage != -1)
    {
        // Record the page BEFORE following its link : recording only the link it returns
        // drops the first chained page, so SavePageList would allocate a fresh page for it.
        PageListDiskPages.Add(nextpage);
        nextpage = LoadPageListData(nextpage, PageList);
    }
}
// Reads page-list page `page` from disk and adds each of its rows to `PageList` as
// key -> PageInfo(page number, unique count, 0).
// Returns the next page-list page number stored in the header (-1 when the header holds -1).
// Throws when the page does not start with the "PAGE" signature or its item count exceeds
// _PageNodeCount.
private int LoadPageListData(int page, SafeSortedList PageList)
{
lock (_fileLock)
{
// load page list data
int nextpage = -1;
SeekPage(page);
byte[] b = new byte[_PageLength];
_file.Read(b, 0, _PageLength);
if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
{
// NOTE(review): the count is written as a ushort but read as a signed short here
short count = Helper.ToInt16(b, 5);
if (count > _PageNodeCount)
throw new Exception("Count > node size");
nextpage = Helper.ToInt32(b, 11);
int index = _BlockHeader.Length;
// stays null while the block below is commented out
object[] keys = null;
// TODO : needed??
//if (File.Exists(_FileName + ".pagelist"))
//{
// var bn = File.ReadAllBytes(_FileName + ".pagelist");
// int blknum = Helper.ToInt32(bn, 0);
// byte[] bb = _strings.GetData(blknum, out _pagelistalllocblock);
// keys = (object[])BJSON.ToObject(bb);
//}
for (int i = 0; i < count; i++)
{
// row layout : [key size][key bytes, _maxKeySize wide][page number][unique count]
int idx = index + _rowSize * i;
byte ks = b[idx];
T key;
if (_externalStrings == false)
key = _T.GetObject(b, idx + 1, ks);
else
{
if (keys == null)
key = _T.GetObject(b, idx + 1, ks); // do old way until better way
else
key = (T)keys[i];
}
int pagenum = Helper.ToInt32(b, idx + 1 + _maxKeySize);
// add counts
int unique = Helper.ToInt32(b, idx + 1 + _maxKeySize + 4);
// FEATURE : add dup count
PageList.Add(key, new PageInfo(pagenum, unique, 0));
}
}
else
throw new Exception("Page List header is invalid");
return nextpage;
}
}
// Serializes `page` and writes it at its disk page number.
// Rows are written in sorted key order as [key size][key bytes][record number][duplicate bitmap number].
// With external strings the sorted keys are saved through _strings and every row stores the
// returned block number instead of the key bytes.
// Throws when the page number is greater than _LastPageNumber.
internal void SavePage(Page page)
{
lock (_fileLock)
{
int pnum = page.DiskPageNumber;
if (pnum > _LastPageNumber)
throw new Exception("should not be here: page out of bounds");
SeekPage(pnum);
byte[] pagebytes = new byte[_PageLength];
byte[] blockheader = CreateBlockHeader(0, (ushort)page.tree.Count(), page.RightPageNumber);
Buffer.BlockCopy(blockheader, 0, pagebytes, 0, blockheader.Length);
int index = blockheader.Length;
int i = 0;
byte[] b = null;
T[] keys = page.tree.Keys();
Array.Sort(keys); // sort keys on save for read performance
int blocknum = 0;
if (_externalStrings)
{
// free old blocks
if (page.allocblocks != null)
_strings.FreeBlocks(page.allocblocks);
List blocks = new List();
blocknum = _strings.SaveData(page.DiskPageNumber.ToString(), BJSON.ToBJSON(keys,
new BJSONParameters { UseUnicodeStrings = false, UseTypedArrays = false }), out blocks);
// remember the blocks so the next save of this page can free them
page.allocblocks = blocks;
}
// node children
foreach (var kp in keys)
{
var val = page.tree[kp];
int idx = index + _rowSize * i;
// key bytes
byte[] kk;
byte size;
if (_externalStrings == false)
{
kk = _T.GetBytes(kp);
size = (byte)kk.Length;
// only the first _maxKeySize bytes of a longer key are written
if (size > _maxKeySize)
size = _maxKeySize;
}
else
{
kk = new byte[4];
Buffer.BlockCopy(Helper.GetBytes(blocknum, false), 0, kk, 0, 4);
size = 4;
}
// key size = 1 byte
pagebytes[idx] = size;
Buffer.BlockCopy(kk, 0, pagebytes, idx + 1, pagebytes[idx]);
// offset = 4 bytes
b = Helper.GetBytes(val.RecordNumber, false);
Buffer.BlockCopy(b, 0, pagebytes, idx + 1 + _maxKeySize, b.Length);
// duplicatepage = 4 bytes
b = Helper.GetBytes(val.DuplicateBitmapNumber, false);
Buffer.BlockCopy(b, 0, pagebytes, idx + 1 + _maxKeySize + 4, b.Length);
i++;
}
_file.Write(pagebytes, 0, pagebytes.Length);
}
}
// Reads disk page `number` and rebuilds it as a Page : every row becomes
// key -> KeyInfo(record number, duplicate bitmap number) in page.tree.
// With external strings the keys are loaded once per page from _strings, using the block
// number stored in the first row, and then taken by row position.
// Throws when the page does not start with the "PAGE" signature or its item count exceeds
// _PageNodeCount.
public Page LoadPageFromPageNumber(int number)
{
lock (_fileLock)
{
SeekPage(number);
byte[] b = new byte[_PageLength];
_file.Read(b, 0, _PageLength);
if (b[0] == _BlockHeader[0] && b[1] == _BlockHeader[1] && b[2] == _BlockHeader[2] && b[3] == _BlockHeader[3])
{
// create node here
Page page = new Page();
short count = Helper.ToInt16(b, 5);
if (count > _PageNodeCount)
throw new Exception("Count > node size");
page.DiskPageNumber = number;
page.RightPageNumber = Helper.ToInt32(b, 11);
int index = _BlockHeader.Length;
// external keys, filled on the first row when _externalStrings is set
object[] keys = null;
for (int i = 0; i < count; i++)
{
int idx = index + _rowSize * i;
byte ks = b[idx];
T key = default(T);
if (_externalStrings == false)
key = _T.GetObject(b, idx + 1, ks);
else
{
if (keys == null)
{
int blknum = Helper.ToInt32(b, idx + 1, false);
List ablocks = new List();
byte[] bb = _strings.GetData(blknum, out ablocks);
page.allocblocks = ablocks;
keys = (object[])BJSON.ToObject(bb);
}
key = (T)keys[i];
}
int offset = Helper.ToInt32(b, idx + 1 + _maxKeySize);
int duppage = Helper.ToInt32(b, idx + 1 + _maxKeySize + 4);
page.tree.Add(key, new KeyInfo(offset, duppage));
}
return page;
}
else
throw new Exception("Page read error header invalid, number = " + number);
}
}
#endregion
// Writes the page list `_pages` to the page-list pages named in `diskpages`, allocating new
// page numbers when more are needed. Every page but the last holds Global.PageItemCount rows
// and links to the next one; the last page holds the remainder and links to -1.
internal void SavePageList(SafeSortedList _pages, List diskpages)
{
lock (_fileLock)
{
T[] keys = _pages.Keys();
// stays 0 while the block below is commented out
int blocknum = 0;
// TODO : needed??
//if (_externalStrings)
//{
// if (_pagelistalllocblock != null)
// _strings.FreeBlocks(_pagelistalllocblock);
// blocknum = _strings.SaveData("pagelist", BJSON.ToBJSON(keys,
// new BJSONParameters { UseUnicodeStrings = false, UseTypedArrays = false }));
// File.WriteAllBytes(_FileName + ".pagelist", Helper.GetBytes(blocknum, false));
//}
// save page list
int c = (_pages.Count() / Global.PageItemCount) + 1;
// allocate pages needed
while (c > diskpages.Count)
diskpages.Add(GetNewPageNumber());
// one buffer reused for every page-list page
byte[] page = new byte[_PageLength];
for (int i = 0; i < (diskpages.Count - 1); i++)
{
byte[] block = CreateBlockHeader(1, Global.PageItemCount, diskpages[i + 1]);
Buffer.BlockCopy(block, 0, page, 0, block.Length);
for (int j = 0; j < Global.PageItemCount; j++)
CreatePageListData(_pages, i * Global.PageItemCount, block.Length, j, page, blocknum);
SeekPage(diskpages[i]);
_file.Write(page, 0, page.Length);
}
// last page : the remaining entries, next page = -1
c = _pages.Count() % Global.PageItemCount;
byte[] lastblock = CreateBlockHeader(1, (ushort)c, -1);
Buffer.BlockCopy(lastblock, 0, page, 0, lastblock.Length);
int lastoffset = (_pages.Count() / Global.PageItemCount) * Global.PageItemCount;
for (int j = 0; j < c; j++)
CreatePageListData(_pages, lastoffset, lastblock.Length, j, page, blocknum);
SeekPage(diskpages[diskpages.Count - 1]);
_file.Write(page, 0, page.Length);
}
}
// Writes one page-list row into the buffer `page`.
// `offset` + `counter` is the entry's position in `_pages`; `rowindex` is where rows start in
// the buffer (the block header length) and `counter` is the row's position within this page.
// Row layout : [key size][key bytes, _maxKeySize wide][page number][unique count].
// `blocknum` is not used.
private void CreatePageListData(SafeSortedList _pages, int offset, int rowindex, int counter, byte[] page, int blocknum)
{
int idx = rowindex + _rowSize * counter;
// key bytes
byte[] kk;
byte size;
if (_externalStrings == false)
{
kk = _T.GetBytes(_pages.GetKey(counter + offset));
size = (byte)kk.Length;
if (size > _maxKeySize)
size = _maxKeySize;
}
else
{
// with external strings the entry's position in the list is stored in place of the key
kk = new byte[4];
Buffer.BlockCopy(Helper.GetBytes(counter + offset, false), 0, kk, 0, 4);
size = 4;
}
// key size = 1 byte
page[idx] = size;
Buffer.BlockCopy(kk, 0, page, idx + 1, page[idx]);
// offset = 4 bytes
byte[] b = Helper.GetBytes(_pages.GetValue(offset + counter).PageNumber, false);
Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize, b.Length);
// add counts
b = Helper.GetBytes(_pages.GetValue(offset + counter).UniqueCount, false);
Buffer.BlockCopy(b, 0, page, idx + 1 + _maxKeySize + 4, b.Length);
// FEATURE : add dup counts
}
// Stores `recnum` as the last record number indexed by rewriting the file header.
internal void SaveLastRecordNumber(int recnum)
{
// save the last record number indexed to the header
CreateFileHeader(recnum);
}
// Commits the duplicates bitmap store; does nothing when duplicates are not allowed.
internal void BitmapFlush()
{
    if (!_allowDups)
        return;
    _bitmap.Commit(Global.FreeBitmapMemoryOnSave);
}
}
}
================================================
FILE: RaptorDB/Indexes/Indexes.cs
================================================
using System;
using System.Collections.Generic;
using System.IO;
namespace RaptorDB
{
#region [ TypeIndexes ]
// Index over keys of type T, stored in an MGIndex file named "<filename>.mgidx".
// Query values that are not already of type T are converted first; Guid keys are built from
// the value's string form.
internal class TypeIndexes : MGIndex, IIndex where T : IComparable
{
    public TypeIndexes(string path, string filename, byte keysize)
        : base(path, filename + ".mgidx", keysize, true)
    {
    }

    // Indexes `key` for `recnum`; null keys are ignored.
    public void Set(object key, int recnum)
    {
        if (key == null) return; // FEATURE : index null values ??
        base.Set((T)key, recnum);
    }

    // Single-value query : compares the index keys against `from` using `ex`.
    public MGRB Query(RDBExpression ex, object from, int maxsize)
    {
        return base.Query(ex, ToKey(from), maxsize);
    }

    // Range query between `fromkey` and `tokey`.
    // Both bounds go through the same conversion as the single-value query, so Guid bounds
    // given as strings work here too (Convert.ChangeType alone cannot produce a Guid).
    public MGRB Query(object fromkey, object tokey, int maxsize)
    {
        return base.Query(ToKey(fromkey), ToKey(tokey), maxsize);
    }

    // Returns `value` as T, converting only when its runtime type differs from T.
    private T ToKey(object value)
    {
        if (typeof(T).Equals(value.GetType()))
            return (T)value;
        return Converter(value);
    }

    private T Converter(object from)
    {
        if (typeof(T) == typeof(Guid))
        {
            object o = new Guid(from.ToString());
            return (T)o;
        }
        else
            return (T)Convert.ChangeType(from, typeof(T));
    }

    // Saves the index before releasing memory.
    void IIndex.FreeMemory()
    {
        base.SaveIndex();
        base.FreeMemory();
    }

    void IIndex.Shutdown()
    {
        base.Shutdown();
    }

    object[] IIndex.GetKeys()
    {
        return base.GetKeys();
    }
}
#endregion
#region [ BoolIndex ]
// Index for a boolean column : a single bitmap whose set bits are the records holding `true`.
// The bitmap is persisted to the file <path><filename><extension>.
internal class BoolIndex : IIndex
{
    // Loads the bitmap from the index file when that file already exists.
    public BoolIndex(string path, string filename, string extension)
    {
        // create file
        _filename = filename + extension;
        _path = path;
        if (_path.EndsWith(Path.DirectorySeparatorChar.ToString()) == false)
            _path += Path.DirectorySeparatorChar.ToString();

        if (File.Exists(_path + _filename))
            ReadFile();
    }

    private MGRB _bits = new MGRB();
    private string _filename;
    private string _path;
    private object _lock = new object();

    // Returns a copy of the bitmap.
    public MGRB GetBits()
    {
        return _bits.Copy();
    }

    // Sets the bit for `recnum` to the boolean `key`; null keys are ignored.
    public void Set(object key, int recnum)
    {
        lock (_lock)
            if (key != null)
                _bits.Set(recnum, (bool)key);
    }

    // Returns the records whose value matches `from`.
    // NotEqual is answered by looking for the opposite value; every other expression is
    // treated as an equality test.
    public MGRB Query(RDBExpression ex, object from, int maxsize)
    {
        lock (_lock)
        {
            bool wanted = (bool)from;
            if (ex == RDBExpression.NotEqual)
                wanted = !wanted;

            if (wanted)
                return _bits;
            else
                return _bits.Not(maxsize);
        }
    }

    // Optimizes the bitmap and saves it.
    public void FreeMemory()
    {
        lock (_lock)
        {
            _bits.Optimize();
            SaveIndex();
        }
    }

    public void Shutdown()
    {
        // shutdown
        WriteFile();
    }

    public void SaveIndex()
    {
        WriteFile();
    }

    // Replaces the bitmap with (bitmap OR left).
    public void InPlaceOR(MGRB left)
    {
        lock (_lock)
            _bits = _bits.Or(left);
    }

    // Optimizes, serializes and writes the bitmap to the index file.
    private void WriteFile()
    {
        lock (_lock)
        {
            _bits.Optimize();
            var o = _bits.Serialize();
            var b = fastBinaryJSON.BJSON.ToBJSON(o, new fastBinaryJSON.BJSONParameters { UseExtensions = false });
            File.WriteAllBytes(_path + _filename, b);
        }
    }

    // Reads the index file and rebuilds the bitmap from it.
    private void ReadFile()
    {
        byte[] b = File.ReadAllBytes(_path + _filename);
        var o = fastBinaryJSON.BJSON.ToObject(b);
        _bits = new MGRB();
        _bits.Deserialize(o);
    }

    public MGRB Query(object fromkey, object tokey, int maxsize)
    {
        return Query(RDBExpression.Greater, fromkey, maxsize); // range doesn't make sense here just do from
    }

    public object[] GetKeys()
    {
        return new object[] { true, false };
    }
}
#endregion
#region [ FullTextIndex ]
// Full text index built on Hoot. When `sortable` is requested the raw values are also kept in
// a TypeIndexes index so that GetKeys can return them.
internal class FullTextIndex : Hoot, IIndex
{
    public FullTextIndex(string IndexPath, string FileName, bool docmode, bool sortable, ITokenizer tokenizer)
        : base(IndexPath, FileName, docmode, tokenizer)
    {
        if (sortable)
        {
            _idx = new TypeIndexes(IndexPath, FileName, Global.DefaultStringKeySize);
            _sortable = true;
        }
    }

    private bool _sortable = false;
    // secondary index over the raw values; only created when _sortable is set
    private IIndex _idx;

    // Indexes the text `key` for `recnum`, and the raw value too when sortable.
    public void Set(object key, int recnum)
    {
        base.Index(recnum, (string)key);
        if (_sortable)
            _idx.Set(key, recnum);
    }

    // Runs `from` as a full text query; `ex` is not used.
    public MGRB Query(RDBExpression ex, object from, int maxsize)
    {
        return base.Query("" + from, maxsize);
    }

    public void SaveIndex()
    {
        base.Save();
        if (_sortable)
            _idx.SaveIndex();
    }

    public MGRB Query(object fromkey, object tokey, int maxsize)
    {
        return base.Query("" + fromkey, maxsize); // range doesn't make sense here just do from
    }

    public object[] GetKeys()
    {
        if (_sortable)
            return _idx.GetKeys(); // support get keys
        else
            return new object[] { };
    }

    void IIndex.FreeMemory()
    {
        base.FreeMemory();
        this.SaveIndex();
        // the sortable sub-index is owned by this object : release its memory as well,
        // nothing else will
        if (_sortable)
            _idx.FreeMemory();
    }

    void IIndex.Shutdown()
    {
        this.SaveIndex();
        base.Shutdown();
        if (_sortable)
            _idx.Shutdown();
    }
}
#endregion
#region [ EnumIndex ]
// Index for enum columns : values are stored under their ToString() form in an MGIndex file
// named "<filename>.mgidx" with 30 byte keys.
internal class EnumIndex : MGIndex, IIndex //where T : IComparable
{
    public EnumIndex(string path, string filename)
        : base(path, filename + ".mgidx", 30, /*Global.PageItemCount,*/ true)
    {
    }

    // Indexes `key` (by its string form) for `recnum`; null keys are ignored.
    public void Set(object key, int recnum)
    {
        if (key == null) return; // FEATURE : index null values ??
        base.Set(key.ToString(), recnum);
    }

    // Single-value query : `from` may be a T, a name / numeric string, or an integral value.
    public MGRB Query(RDBExpression ex, object from, int maxsize)
    {
        T f = ToKey(from);
        return base.Query(ex, f.ToString(), maxsize);
    }

    // Range query; both bounds accept the same inputs as the single-value query.
    public MGRB Query(object fromkey, object tokey, int maxsize)
    {
        T f = ToKey(fromkey);
        T t = ToKey(tokey);
        return base.Query(f.ToString(), t.ToString(), maxsize);
    }

    // Returns `value` as T, converting only when its runtime type differs from T.
    private T ToKey(object value)
    {
        if (typeof(T).Equals(value.GetType()))
            return (T)value;
        return Converter(value);
    }

    // Converts a query value to T.
    // Convert.ChangeType cannot target a specific enum type, so enums are handled explicitly :
    // strings are parsed (case-insensitive, names or numbers), anything else is treated as the
    // underlying integral value.
    private T Converter(object from)
    {
        Type target = typeof(T);
        if (target.IsEnum)
        {
            string text = from as string;
            if (text != null)
                return (T)Enum.Parse(target, text, true);
            return (T)Enum.ToObject(target, from);
        }
        if (target == typeof(Guid))
        {
            object o = new Guid(from.ToString());
            return (T)o;
        }
        return (T)Convert.ChangeType(from, target);
    }

    // Saves the index before releasing memory.
    void IIndex.FreeMemory()
    {
        base.SaveIndex();
        base.FreeMemory();
    }

    void IIndex.Shutdown()
    {
        base.SaveIndex();
        base.Shutdown();
    }

    object[] IIndex.GetKeys()
    {
        return base.GetKeys();
    }
}
#endregion
#region [ NoIndex ]
// Index that stores nothing : Set is ignored, both queries return MGRB.Fill(maxsize) and
// there are no keys.
internal class NoIndex : IIndex
{
    public void Set(object key, int recnum)
    {
        // ignore set
    }

    public MGRB Query(RDBExpression ex, object from, int maxsize)
    {
        // always return everything
        return Everything(maxsize);
    }

    public MGRB Query(object fromkey, object tokey, int maxsize)
    {
        return Everything(maxsize); // TODO : all or none??
    }

    public object[] GetKeys()
    {
        return new object[0];
    }

    public void SaveIndex()
    {
    }

    public void FreeMemory()
    {
    }

    public void Shutdown()
    {
    }

    // Shared answer for both query forms.
    private static MGRB Everything(int maxsize)
    {
        return MGRB.Fill(maxsize);
    }
}
#endregion
}
================================================
FILE: RaptorDB/Indexes/MGIndex.cs
================================================
using System;
using System.Collections.Generic;
using System.IO;
using RaptorDB.Common;
namespace RaptorDB
{
#region [ internal classes ]
// Page list entry : where a page lives on disk and how many unique keys it holds.
internal struct PageInfo // FEATURE : change back to class for count access for query caching
{
    public int PageNumber;
    public int UniqueCount;

    // `duplicatecount` is accepted but not stored.
    public PageInfo(int pagenum, int uniquecount, int duplicatecount)
    {
        this.PageNumber = pagenum;
        this.UniqueCount = uniquecount;
    }
}
// Value stored per key in a page : the record number and the duplicate bitmap number
// (-1 when none has been assigned).
internal struct KeyInfo
{
    public int RecordNumber;
    public int DuplicateBitmapNumber;

    // Key with no duplicate bitmap yet.
    public KeyInfo(int recnum)
        : this(recnum, -1)
    {
    }

    public KeyInfo(int recnum, int bitmaprec)
    {
        this.RecordNumber = recnum;
        this.DuplicateBitmapNumber = bitmaprec;
    }
}
// In-memory form of one index page : its keys with their KeyInfo plus its position on disk.
internal class Page
{
// Creates an empty, clean page that is not yet placed on disk (page numbers are -1).
public Page() // kludge so the compiler doesn't complain
{
DiskPageNumber = -1;
RightPageNumber = -1;
tree = new SafeDictionary(Global.PageItemCount);
isDirty = false;
FirstKey = default(T);
}
// page number of this page in the index file
public int DiskPageNumber;
// page number of the page to the right
public int RightPageNumber;
// key under which this page is entered in the page list
public T FirstKey;
// true when the page has changes that still have to be saved
public bool isDirty;
// the keys stored in this page
public SafeDictionary tree;
public List allocblocks = null; // for string keys in HF key store
}
#endregion
internal class MGIndex where T : IComparable
{
ILog _log = LogManager.GetLogger(typeof(MGIndex));
// page list : first key of each page -> PageInfo, kept in key order
private SafeSortedList _pageList = new SafeSortedList();
//private SafeDictionary> _cache = new SafeDictionary>();
// loaded pages by disk page number; the concrete type is chosen in the constructor
private IKV> _cache = null;//new SafeSortedList>();
// disk page numbers of the pages that store the page list
private List _pageListDiskPages = new List();
// the file behind this index
private IndexFile _index;
private bool _AllowDuplicates = true;
// last record number passed to Set, or the value read from the file header on open
private int _LastIndexedRecordNumber = 0;
// Opens (or creates) the index file `path` + `filename` and loads its page list.
// When the page list is empty, a first page keyed by RDBDataType.GetEmpty() is created and
// put in the cache as dirty.
public MGIndex(string path, string filename, byte keysize, bool allowdups)
{
// pick the cache container according to Global.UseLessMemoryStructures
if (Global.UseLessMemoryStructures)
_cache = new SafeSortedList>();
else
_cache = new SafeDictionary>();
_AllowDuplicates = allowdups;
if (path.EndsWith(Path.DirectorySeparatorChar.ToString()) == false)
path += Path.DirectorySeparatorChar;
_index = new IndexFile(path + filename, keysize);
// load page list
_index.GetPageList(_pageListDiskPages, _pageList, out _LastIndexedRecordNumber);
if (_pageList.Count() == 0)
{
Page page = new Page();
page.FirstKey = (T)RDBDataType.GetEmpty();
page.DiskPageNumber = _index.GetNewPageNumber();
page.isDirty = true;
_pageList.Add(page.FirstKey, new PageInfo(page.DiskPageNumber, 0, 0));
_cache.Add(page.DiskPageNumber, page);
}
}
// Returns the last indexed record number currently held by this index.
public int GetLastIndexedRecordNumber()
{
    int last = _LastIndexedRecordNumber;
    return last;
}
// Range query : returns the OR of the duplicate bitmaps of every key k with from <= k <= to.
// The bounds are swapped when given in the wrong order. `maxsize` is not used.
public MGRB Query(T from, T to, int maxsize)
{
    MGRB bits = new MGRB();
    if (from.CompareTo(to) > 0) // check values order
    {
        T temp = from;
        from = to;
        to = temp;
    }

    // locate the pages holding the two bounds
    bool found = false;
    int startpos = FindPageOrLowerPosition(from, ref found);
    int endpos = FindPageOrLowerPosition(to, ref found);
    bool samepage = startpos == endpos;

    // from key page
    var page = LoadPage(_pageList.GetValue(startpos).PageNumber);
    T[] keys = page.tree.Keys();
    Array.Sort(keys);
    // start at the first key >= from rather than at 0
    int pos = Array.BinarySearch(keys, from); // FEATURE : rewrite??
    if (pos < 0) pos = ~pos;
    for (int i = pos; i < keys.Length; i++)
    {
        T k = keys[i];
        int bn = page.tree[k].DuplicateBitmapNumber;
        // the upper bound only applies here when both bounds fall in this page
        bool inRange = k.CompareTo(from) >= 0 && (!samepage || k.CompareTo(to) <= 0);
        if (inRange)
            bits = bits.Or(_index.GetDuplicateBitmap(bn));
    }

    if (!samepage)
    {
        // to key page
        page = LoadPage(_pageList.GetValue(endpos).PageNumber);
        keys = page.tree.Keys();
        Array.Sort(keys);
        pos = Array.BinarySearch(keys, to);
        if (pos < 0) pos = ~pos;
        // When `to` is above every key of the page, ~pos equals keys.Length : clamp so the
        // loop never indexes past the last key (this also covers an empty page).
        int last = Math.Min(pos, keys.Length - 1);
        for (int i = 0; i <= last; i++)
        {
            T k = keys[i];
            int bn = page.tree[k].DuplicateBitmapNumber;
            if (k.CompareTo(to) <= 0)
                bits = bits.Or(_index.GetDuplicateBitmap(bn));
        }
        // do all pages in between
        for (int i = startpos + 1; i < endpos; i++)
        {
            doPageOperation(ref bits, i);
        }
    }
    return bits;
}
// Single-value query : routes to the equal / less / greater handler matching `exp`.
// Expressions without a handler produce an empty result.
public MGRB Query(RDBExpression exp, T from, int maxsize)
{
    switch (exp)
    {
        case RDBExpression.Equal:
        case RDBExpression.NotEqual:
            return doEqualOp(exp, from, maxsize);

        // FEATURE : optimize complement search if page count less for the complement pages
        case RDBExpression.Less:
        case RDBExpression.LessEqual:
            return doLessOp(exp, from);

        case RDBExpression.Greater:
        case RDBExpression.GreaterEqual:
            return doMoreOp(exp, from);

        default:
            return new MGRB(); // blank results
    }
}
// serializes calls to Set
private object _setlock = new object();
// Stores `val` as the record number for `key`.
// With duplicates allowed, the key's duplicate bitmap (allocated on first use) also receives
// the previous record number, if any, and `val`.
// The page is split once it holds more than Global.PageItemCount keys.
public void Set(T key, int val)
{
lock (_setlock)
{
PageInfo pi;
Page page = LoadPage(key, out pi);
KeyInfo ki;
if (page.tree.TryGetValue(key, out ki))
{
// item exists
if (_AllowDuplicates)
{
// records the previous record number in the key's bitmap
SaveDuplicate(key, ref ki);
// set current record in the bitmap also
_index.SetBitmapDuplicate(ki.DuplicateBitmapNumber, val);
}
ki.RecordNumber = val;
page.tree[key] = ki; // structs need resetting
}
else
{
// new item
ki = new KeyInfo(val);
if (_AllowDuplicates)
SaveDuplicate(key, ref ki);
// NOTE(review): PageInfo is a struct and `pi` is a local copy that is never written
// back, so this increment does not appear to reach _pageList — confirm.
pi.UniqueCount++;
page.tree.Add(key, ki);
}
if (page.tree.Count() > Global.PageItemCount)
SplitPage(page);
_LastIndexedRecordNumber = val;
page.isDirty = true;
}
}
// Looks up `key`. Returns true and its record number in `val` when present,
// otherwise false with `val` = -1.
public bool Get(T key, out int val)
{
    val = -1;
    PageInfo pi;
    KeyInfo ki;
    var page = LoadPage(key, out pi);
    if (!page.tree.TryGetValue(key, out ki))
        return false;

    val = ki.RecordNumber;
    return true;
}
// Writes every dirty cached page to disk in ascending page number order, then saves the page
// list and flushes the duplicate bitmaps.
public void SaveIndex()
{
    int[] pageNumbers = _cache.Keys();
    Array.Sort(pageNumbers);

    // save index to disk
    for (int n = 0; n < pageNumbers.Length; n++)
    {
        var cached = _cache.GetValue(pageNumbers[n]);
        if (!cached.isDirty)
            continue;
        _index.SavePage(cached);
        cached.isDirty = false;
    }

    _index.SavePageList(_pageList, _pageListDiskPages);
    _index.BitmapFlush();
}
// Saves the index (pages, page list and bitmaps), then shuts the index file down.
public void Shutdown()
{
    // SaveIndex already writes the page list, so no separate page list save is needed
    SaveIndex();

    _index.Shutdown();
}
// Releases memory : the index file's bitmap memory and every cached page that has no unsaved
// changes. Dirty pages stay cached. Failures are logged and do not propagate.
public void FreeMemory()
{
    _index.FreeMemory();
    try
    {
        int total = _cache.Count();
        int released = 0;
        // Keys() hands back an array, so removing entries while walking it is safe
        int[] cached = _cache.Keys();
        foreach (int k in cached)
        {
            var val = _cache.GetValue(k);
            if (val.isDirty == false)
            {
                _cache.Remove(k);
                released++;
            }
        }
        _log.Info("releasing page count = " + released + " out of " + total);
    }
    catch (Exception ex)
    {
        // still best effort, but leave a trace instead of hiding the failure
        _log.Info("FreeMemory failed : " + ex);
    }
}
// Returns the record numbers held in the duplicate bitmap of `key`.
// An empty list is returned when the key is not found or has no duplicate bitmap (-1).
public IEnumerable GetDuplicates(T key)
{
PageInfo pi;
Page page = LoadPage(key, out pi);
KeyInfo ki;
bool ret = page.tree.TryGetValue(key, out ki);
if (ret)
// get duplicates
if (ki.DuplicateBitmapNumber != -1)
return _index.GetDuplicatesRecordNumbers(ki.DuplicateBitmapNumber);
return new List();
}
// Passes `recnum` to the index file, which stores it as the last record number indexed.
public void SaveLastRecordNumber(int recnum)
{
_index.SaveLastRecordNumber(recnum);
}
// Removes `key` from its page. Returns true when the key was present.
// The page is marked dirty whether or not anything was removed.
public bool RemoveKey(T key)
{
    PageInfo pi;
    var page = LoadPage(key, out pi);
    bool removed = page.tree.Remove(key);
    // TODO : reset the first key for page ??
    if (removed)
    {
        // adjusts the local PageInfo value only
        pi.UniqueCount--;
        // FEATURE : decrease dup count
    }
    page.isDirty = true;
    return removed;
}
#region [ P R I V A T E ]
// Greater / GreaterEqual : ORs the bitmaps of every page after the key page, then those of the
// keys in the key page that are above `key` (and equal to it for GreaterEqual).
private MGRB doMoreOp(RDBExpression exp, T key)
{
    bool found = false;
    int pagepos = FindPageOrLowerPosition(key, ref found);
    MGRB result = new MGRB();

    // all the pages after
    for (int i = pagepos + 1; i < _pageList.Count(); i++)
        doPageOperation(ref result, i);

    // key page
    var page = LoadPage(_pageList.GetValue(pagepos).PageNumber);
    T[] keys = page.tree.Keys();
    Array.Sort(keys);

    // start at the first key >= `key` rather than at 0
    int first = Array.BinarySearch(keys, key);
    if (first < 0)
        first = ~first;

    bool includeEqual = exp == RDBExpression.GreaterEqual;
    for (int i = first; i < keys.Length; i++)
    {
        T k = keys[i];
        int bn = page.tree[k].DuplicateBitmapNumber;
        int cmp = k.CompareTo(key);
        if (cmp > 0 || (includeEqual && cmp == 0))
            result = result.Or(_index.GetDuplicateBitmap(bn));
    }
    return result;
}
// Less / LessEqual : ORs the bitmaps of every page before the key page, then those of the keys
// in the key page that are below `key` (and equal to it for LessEqual).
private MGRB doLessOp(RDBExpression exp, T key)
{
    bool found = false;
    int pagepos = FindPageOrLowerPosition(key, ref found);
    MGRB result = new MGRB();

    // All the pages before the key page, INCLUDING the one right before it : they only hold
    // keys below the key page's first key.
    for (int i = 0; i < pagepos; i++)
        doPageOperation(ref result, i);

    // key page
    var page = LoadPage(_pageList.GetValue(pagepos).PageNumber);
    T[] keys = page.tree.Keys();
    Array.Sort(keys);

    bool includeEqual = exp == RDBExpression.LessEqual;
    // keys are sorted : walk up from the lowest and stop at the first key above `key`,
    // so the equal key (if present) is reached and LessEqual can include it
    for (int i = 0; i < keys.Length; i++)
    {
        T k = keys[i];
        int cmp = k.CompareTo(key);
        if (cmp > 0)
            break;
        if (cmp < 0 || includeEqual)
        {
            int bn = page.tree[k].DuplicateBitmapNumber;
            result = result.Or(_index.GetDuplicateBitmap(bn));
        }
    }
    return result;
}
// Equal / NotEqual : returns the key's duplicate bitmap for Equal and its negation otherwise.
// For a key that is not in the index, NotEqual gives the negation of an empty bitmap and any
// other expression gives an empty bitmap.
private MGRB doEqualOp(RDBExpression exp, T key, int maxsize)
{
    PageInfo pi;
    KeyInfo k;
    var page = LoadPage(key, out pi);

    if (!page.tree.TryGetValue(key, out k))
    {
        return exp == RDBExpression.NotEqual
            ? new MGRB().Not(maxsize)
            : new MGRB();
    }

    MGRB duplicates = _index.GetDuplicateBitmap(k.DuplicateBitmapNumber);
    return exp == RDBExpression.Equal
        ? duplicates
        : duplicates.Not(maxsize);
}
// ORs the duplicate bitmap of every key in the page at page list position `pageidx` into `res`.
private void doPageOperation(ref MGRB res, int pageidx)
{
    var page = LoadPage(_pageList.GetValue(pageidx).PageNumber);
    T[] keys = page.tree.Keys(); // avoid sync issues
    for (int i = 0; i < keys.Length; i++)
    {
        int bn = page.tree[keys[i]].DuplicateBitmapNumber;
        res = res.Or(_index.GetDuplicateBitmap(bn));
    }
}
// total seconds spent in SplitPage
private double _totalsplits = 0;
// Splits `page` in two : the upper half of its sorted keys moves to a new page that is linked
// in as the right neighbour, and both pages are (re)entered in the page list under their first
// keys. The new page is added to the cache as dirty.
private void SplitPage(Page page)
{
// split the page
DateTime dt = FastDateTime.Now;
Page newpage = new Page();
newpage.DiskPageNumber = _index.GetNewPageNumber();
// insert the new page between `page` and its old right neighbour
newpage.RightPageNumber = page.RightPageNumber;
newpage.isDirty = true;
page.RightPageNumber = newpage.DiskPageNumber;
// the page's first key may change below, so its page list entry is removed and re-added
_pageList.Remove(page.FirstKey);
// get and sort keys
T[] keys = page.tree.Keys();
Array.Sort(keys);
// copy data to new
for (int i = keys.Length / 2; i < keys.Length; i++)
{
newpage.tree.Add(keys[i], page.tree[keys[i]]);
// remove from old page
page.tree.Remove(keys[i]);
}
// set the first key
newpage.FirstKey = keys[keys.Length / 2]; // new key
// remove keys from page list
_pageList.Remove(newpage.FirstKey);
page.FirstKey = keys[0]; // new key
// re add to page list
_pageList.Add(page.FirstKey, new PageInfo(page.DiskPageNumber, page.tree.Count(), 0));
_pageList.Add(newpage.FirstKey, new PageInfo(newpage.DiskPageNumber, newpage.tree.Count(), 0));
_cache.Add(newpage.DiskPageNumber, newpage);
_totalsplits += FastDateTime.Now.Subtract(dt).TotalSeconds;
}
// Returns the page responsible for `key` and hands back its page list entry
// through `pageinfo`. A null key selects the first entry of the page list.
private Page LoadPage(T key, out PageInfo pageinfo)
{
    bool found = false;
    int pos = key != null ? FindPageOrLowerPosition(key, ref found) : 0;

    pageinfo = _pageList.GetValue(pos);

    // cache lookup / disk load is shared with the page number overload
    return LoadPage(pageinfo.PageNumber);
}
// Returns the page with the given page number, taking it from the cache when
// present and otherwise loading it through the index file and caching it.
private Page LoadPage(int pagenum)
{
    Page cached;
    if (_cache.TryGetValue(pagenum, out cached))
        return cached;

    // not cached yet : load page from disk
    Page loaded = _index.LoadPageFromPageNumber(pagenum);
    _cache.Add(pagenum, loaded);
    return loaded;
}
// Records ki.RecordNumber in the duplicate bitmap belonging to `ki`,
// first assigning a bitmap number when `ki` still carries -1.
// `key` is not used by this method.
private void SaveDuplicate(T key, ref KeyInfo ki)
{
    bool unassigned = ki.DuplicateBitmapNumber == -1;
    if (unassigned)
    {
        ki.DuplicateBitmapNumber = _index.GetBitmapDuplaicateFreeRecordNumber();
    }

    _index.SetBitmapDuplicate(ki.DuplicateBitmapNumber, ki.RecordNumber);
}
// Binary search over the page list keys.
// Returns the index whose key equals `key` (and sets `found` to true), or
// otherwise the last probed index whose key compared lower than `key`
// (0 when no probed key was lower or the page list is empty).
// `found` is left untouched when there is no exact match.
private int FindPageOrLowerPosition(T key, ref bool found)
{
    int count = _pageList.Count();
    if (count == 0)
        return 0;

    int low = 0;
    int high = count - 1;
    int lower = 0;

    while (low <= high)
    {
        int probe = (low + high) >> 1;
        int cmp = _pageList.GetKey(probe).CompareTo(key);

        if (cmp == 0)
        {
            found = true;
            return probe;
        }
        else if (cmp < 0)
        {
            // probe key is lower : remember it and continue to the right
            lower = probe;
            low = probe + 1;
        }
        else
        {
            high = probe - 1;
        }
    }
    return lower;
}
#endregion
internal object[] GetKeys()
{
List