You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
403 lines
9.9 KiB
403 lines
9.9 KiB
2 years ago
|
using EC.Helper.RabbitFunc.Syntax;
|
||
|
|
||
|
namespace EC.Helper.RabbitFunc.Compiler;
|
||
|
|
||
|
internal class Tokenizer
|
||
|
{
|
||
|
#region Attr

/// <summary>Number of chars allocated for the read buffer by the constructor.</summary>
private const int DefaultBufferCapacity = 1024;

/// <summary>Reader that supplies the characters to tokenize.</summary>
private readonly TextReader reader;

// Window of characters read from the reader; RefillBuffer grows or compacts it.
private char[] buffer;

// Buffer offset at which the current token begins.
private int start;

// Buffer offset one past the last character read from the reader.
private int end;

// Buffer offset of the next character Peek/NextChar will return.
private int position;

// Set by Peek once the reader has no more characters to give.
private bool endOfStream;

// When true, IsEoln treats '\r' as an end-of-line character as well as '\n'.
private bool multiEolns;

// Buffer offset one past the current token; DiscardToken resets it to -1 ("not marked yet").
private int tokenEnd;

// Index of the current token's first character, accumulated across discarded tokens.
private int tokenStartIndex;

// Index one past the current token's last character, set by MarkTokenEnd.
private int tokenEndIndex;

// Index values recorded right after each '\n' that has been consumed, in ascending order.
private List<int> newLineLocations;

// Location added as an offset to every location produced by IndexToLocation.
private SourceLocation initialLocation;

/// <summary>True once a read attempt found the underlying reader exhausted.</summary>
public bool EndOfStream
{
    get => endOfStream;
}

/// <summary>
/// Index of the read position, clamped to the end of the buffered data and expressed
/// relative to the index of the current token's start.
/// </summary>
public int Index
{
    get => tokenStartIndex + Math.Min(position, end) - start;
}

/// <summary>Location obtained by converting <see cref="Index"/> with IndexToLocation.</summary>
public SourceLocation Position
{
    get => IndexToLocation(Index);
}

/// <summary>Start index and length of the most recently marked token.</summary>
public IndexSpan TokenSpan
{
    get => new IndexSpan(tokenStartIndex, tokenEndIndex - tokenStartIndex);
}

// Length in chars of the current token; negative while tokenEnd is still -1.
private int TokenLength
{
    get => tokenEnd - start;
}

#endregion Attr
|
||
|
|
||
|
/// <summary>
/// Creates a tokenizer that reads its input from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">Character source; it is read lazily as tokens are requested.</param>
/// <exception cref="ArgumentNullException"><paramref name="reader"/> is null.</exception>
public Tokenizer(TextReader reader)
{
    // Fail at construction rather than with a NullReferenceException on the first buffer refill.
    this.reader = reader ?? throw new ArgumentNullException(nameof(reader));
    Init(DefaultBufferCapacity);
}
|
||
|
|
||
|
/// <summary>
/// Allocates the read buffer and resets the line-tracking state.
/// </summary>
/// <param name="bufferCapacity">Number of chars to allocate for the read buffer.</param>
private void Init(int bufferCapacity)
{
    buffer = new char[bufferCapacity];
    newLineLocations = new();
    initialLocation = SourceLocation.MinValue;

    // '\r' is treated as an end-of-line character by IsEoln while this is set.
    multiEolns = true;
}
|
||
|
|
||
|
/// <summary>
/// Moves <paramref name="count"/> chars beginning at <paramref name="start"/> to the front of a
/// buffer holding <paramref name="newSize"/> chars and stores that buffer in
/// <paramref name="array"/>.
/// </summary>
/// <param name="array">Buffer to resize; replaced only when the size actually changes.</param>
/// <param name="newSize">Required length of the resulting buffer.</param>
/// <param name="start">Offset in the current buffer of the first char to keep.</param>
/// <param name="count">Number of chars to keep.</param>
private static void ResizeInternal(ref char[] array, int newSize, int start, int count)
{
    // Same size: reuse the array and just slide the kept chars to the front.
    char[] target = newSize == array.Length ? array : new char[newSize];

    // Array.Copy counts in elements (no manual byte math) and is safe when
    // source and destination are the same array with overlapping ranges.
    Array.Copy(array, start, target, 0, count);
    array = target;
}
|
||
|
|
||
|
/// <summary>
/// Tells whether <paramref name="ch"/> may begin a name: an underscore or any letter.
/// </summary>
/// <param name="ch">Character code as returned by the reader helpers (-1 at end of input).</param>
/// <returns>True for '_' and for characters accepted by <see cref="char.IsLetter(char)"/>.</returns>
private static bool IsNameStart(int ch)
{
    if (ch == '_')
    {
        return true;
    }

    return char.IsLetter((char)ch);
}
|
||
|
|
||
|
/// <summary>
/// Tells whether <paramref name="ch"/> may appear inside a name: an underscore, a letter or a digit.
/// </summary>
/// <param name="ch">Character code as returned by the reader helpers (-1 at end of input).</param>
/// <returns>True for '_' and for characters accepted by <see cref="char.IsLetterOrDigit(char)"/>.</returns>
private static bool IsNamePart(int ch)
{
    if (ch == '_')
    {
        return true;
    }

    return char.IsLetterOrDigit((char)ch);
}
|
||
|
|
||
|
/// <summary>
/// Builds the error token reported for a character the tokenizer cannot start a token with.
/// </summary>
/// <param name="ch">Code of the offending character.</param>
/// <returns>An error token whose text is the single offending character.</returns>
private static Token BadChar(int ch)
{
    char offending = (char)ch;
    return Token.Error(offending.ToString());
}
|
||
|
|
||
|
/// <summary>
/// Reads more characters from the reader into the buffer, first making room when the
/// buffered data already reaches the end of the array.
/// </summary>
private void RefillBuffer()
{
    bool bufferIsFull = end == buffer.Length;
    if (bufferIsFull)
    {
        // Only the chars from the current token start onwards still matter.
        int pending = end - start;

        // At least double the pending data, never shrink, and keep the read position addressable.
        int capacity = Math.Max(pending * 2, buffer.Length);
        capacity = Math.Max(capacity, position);

        ResizeInternal(ref buffer, capacity, start, pending);

        // Everything moved to the front of the buffer: rebase the offsets by the old start.
        position -= start;
        end = pending;
        start = 0;
    }

    int free = buffer.Length - end;
    end += reader.Read(buffer, end, free);
}
|
||
|
|
||
|
/// <summary>
/// Returns the character at the read position without consuming it, refilling the buffer
/// when the buffered data is exhausted.
/// </summary>
/// <returns>The character code, or -1 (after setting the end-of-stream flag) when no more input exists.</returns>
private int Peek()
{
    if (position < end)
    {
        return buffer[position];
    }

    RefillBuffer();

    if (position < end)
    {
        return buffer[position];
    }

    // The refill produced nothing at the read position: the input is exhausted.
    endOfStream = true;
    return -1;
}
|
||
|
|
||
|
/// <summary>
/// Consumes the next character only when it equals <paramref name="ch"/>.
/// </summary>
/// <param name="ch">Character code expected at the read position.</param>
/// <returns>True when the character matched and was consumed; false when nothing was consumed.</returns>
private bool NextChar(int ch)
{
    if (Peek() != ch)
    {
        return false;
    }

    position++;
    return true;
}
|
||
|
|
||
|
/// <summary>
/// Returns the character at the read position and advances past it.
/// </summary>
/// <returns>The character code, or -1 at end of input. The position advances in both cases.</returns>
private int NextChar()
{
    int current = Peek();

    // Advance unconditionally, even at end of input; callers undo this with BufferBack.
    position += 1;
    return current;
}
|
||
|
|
||
|
/// <summary>
/// Advances the read position to the end of the current line, leaving the end-of-line
/// character (or the end of input) unconsumed.
/// </summary>
/// <returns>The end-of-line character that stopped the scan, or -1 at end of input.</returns>
private int ReadLine()
{
    int current;
    do
    {
        current = NextChar();
    } while (current != -1 && !IsEoln(current));

    // The terminator is pushed back, so NextToken reads it again and records the
    // newline location there. Recording it here as well would register the same
    // '\n' twice and shift the line number of everything after the comment.
    BufferBack();
    return current;
}
|
||
|
|
||
|
/// <summary>
/// Tells whether <paramref name="current"/> ends a line.
/// </summary>
/// <param name="current">Character code to test.</param>
/// <returns>True for '\n', and for '\r' while the multi-EOL mode is enabled.</returns>
private bool IsEoln(int current)
{
    if (current == '\n')
    {
        return true;
    }

    return multiEolns && current == '\r';
}
|
||
|
|
||
|
/// <summary>
/// Moves the read position back by one character.
/// </summary>
private void BufferBack()
{
    position--;
}
|
||
|
|
||
|
/// <summary>
/// Moves the read position by <paramref name="disp"/> characters.
/// </summary>
/// <param name="disp">Displacement to apply; negative values move backwards.</param>
private void SeekRelative(int disp)
{
    position = position + disp;
}
|
||
|
|
||
|
/// <summary>
/// Marks the read position, clamped to the end of the buffered data, as the end of the
/// current token and updates the token's end index accordingly.
/// </summary>
private void MarkTokenEnd()
{
    // The position can run past the buffered data after reading end of input.
    int clamped = Math.Min(position, end);

    tokenEnd = clamped;
    tokenEndIndex = tokenStartIndex + (clamped - start);
}
|
||
|
|
||
|
/// <summary>
/// Drops the current token so that the next one starts where it ended.
/// </summary>
private void DiscardToken()
{
    bool endNotMarked = tokenEnd == -1;
    if (endNotMarked)
    {
        // Nobody marked the end yet: the token ends at the read position.
        MarkTokenEnd();
    }

    tokenStartIndex = tokenEndIndex;
    start = tokenEnd;

    // -1 means "end not marked yet" for the token that starts now.
    tokenEnd = -1;
}
|
||
|
|
||
|
/// <summary>
/// Returns the text of the current token, from its start up to its marked end.
/// </summary>
private string GetTokenString()
{
    int length = tokenEnd - start;
    return new string(buffer, start, length);
}
|
||
|
|
||
|
/// <summary>
/// Returns the text of the current token with its first <paramref name="offset"/> chars removed.
/// </summary>
/// <param name="offset">Number of chars to skip from the token start.</param>
private string GetTokenSubstring(int offset)
{
    int first = start + offset;
    int remaining = tokenEnd - start - offset;
    return new string(buffer, first, remaining);
}
|
||
|
|
||
|
/// <summary>
/// Returns <paramref name="length"/> chars of buffered text beginning
/// <paramref name="offset"/> chars after the current token's start.
/// </summary>
/// <param name="offset">Number of chars to skip from the token start.</param>
/// <param name="length">Number of chars to return.</param>
private string GetTokenSubstring(int offset, int length)
{
    int first = start + offset;
    return new string(buffer, first, length);
}
|
||
|
|
||
|
/// <summary>
/// Skips a run of spaces and tabs and restarts the current token at the first character
/// that follows the run.
/// </summary>
/// <returns>The first character after the run (already consumed), or -1 at end of input.</returns>
private int SkipWhiteSpace()
{
    int current = NextChar();
    while (current == ' ' || current == '\t')
    {
        current = NextChar();
    }

    // Step back so the skipped run is discarded without the character that follows it,
    // then step forward again because that character counts as consumed by the caller.
    BufferBack();
    DiscardToken();
    SeekRelative(1);
    return current;
}
|
||
|
|
||
|
/// <summary>
/// Converts an index into a location using the recorded newline indexes, offset by the
/// initial location.
/// </summary>
/// <param name="index">Index to convert.</param>
/// <returns>
/// A location whose line is derived from the number of recorded newlines at or before
/// <paramref name="index"/>, and whose column is reduced by the current token length
/// when that length is positive.
/// </returns>
private SourceLocation IndexToLocation(int index)
{
    int found = newLineLocations.BinarySearch(index);
    int absoluteIndex = index + initialLocation.Index;

    // The column is pulled back by the token length, but only once a token end is marked.
    int tokenLength = TokenLength;
    int columnShift = tokenLength > 0 ? tokenLength : 0;

    if (found == -1)
    {
        // No recorded newline precedes the index: still on the initial line.
        int firstLineColumn = checked(index + initialLocation.Column) - columnShift;
        return new SourceLocation(absoluteIndex, initialLocation.Line, firstLineColumn);
    }

    // Exact hit: use that entry. Miss: use the closest recorded newline below the index.
    int lineEntry = found >= 0 ? found : ~found - 1;

    int column = index - newLineLocations[lineEntry] + initialLocation.Column - columnShift;
    return new SourceLocation(absoluteIndex, lineEntry + 1 + initialLocation.Line, column);
}
|
||
|
|
||
|
/// <summary>
/// Reads an identifier. The caller has already consumed at least the first character;
/// the last consumed character is re-read here to validate it.
/// </summary>
/// <returns>
/// An identifier token holding the text from the token start up to the last name character,
/// or an error token when the re-read character cannot start a name.
/// </returns>
private Token ReadName()
{
    BufferBack();
    int current = NextChar();

    if (!IsNameStart(current))
    {
        // Mark the end like every other token does, so TokenSpan covers the offending
        // character instead of being left empty.
        MarkTokenEnd();
        return BadChar(current);
    }

    while (IsNamePart(current))
    {
        current = NextChar();
    }

    // The loop consumed one character past the name; give it back.
    BufferBack();
    MarkTokenEnd();
    return Token.Identifier(GetTokenString());
}
|
||
|
|
||
|
/// <summary>
/// Reads a numeric literal: a run of digits optionally followed by '.' and another run of
/// digits. The caller has already consumed the first digit.
/// </summary>
/// <returns>A constant token holding the literal parsed as a double.</returns>
private Token ReadNumber()
{
    int current;
    do
    {
        current = NextChar();
    } while (current >= '0' && current <= '9');

    if (current == '.')
    {
        do
        {
            current = NextChar();
        } while (current >= '0' && current <= '9');
    }

    // The loops consumed one character past the literal; give it back.
    BufferBack();
    MarkTokenEnd();

    // The literal always uses '.' as decimal separator, so it must not be parsed with
    // the current culture (which may expect ',' and treat '.' as a group separator).
    double value = double.Parse(GetTokenString(), System.Globalization.CultureInfo.InvariantCulture);
    return Token.Constant(value);
}
|
||
|
|
||
|
/// <summary>
/// Reads a comment. The caller has already consumed the two-character opener
/// ("//" or "/*"); its second character is re-read here to tell the two forms apart.
/// </summary>
/// <returns>
/// A comment token holding the text after the opener. For a block comment the closing
/// "*/" is excluded from the text but is part of the token, so the following token starts
/// after it. A block comment that is never closed extends to the end of the input.
/// </returns>
private Token ReadComment()
{
    BufferBack();
    int current = NextChar();

    if (current != '*')
    {
        // Line comment: everything up to (not including) the end of the line.
        ReadLine();
        MarkTokenEnd();
        return Token.Comment(GetTokenSubstring(2));
    }

    bool terminated = false;
    while (!terminated)
    {
        current = NextChar();
        if (current == -1)
        {
            // End of input inside the comment: stop instead of spinning forever.
            break;
        }

        if (current == '\n')
        {
            newLineLocations.Add(Index);
        }

        terminated = current == '*' && NextChar('/');
    }

    if (!terminated)
    {
        // Undo the advance made while reading the end of input.
        BufferBack();
        MarkTokenEnd();
        return Token.Comment(GetTokenSubstring(2));
    }

    // Mark the end after the closing "*/" so the next token does not begin with it,
    // then cut the two-character opener and closer out of the text.
    MarkTokenEnd();
    int textLength = tokenEnd - start - 4;
    return Token.Comment(GetTokenSubstring(2, textLength));
}
|
||
|
|
||
|
/// <summary>
/// Reads the next token from the input, skipping spaces, tabs and line breaks.
/// </summary>
/// <returns>
/// The next token; the end-of-file token once the input is exhausted; an error token for
/// a character that cannot start any token. Note that ';' produces the new-line token.
/// </returns>
public Token NextToken()
{
    DiscardToken();
    int current = NextChar();

    while (true)
    {
        switch (current)
        {
            case -1:
                return Tokens.EndOfFileToken;

            case '\n':
                newLineLocations.Add(Index);
                current = SkipWhiteSpace();
                continue;

            // Tab is handled here as well: SkipWhiteSpace already skips it, so it must
            // not be reported as a bad character when it directly follows a token.
            case '\t':
            case '\r':
            case ' ':
                current = SkipWhiteSpace();
                continue;

            case '!':
                return CompleteToken(NextChar('=') ? Tokens.NotEqualToken : Tokens.NotToken);

            case '%':
                return CompleteToken(Tokens.ModToken);

            case '(':
                return CompleteToken(Tokens.LeftParenToken);

            case ')':
                return CompleteToken(Tokens.RightParenToken);

            case '*':
                return CompleteToken(Tokens.MultiplyToken);

            case '+':
                return CompleteToken(Tokens.AddToken);

            case ',':
                return CompleteToken(Tokens.CommaToken);

            case '-':
                return CompleteToken(Tokens.SubtractToken);

            case '.':
                return CompleteToken(Tokens.DotToken);

            case '/':
                if (NextChar('*') || NextChar('/'))
                {
                    return ReadComment();
                }

                return CompleteToken(Tokens.DivideToken);

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                return ReadNumber();

            case ';':
                return CompleteToken(Tokens.NewLineToken);

            case '<':
                return CompleteToken(NextChar('=') ? Tokens.LessThanOrEqualToken : Tokens.LessThanToken);

            case '=':
                return CompleteToken(NextChar('=') ? Tokens.EqualToken : Tokens.AssignToken);

            case '>':
                return CompleteToken(NextChar('=') ? Tokens.GreaterThanOrEqualToken : Tokens.GreaterThanToken);

            case '[':
                return CompleteToken(Tokens.LeftBracketToken);

            case ']':
                return CompleteToken(Tokens.RightBracketToken);

            case '^':
                return CompleteToken(Tokens.PowerToken);

            case 'i':
                // "if" is the keyword only when no further name character follows;
                // otherwise it is the beginning of an identifier such as "iffy".
                if (NextChar('f') && !IsNamePart(Peek()))
                {
                    return CompleteToken(Tokens.IFToken);
                }

                // ReadName re-reads the last consumed character ('i' or 'f'); both are
                // valid name characters and the token text is taken from the token start.
                return ReadName();

            default:
                return ReadName();
        }
    }
}

// Marks the read position as the end of the current token and hands the token back.
private Token CompleteToken(Token token)
{
    MarkTokenEnd();
    return token;
}
|
||
|
}
|