package bluej.parser.lexer;
import java.io.IOException;
import java.io.Reader;
import java.util.HashMap;
import java.util.Map;
import bluej.parser.EscapedUnicodeReader;
import bluej.parser.TokenStream;
/**
 * A Java lexer. Breaks up a source stream into tokens.
 *
 * @author Marion Zalk
 */
public final class JavaLexer
implements TokenStream{
private StringBuffer textBuffer = new StringBuffer();
private EscapedUnicodeReader reader;
private int rChar;
private int beginColumn, beginLine, beginPosition;
private int endColumn, endLine, endPosition;
private boolean generateWhitespaceTokens = false;
private boolean handleComments = true;
/**
 * Maps each reserved word recognised by this lexer to its token type.
 * The reference is final so the shared table cannot be swapped out after
 * class initialisation; it is only read (see getWordType()).
 */
private static final Map<String,Integer> keywords = new HashMap<String,Integer>();

static {
    keywords.put("abstract", JavaTokenTypes.ABSTRACT);
    keywords.put("assert", JavaTokenTypes.LITERAL_assert);
    keywords.put("boolean", JavaTokenTypes.LITERAL_boolean);
    keywords.put("break", JavaTokenTypes.LITERAL_break);
    keywords.put("byte", JavaTokenTypes.LITERAL_byte);
    keywords.put("case", JavaTokenTypes.LITERAL_case);
    keywords.put("catch", JavaTokenTypes.LITERAL_catch);
    keywords.put("char", JavaTokenTypes.LITERAL_char);
    keywords.put("class", JavaTokenTypes.LITERAL_class);
    keywords.put("continue", JavaTokenTypes.LITERAL_continue);
    keywords.put("default", JavaTokenTypes.LITERAL_default);
    keywords.put("do", JavaTokenTypes.LITERAL_do);
    keywords.put("double", JavaTokenTypes.LITERAL_double);
    keywords.put("else", JavaTokenTypes.LITERAL_else);
    keywords.put("enum", JavaTokenTypes.LITERAL_enum);
    keywords.put("extends", JavaTokenTypes.LITERAL_extends);
    keywords.put("false", JavaTokenTypes.LITERAL_false);
    keywords.put("final", JavaTokenTypes.FINAL);
    keywords.put("finally", JavaTokenTypes.LITERAL_finally);
    keywords.put("float", JavaTokenTypes.LITERAL_float);
    keywords.put("for", JavaTokenTypes.LITERAL_for);
    keywords.put("goto", JavaTokenTypes.GOTO);
    keywords.put("if", JavaTokenTypes.LITERAL_if);
    keywords.put("implements", JavaTokenTypes.LITERAL_implements);
    keywords.put("import", JavaTokenTypes.LITERAL_import);
    keywords.put("instanceof", JavaTokenTypes.LITERAL_instanceof);
    keywords.put("int", JavaTokenTypes.LITERAL_int);
    keywords.put("interface", JavaTokenTypes.LITERAL_interface);
    keywords.put("long", JavaTokenTypes.LITERAL_long);
    keywords.put("native", JavaTokenTypes.LITERAL_native);
    keywords.put("new", JavaTokenTypes.LITERAL_new);
    keywords.put("null", JavaTokenTypes.LITERAL_null);
    keywords.put("package", JavaTokenTypes.LITERAL_package);
    keywords.put("private", JavaTokenTypes.LITERAL_private);
    keywords.put("protected", JavaTokenTypes.LITERAL_protected);
    keywords.put("public", JavaTokenTypes.LITERAL_public);
    keywords.put("return", JavaTokenTypes.LITERAL_return);
    keywords.put("short", JavaTokenTypes.LITERAL_short);
    keywords.put("static", JavaTokenTypes.LITERAL_static);
    keywords.put("strictfp", JavaTokenTypes.STRICTFP);
    keywords.put("super", JavaTokenTypes.LITERAL_super);
    keywords.put("switch", JavaTokenTypes.LITERAL_switch);
    keywords.put("synchronized", JavaTokenTypes.LITERAL_synchronized);
    keywords.put("this", JavaTokenTypes.LITERAL_this);
    keywords.put("throw", JavaTokenTypes.LITERAL_throw);
    keywords.put("throws", JavaTokenTypes.LITERAL_throws);
    keywords.put("transient", JavaTokenTypes.LITERAL_transient);
    keywords.put("true", JavaTokenTypes.LITERAL_true);
    keywords.put("try", JavaTokenTypes.LITERAL_try);
    keywords.put("volatile", JavaTokenTypes.LITERAL_volatile);
    keywords.put("while", JavaTokenTypes.LITERAL_while);
    keywords.put("void", JavaTokenTypes.LITERAL_void);
}
/**
 * Construct a lexer which reads from the given Reader, starting at
 * line 1, column 1, position 0.
 *
 * @param in  the source to tokenise
 */
public JavaLexer(Reader in)
{
    this(in, 1, 1, 0);
}
/**
 * Construct a lexer which reads from the given Reader, starting at
 * line 1, column 1, position 0, with comment recognition switched on or off.
 *
 * @param in              the source to tokenise
 * @param handleComments  whether "//" and "/*" should be scanned as comments
 */
public JavaLexer(Reader in, boolean handleComments)
{
    this(in);
    this.handleComments = handleComments;
}
| Construct a lexer which readers from the given Reader, assuming that the
| reader is already positioned at the given line and column within the source
| document.
|
public JavaLexer(Reader in, int line, int col, int position)
{
reader = new EscapedUnicodeReader(in);
reader.setLineColPos(line, col, position);
endColumn = beginColumn = col;
endLine = beginLine = line;
endPosition = beginPosition = position;
try {
rChar = reader.read();
}
catch (IOException ioe) {
rChar = -1;
}
}
| Retrieve the next token.
|
public LocatableToken nextToken()
{
textBuffer.setLength(0);
if (generateWhitespaceTokens && Character.isWhitespace((char)rChar))
{
StringBuilder whitespaceBuffer = new StringBuilder();
while (Character.isWhitespace((char)rChar))
{
whitespaceBuffer.append((char)rChar);
readNextChar();
}
return makeToken(JavaTokenTypes.WHITESPACE, whitespaceBuffer.toString());
}
else
{
while (Character.isWhitespace((char)rChar)){
beginLine = reader.getLine();
beginColumn = reader.getColumn();
beginPosition = reader.getPosition();
readNextChar();
}
}
if (rChar == -1) {
return makeToken(JavaTokenTypes.EOF, null);
}
char nextChar = (char) rChar;
if (Character.isJavaIdentifierStart(nextChar)) {
return createWordToken(nextChar);
}
if (Character.isDigit(nextChar)) {
return makeToken(readDigitToken(nextChar, false), textBuffer.toString());
}
return makeToken(getSymbolType(nextChar), textBuffer.toString());
}
| Make a token of the given type, with the given text. The token
| begins where the previous token ended, and ends at the current
| position (as found in endLine and endColumn).
|
private LocatableToken makeToken(int type, String txt)
{
LocatableToken tok = new LocatableToken(type, txt);
tok.setPosition(beginLine, beginColumn, endLine, endColumn, beginPosition, endPosition - beginPosition);
beginColumn = endColumn;
beginLine = endLine;
beginPosition = endPosition;
return tok;
}
/**
 * Scan an identifier or keyword beginning with the given character and
 * wrap it in a token.
 *
 * @param nextChar  the first character of the word
 */
private LocatableToken createWordToken(char nextChar)
{
    populateTextBuffer(nextChar);
    int wordType = getWordType();
    String word = textBuffer.toString();
    return makeToken(wordType, word);
}
private void populateTextBuffer(char ch)
{
char thisChar=ch;
do {
textBuffer.append(thisChar);
int rval = readNextChar();
if (rval==-1){
return;
}
thisChar=(char)rval;
} while (Character.isJavaIdentifierPart(thisChar)){;
}
private boolean getTokenText(char endChar){
char thisChar=endChar;
}
int rval=0;
boolean complete = false;
boolean escape = false;
while (!complete){
rval=readNextChar();
if (rval==-1){
return false;
}
thisChar = (char)rval;
if (thisChar=='\n'){
return false;
}
textBuffer.append(thisChar);
if (! escape) {
if (thisChar == '\\') {
escape = true;
}
if (thisChar == endChar) {
readNextChar();
return true;
}
}
else {
escape = false;
}
}
return complete;
}
/**
 * Check whether the character can appear in a hexadecimal literal: anything
 * Character.isDigit accepts, or a letter a-f in either case.
 *
 * @param ch  the character to test
 */
private boolean isHexDigit(char ch)
{
    boolean lowerHexLetter = 'a' <= ch && ch <= 'f';
    boolean upperHexLetter = 'A' <= ch && ch <= 'F';
    return Character.isDigit(ch) || lowerHexLetter || upperHexLetter;
}
| Read a numerical literal token.
|
| @param ch The first character of the token (must be a decimal digit)
| @param dot Whether there was a leading dot
|
private int readDigitToken(char ch, boolean dot)
{
int rval = ch;
textBuffer.append(ch);
int type = dot ? JavaTokenTypes.NUM_DOUBLE : JavaTokenTypes.NUM_INT;
boolean fpValid = true;
if (ch == '0' && ! dot) {
rval = readNextChar();
if (rval == 'x' || rval == 'X') {
textBuffer.append((char) rval);
rval = readNextChar();
if (!isHexDigit((char)rval)) {
return JavaTokenTypes.INVALID;
}
do {
textBuffer.append((char) rval);
rval = readNextChar();
} while (isHexDigit((char) rval) || rval == '_'){;
if (rval == 'p' || rval == 'P') {
textBuffer.append((char) rval);
}
return superFunkyHFPL();
}
fpValid = false;
}
else if (rval == 'b' || rval == 'B') {
textBuffer.append((char) rval);
rval = readNextChar();
if (rval != '0' && rval != '1') {
return JavaTokenTypes.INVALID;
}
do {
textBuffer.append((char) rval);
rval = readNextChar();
} while (rval == '0' || rval == '1' || rval == '_'){;
fpValid = false;
}
}
else if (Character.isDigit((char) rval)) {
do {
textBuffer.append((char) rval);
rval = readNextChar();
} while (Character.isDigit((char) rval) || rval == '_'){;
}
ch = (char) rval;
}
}
else {
rval = readNextChar();
while (Character.isDigit((char) rval) || rval == '_'){
textBuffer.append((char) rval);
rval = readNextChar();
}
}
if (rval == '.' && fpValid) {
textBuffer.append((char) rval);
rval = readNextChar();
while (Character.isDigit((char) rval) || rval == '_'){
textBuffer.append((char) rval);
rval = readNextChar();
}
if (rval == 'e' || rval == 'E') {
textBuffer.append((char) rval);
rval = readNextChar();
while (Character.isDigit((char) rval) || rval == '_'){
textBuffer.append((char) rval);
rval = readNextChar();
}
}
if (rval == 'f' || rval == 'F') {
textBuffer.append((char) rval);
rval = readNextChar();
return JavaTokenTypes.NUM_FLOAT;
}
if (rval == 'd' || rval == 'D') {
textBuffer.append((char) rval);
rval = readNextChar();
}
return JavaTokenTypes.NUM_DOUBLE;
}
if ((rval == 'e' || rval == 'E') && fpValid) {
textBuffer.append((char) rval);
rval = readNextChar();
while (Character.isDigit((char) rval) || rval == '_'){
textBuffer.append((char) rval);
rval = readNextChar();
}
type = JavaTokenTypes.NUM_DOUBLE;
}
else if (rval == 'l' || rval == 'L') {
textBuffer.append((char) rval);
rval = readNextChar();
return JavaTokenTypes.NUM_LONG;
}
if (fpValid) {
if (rval == 'f' || rval == 'F') {
textBuffer.append((char) rval);
rval = readNextChar();
return JavaTokenTypes.NUM_FLOAT;
}
if (rval == 'd' || rval == 'D') {
textBuffer.append((char) rval);
rval = readNextChar();
return JavaTokenTypes.NUM_DOUBLE;
}
}
return type;
}
/**
 * Read the binary exponent of a hexadecimal floating-point literal; the
 * 'p'/'P' marker has already been appended to the text buffer. The exponent
 * is an optional '+' or '-' sign followed by at least one decimal digit
 * (underscores are allowed after the first digit), optionally followed by
 * an f/F or d/D suffix.
 *
 * @return NUM_FLOAT for an f/F suffix, NUM_DOUBLE otherwise, or INVALID if
 *         no digit follows the marker and optional sign
 */
private int superFunkyHFPL()
{
    int rval = readNextChar();
    if (rval == '+' || rval == '-') {
        textBuffer.append((char) rval);
        rval = readNextChar();
    }

    // At least one digit is required; this also rejects end of input (-1).
    if (rval == -1 || ! Character.isDigit((char) rval)) {
        return JavaTokenTypes.INVALID;
    }

    do {
        textBuffer.append((char) rval);
        rval = readNextChar();
    } while (Character.isDigit((char) rval) || rval == '_');

    if (rval == 'f' || rval == 'F') {
        textBuffer.append((char) rval);
        readNextChar();
        return JavaTokenTypes.NUM_FLOAT;
    }
    if (rval == 'd' || rval == 'D') {
        textBuffer.append((char) rval);
        readNextChar();
    }
    return JavaTokenTypes.NUM_DOUBLE;
}
private int getMLCommentType(char ch)
{
do{
textBuffer.append(ch);
int rval = readNextChar();
if (rval == -1) {
return JavaTokenTypes.INVALID;
}
ch=(char)rval;
while (ch=='*'){
textBuffer.append((char)rval);
rval = readNextChar();
if (rval == -1) {
return JavaTokenTypes.INVALID;
}
if (rval == '/') {
textBuffer.append((char)rval);
readNextChar();
return JavaTokenTypes.ML_COMMENT;
}
ch=(char)rval;
}
} while (true){;
}
private int getSLCommentType(char ch){
int rval=ch;
}
do{
textBuffer.append((char)rval);
rval=readNextChar();
if (rval==-1 || rval == '\n') {
return JavaTokenTypes.SL_COMMENT;
}
} while (true){;
}
private int getSymbolType(char ch){
int type= JavaTokenTypes.INVALID;
}
textBuffer.append(ch);
if ('"' == ch)
return getStringLiteral();
if ('\'' == ch)
return getCharLiteral();
if ('?' == ch) {
readNextChar();
return JavaTokenTypes.QUESTION;
}
if (',' == ch) {
readNextChar();
return JavaTokenTypes.COMMA;
}
if (';' == ch) {
readNextChar();
return JavaTokenTypes.SEMI;
}
if (':' == ch) {
int rval = readNextChar();
if (rval == ':') {
textBuffer.append((char)rval);
readNextChar();
return JavaTokenTypes.METHOD_REFERENCE;
}
return JavaTokenTypes.COLON;
}
if ('^' == ch)
return getBXORType();
if ('~' == ch) {
readNextChar();
return JavaTokenTypes.BNOT;
}
if ('(' == ch) {
readNextChar();
return JavaTokenTypes.LPAREN;
}
if (')' == ch) {
readNextChar();
return JavaTokenTypes.RPAREN;
}
if ('[' == ch) {
readNextChar();
return JavaTokenTypes.LBRACK;
}
if (']' == ch) {
readNextChar();
return JavaTokenTypes.RBRACK;
}
if ('{' == ch) {
readNextChar();
return JavaTokenTypes.LCURLY;
}
if ('}' == ch) {
readNextChar();
return JavaTokenTypes.RCURLY;
}
if ('@' == ch) {
readNextChar();
return JavaTokenTypes.AT;
}
if ('&' == ch)
return getAndType();
if ('|' == ch)
return getOrType();
if ('!' == ch)
return getExclamationType();
if ('+' == ch)
return getPlusType();
if ('-' == ch)
return getMinusType();
if ('=' == ch)
return getEqualType();
if ('%' == ch)
return getModType();
if ('/' == ch)
return getForwardSlashType();
if ('.' == ch)
return getDotToken();
if ('*' == ch)
return getStarType();
if ('>' == ch)
return getGTType();
if ('<' == ch)
return getLTType();
readNextChar();
return type;
}
/**
 * Scan a token starting with '^' (already in the text buffer).
 *
 * @return BXOR_ASSIGN for "^=", otherwise BXOR
 */
private int getBXORType()
{
    int rval = readNextChar();
    if (rval != '=') {
        return JavaTokenTypes.BXOR;
    }
    textBuffer.append((char) rval);
    readNextChar();
    return JavaTokenTypes.BXOR_ASSIGN;
}
/**
 * Scan a token starting with '&' (already in the text buffer).
 *
 * @return BAND_ASSIGN for "&=", LAND for "&&", otherwise BAND
 */
private int getAndType()
{
    char thisChar = (char) readNextChar();
    if (thisChar == '=') {
        textBuffer.append(thisChar);
        readNextChar();
        return JavaTokenTypes.BAND_ASSIGN;
    }
    if (thisChar == '&') {
        textBuffer.append(thisChar);
        readNextChar();
        return JavaTokenTypes.LAND;
    }
    return JavaTokenTypes.BAND;
}
/**
 * Scan a string literal whose opening quote is already in the text buffer.
 *
 * @return STRING_LITERAL if the closing quote was found, otherwise INVALID
 */
private int getStringLiteral()
{
    return getTokenText('"') ? JavaTokenTypes.STRING_LITERAL : JavaTokenTypes.INVALID;
}
/**
 * Scan a character literal whose opening quote is already in the text buffer.
 *
 * @return CHAR_LITERAL if the closing quote was found, otherwise INVALID
 */
private int getCharLiteral()
{
    return getTokenText('\'') ? JavaTokenTypes.CHAR_LITERAL : JavaTokenTypes.INVALID;
}
/**
 * Scan a token starting with '|' (already in the text buffer).
 *
 * @return BOR_ASSIGN for "|=", LOR for "||", otherwise BOR
 */
private int getOrType()
{
    char second = (char) readNextChar();
    switch (second) {
        case '=':
            textBuffer.append(second);
            readNextChar();
            return JavaTokenTypes.BOR_ASSIGN;
        case '|':
            textBuffer.append(second);
            readNextChar();
            return JavaTokenTypes.LOR;
        default:
            return JavaTokenTypes.BOR;
    }
}
/**
 * Scan a token starting with '+' (already in the text buffer).
 *
 * @return PLUS_ASSIGN for "+=", INC for "++", otherwise PLUS
 */
private int getPlusType()
{
    char second = (char) readNextChar();
    switch (second) {
        case '=':
            textBuffer.append(second);
            readNextChar();
            return JavaTokenTypes.PLUS_ASSIGN;
        case '+':
            textBuffer.append(second);
            readNextChar();
            return JavaTokenTypes.INC;
        default:
            return JavaTokenTypes.PLUS;
    }
}
/**
 * Scan a token starting with '-' (already in the text buffer).
 *
 * @return MINUS_ASSIGN for "-=", DEC for "--", LAMBDA for "->",
 *         otherwise MINUS
 */
private int getMinusType()
{
    char second = (char) readNextChar();

    int type;
    switch (second) {
        case '=':
            type = JavaTokenTypes.MINUS_ASSIGN;
            break;
        case '-':
            type = JavaTokenTypes.DEC;
            break;
        case '>':
            type = JavaTokenTypes.LAMBDA;
            break;
        default:
            // Single '-': the character just read stays as the lookahead.
            return JavaTokenTypes.MINUS;
    }

    textBuffer.append(second);
    readNextChar();
    return type;
}
/**
 * Scan a token starting with '=' (already in the text buffer).
 *
 * @return EQUAL for "==", otherwise ASSIGN
 */
private int getEqualType()
{
    char second = (char) readNextChar();
    if (second != '=') {
        return JavaTokenTypes.ASSIGN;
    }
    textBuffer.append(second);
    readNextChar();
    return JavaTokenTypes.EQUAL;
}
/**
 * Scan a token starting with '*' (already in the text buffer).
 *
 * @return STAR_ASSIGN for "*=", otherwise STAR
 */
private int getStarType()
{
    char second = (char) readNextChar();
    if (second != '=') {
        return JavaTokenTypes.STAR;
    }
    textBuffer.append(second);
    readNextChar();
    return JavaTokenTypes.STAR_ASSIGN;
}
/**
 * Scan a token starting with '%' (already in the text buffer).
 *
 * @return MOD_ASSIGN for "%=", otherwise MOD
 */
private int getModType()
{
    char second = (char) readNextChar();
    if (second != '=') {
        return JavaTokenTypes.MOD;
    }
    textBuffer.append(second);
    readNextChar();
    return JavaTokenTypes.MOD_ASSIGN;
}
/**
 * Scan a token starting with '/' (already in the text buffer). Comments
 * are only recognised when comment handling is enabled; otherwise a
 * following '/' or '*' is left as the lookahead and DIV is returned.
 *
 * @return DIV_ASSIGN for "/=", the result of the comment scanner for
 *         "//" or "/*", otherwise DIV
 */
private int getForwardSlashType()
{
    char second = (char) readNextChar();
    switch (second) {
        case '=':
            textBuffer.append(second);
            readNextChar();
            return JavaTokenTypes.DIV_ASSIGN;
        case '/':
            if (handleComments) {
                return getSLCommentType(second);
            }
            break;
        case '*':
            if (handleComments) {
                return getMLCommentType(second);
            }
            break;
        default:
            break;
    }
    return JavaTokenTypes.DIV;
}
/**
 * Scan a token starting with '>' (already in the text buffer).
 *
 * @return GE for ">=", SR for ">>", SR_ASSIGN for ">>=", BSR for ">>>",
 *         BSR_ASSIGN for ">>>=", otherwise GT
 */
private int getGTType()
{
    char next = (char) readNextChar();
    if (next == '=') {
        textBuffer.append(next);
        readNextChar();
        return JavaTokenTypes.GE;
    }
    if (next != '>') {
        return JavaTokenTypes.GT;
    }

    // Seen ">>"
    textBuffer.append(next);
    next = (char) readNextChar();
    if (next == '=') {
        textBuffer.append(next);
        readNextChar();
        return JavaTokenTypes.SR_ASSIGN;
    }
    if (next != '>') {
        return JavaTokenTypes.SR;
    }

    // Seen ">>>"
    textBuffer.append(next);
    next = (char) readNextChar();
    if (next == '=') {
        textBuffer.append(next);
        readNextChar();
        return JavaTokenTypes.BSR_ASSIGN;
    }
    return JavaTokenTypes.BSR;
}
/**
 * Scan a token starting with '<' (already in the text buffer).
 *
 * @return LE for "<=", SL for "<<", SL_ASSIGN for "<<=", otherwise LT
 */
private int getLTType()
{
    char next = (char) readNextChar();
    if (next == '=') {
        textBuffer.append(next);
        readNextChar();
        return JavaTokenTypes.LE;
    }
    if (next != '<') {
        return JavaTokenTypes.LT;
    }

    // Seen "<<"
    textBuffer.append(next);
    next = (char) readNextChar();
    if (next != '=') {
        return JavaTokenTypes.SL;
    }
    textBuffer.append(next);
    readNextChar();
    return JavaTokenTypes.SL_ASSIGN;
}
/**
 * Scan a token starting with '!' (already in the text buffer).
 *
 * @return NOT_EQUAL for "!=", otherwise LNOT
 */
private int getExclamationType()
{
    char second = (char) readNextChar();
    if (second != '=') {
        return JavaTokenTypes.LNOT;
    }
    textBuffer.append(second);
    readNextChar();
    return JavaTokenTypes.NOT_EQUAL;
}
/**
 * Scan a token starting with '.' (already in the text buffer).
 *
 * @return the numeric literal type if a digit follows, TRIPLE_DOT for
 *         "...", INVALID for ".." not followed by a third dot,
 *         otherwise DOT
 */
private int getDotToken()
{
    char second = (char) readNextChar();
    if (Character.isDigit(second)) {
        return readDigitToken(second, true);
    }
    if (second != '.') {
        return JavaTokenTypes.DOT;
    }

    // Seen "..": only a third dot makes this a valid token.
    textBuffer.append(second);
    char third = (char) readNextChar();
    if (third != '.') {
        return JavaTokenTypes.INVALID;
    }
    textBuffer.append(third);
    readNextChar();
    return JavaTokenTypes.TRIPLE_DOT;
}
/**
 * Advance the lookahead by one character. The end markers are first set to
 * the reader's current column, line and position, so they describe the end
 * of everything consumed so far.
 *
 * @return the character read, or -1 if the read threw an IOException
 *         (the reader's own return value is passed through unchanged)
 */
private int readNextChar()
{
    endColumn = reader.getColumn();
    endLine = reader.getLine();
    endPosition = reader.getPosition();

    int next;
    try {
        next = reader.read();
    }
    catch (IOException e) {
        next = -1;
    }
    rChar = next;
    return next;
}
/**
 * Classify the word currently held in the text buffer.
 *
 * @return the keyword's token type if the word is in the keyword table,
 *         otherwise IDENT
 */
private int getWordType()
{
    Integer keywordType = keywords.get(textBuffer.toString());
    return (keywordType != null) ? keywordType.intValue() : JavaTokenTypes.IDENT;
}
/**
 * Choose whether nextToken() returns runs of whitespace as WHITESPACE
 * tokens (true) or silently skips them (false).
 *
 * @param generateWhitespaceTokens  true to generate whitespace tokens
 */
public void setGenerateWhitespaceTokens(boolean generateWhitespaceTokens)
{
    this.generateWhitespaceTokens = generateWhitespaceTokens;
}
}
. - JavaLexer
. JavaLexer
. JavaLexer
. JavaLexer
. nextToken
. makeToken
. createWordToken
. populateTextBuffer
. getTokenText
. isHexDigit
. readDigitToken
. superFunkyHFPL
. getMLCommentType
. getSLCommentType
. getSymbolType
. getBXORType
. getAndType
. getStringLiteral
. getCharLiteral
. getOrType
. getPlusType
. getMinusType
. getEqualType
. getStarType
. getModType
. getForwardSlashType
. getGTType
. getLTType
. getExclamationType
. getDotToken
. readNextChar
. getWordType
. setGenerateWhitespaceTokens
1070 neLoCode
+ 14 LoComm