package Compiler; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** * The lexical scanner, which takes the source code and extracts a list of token objects */ public class TokenScanner { private String sourceCode; List tokens = new ArrayList<>(); private int tokenStart=0; private int currentLoc=0; private int line=0; /** * Method for extracting tokens, checking each character at a time * @param sourceCode the original source code as a string * @return a list of tokens represeting the source code */ List extractTokens(String sourceCode){ this.sourceCode=sourceCode; //Read until the end of the source code as been reached while (!checkEOF()){ tokenStart=currentLoc; readToken(); } //Add a EOF token on the end tokens.add(new Token(TokenType.EOF, "", null,line)); return tokens; } /** * Extract a single token */ private void readToken(){ //Get the current character and find the matching token char checkChar = sourceCode.charAt(currentLoc); switch(checkChar){ case ' ':break; //Advance line when line break found case '\n': line++; break; case '\r':break; case '\t': break; case '(': createTokenNull(TokenType.LEFT_PAREN); break; case ')': createTokenNull(TokenType.RIGHT_PAREN); break; case '+': createTokenNull(TokenType.PLUS); break; case '-': createTokenNull(TokenType.MINUS); break; case ';': createTokenNull(TokenType.SEMI_COLON); break; case ',': createTokenNull(TokenType.COMMA); break; //Some tokens are multiple characters long //so need to check next char as well case '*': if (checkNextChar('*')){ createTokenNull(TokenType.EXPONENT); break; } else{ createTokenNull(TokenType.STAR); break; } case '/': if (checkNextChar('=')){ createTokenNull(TokenType.NOT_EQUAL); break; } else{ createTokenNull(TokenType.SLASH); break; } case '=': if (checkNextChar('=')){ createTokenNull(TokenType.EQUALITY); break; } else { createTokenNull(TokenType.EQUALS); break; } case ':': if (checkNextChar(':')){ createTokenNull(TokenType.DEFINE); break; } else { createTokenNull(TokenType.COLON); break; } case '<': if (checkNextChar('=')){ createTokenNull(TokenType.LESS_EQUAL); break; } else { createTokenNull(TokenType.LESS); break; } case '>': if (checkNextChar('=')){ createTokenNull(TokenType.GREATER_EQUAL); break; } else { createTokenNull(TokenType.GREATER); break; } case '"': while(lookAhead()!='"' && !checkEOF()){ currentLoc++; } if(checkEOF()){ Language.displayError(line, "Strings must end with a closing \""); break; } currentLoc++; createToken(TokenType.STRING, sourceCode.substring(tokenStart, currentLoc+1)); break; case '!': while(lookAhead()!='\n' && !checkEOF()){ currentLoc++; } break; case '.': if(checkIsAlpha(lookAhead())) while (checkIsAlpha(lookAhead())){ currentLoc++; } String logical = sourceCode.substring(tokenStart+1, currentLoc+1); if (checkNextChar('.')){ if (logical.equals("and")){ createTokenNull(TokenType.AND); break; } else if(logical.equals("or")){ createTokenNull(TokenType.OR); break; } else if(logical.equals("not")){ createTokenNull(TokenType.NOT); break; } else{ Language.displayError(line, "Expected logical expression"); } } else { Language.displayError(line, "Expected '.' after logical expression"); } //Find tokens starting with alphanumeric characters default: //Check for numeric characters if (checkIsDigit(checkChar)){ String type = "int"; while (checkIsDigit(lookAhead())){ currentLoc++; } //Check if number contains a decimal point if (lookAhead()=='.' && checkIsDigit(lookTwoAhead())){ type="double"; currentLoc++; while (checkIsDigit(lookAhead())){ currentLoc++; } } if (type.equals("double")){ createToken(TokenType.NUMBER, Double.parseDouble(sourceCode.substring(tokenStart, currentLoc+1))); } else{ createToken(TokenType.NUMBER, Integer.parseInt(sourceCode.substring(tokenStart, currentLoc+1))); } } //Check alphabetical character else if (checkIsAlpha(checkChar)){ while (checkIsAlpha(lookAhead())){ currentLoc++; } String text = sourceCode.substring(tokenStart, currentLoc+1); //Compare against a list of keywords in the language TokenType type = keywords.get(text); if(type == null){ createToken(TokenType.IDENTIFIER, text); } else{ createToken(type, text); } } else { //Report an unknown character Language.displayError(line,"Unexpected Character"); } } currentLoc++; } /** * Method to check the end of the source code * @return if the end of the source code has been reached */ private boolean checkEOF(){ return currentLoc>=sourceCode.length(); } /** * Create a token with a value of null * @param type the token type */ private void createTokenNull(TokenType type){ createToken(type, null); } /** * Create a token and add to the list * @param type the token type * @param value the value of the token */ private void createToken(TokenType type, Object value){ String tokenText = sourceCode.substring(tokenStart, currentLoc+1); tokens.add(new Token(type, tokenText, value, line)); } /** * Compare the next character in the source code to a given character * @param matchChar the character to compare against * @return if the character matches */ private boolean checkNextChar(char matchChar){ if (checkEOF()){ return false; } if (sourceCode.charAt(currentLoc+1)==matchChar){ currentLoc++; return true; } return false; } /** * gets the next character in the source code * @return the next character */ private char lookAhead(){ if (currentLoc+1>=sourceCode.length()){ return ' '; } else { return sourceCode.charAt(currentLoc+1); } } /** * look at the character two ahead in the source code * @return the character two ahead */ private char lookTwoAhead(){ if (currentLoc+2>=sourceCode.length()){ return ' '; } else { return sourceCode.charAt(currentLoc+2); } } /** * checks if a given character is numerical * @param checkChar the character to check * @return if the character is numerical */ private boolean checkIsDigit(char checkChar){ return checkChar>='0' && checkChar<='9'; } /** * check if a character is alphabetical * @param checkChar the character to check * @return if the character is alphabetical */ private boolean checkIsAlpha(char checkChar){ return ('a'<=checkChar && checkChar<='z')|| ('A'<=checkChar && checkChar<='Z'); } //A hashmap of the keywords used in the language private static final Map keywords; static { keywords = new HashMap<>(); keywords.put("int", TokenType.INT); keywords.put("len", TokenType.LEN); keywords.put("real", TokenType.REAL); keywords.put("character", TokenType.STRING); keywords.put("print", TokenType.PRINT); keywords.put("endprint", TokenType.ENDPRINT); keywords.put("if", TokenType.IF); keywords.put("then", TokenType.THEN); keywords.put("end", TokenType.END); keywords.put("else", TokenType.ELSE); keywords.put("do", TokenType.DO); keywords.put("while", TokenType.WHILE); keywords.put("dimension", TokenType.DIMENSION); keywords.put("program", TokenType.PROGRAM); keywords.put("return", TokenType.RETURN); keywords.put("function", TokenType.FUNCTION); keywords.put("subroutine", TokenType.SUBROUTINE); } }