Token.java

/*
 * (c) Copyright 2021 Hasan Selman Kara. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package li.selman.jpbe.dsl.token;

import java.util.regex.Pattern;

/**
 * Restrictions on Regular Expressions:
 * <ul>
 * <li>Kleene star is restricted to <b>one or more</b> occurrences (instead of zero or more)</li>
 * <li>No disjunction operator allowed (or-operator)</li>
 * </ul>
 * These restrictions are in place to efficiently enumerate regular expressions.
 *
 * @author Hasan Selman Kara
 */
public abstract class Token {

    public static final Token START = new Token.StartToken();
    public static final Token END = new Token.EndToken();

    public static final Token ALPHA = new Token.AlphaToken();
    public static final Token LOWER_ALPHA = new Token.LowerAlphaToken();
    public static final Token UPPER_ALPHA = new Token.UpperAlphaToken();

    public static final Token LEADING_ZERO = new Token.LeadingZeroToken();
    public static final Token NUM = new Token.NumToken();
    public static final Token NUM_NO_LEADING_ZEROS = new NumNoLeadingZerosToken();
    public static final Token ALPHA_NUM = new Token.AlphaNumToken();
    public static final Token ALPHA_NUM_NO_LEADING_ZEROS = new Token.AlphaNumNoLeadingZerosToken();

    public static final Token SPACE = new Token.SpaceToken();

    public static final Token COLON = new Token.ColonToken();
    public static final Token SEMI_COLON = new Token.SemiColonToken();

    public static final Token DOT = new Token.DotToken();
    public static final Token COMMA = new Token.CommaToken();

    public static final Token HYPHEN = new Token.HyphenToken();
    public static final Token UNDERSCORE = new Token.UnderscoreToken();

    public static final Token BACK_SLASH = new Token.BackSlashToken();
    public static final Token FORWARD_SLASH = new Token.ForwardSlashToken();

    private final Pattern pattern;

    public Token(Pattern pattern) {
        if (pattern == null) throw new IllegalArgumentException("Pattern cannot be null");

        this.pattern = pattern;
    }

    public final Pattern getPattern() {
        return pattern;
    }

    public final String getRegexPattern() {
        return pattern.pattern();
    }

    /**
     * Check whether a string matches the pattern.
     *
     * @param s to match
     * @return {@code true} if the s matches the Regex
     */
    public boolean matches(String s) {
        return pattern.matcher(s).matches();
    }

    /**
     * Checks whether a single character matches the token.
     *
     * @param c to match
     * @return {@code true} if the c matches the Regex
     */
    public boolean matches(char c) {
        return matches(String.valueOf(c));
    }

    public boolean matches(String s, Token lastToken) {
        return matches(s);
    }

    public boolean matches(char c, Token lastToken) {
        return matches(c);
    }

    @Override
    public String toString() {
        return pattern.toString();
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Token token = (Token) o;

        return pattern.equals(token.pattern);
    }

    @Override
    public int hashCode() {
        return pattern.hashCode();
    }

    // TODO(#idea): do we need Token.EVERYTHING ?
    //  When a token sequence is empty. It matches everything.
    //  I.e. we could represent it with `TokenSequence.of(List.of(Token.EVERYTHING))`
    private static class EverythingToken extends Token {
        EverythingToken() {
            super(Pattern.compile("(.*?)"));
        }
    }

    private static class StartToken extends Token {
        StartToken() {
            super(Pattern.compile("^"));
        }
    }

    private static class EndToken extends Token {
        EndToken() {
            super(Pattern.compile("$"));
        }
    }

    private static class AlphaToken extends Token {
        AlphaToken() {
            super(Pattern.compile("[a-zA-Z]+"));
        }
    }

    private static class LowerAlphaToken extends Token {
        LowerAlphaToken() {
            super(Pattern.compile("[a-z]+"));
        }
    }

    private static class UpperAlphaToken extends Token {
        UpperAlphaToken() {
            super(Pattern.compile("[A-Z]+"));
        }
    }

    private static class LeadingZeroToken extends Token {
        LeadingZeroToken() {
            super(Pattern.compile("(^)[0]+"));
        }

        @Override
        public boolean matches(String s, Token lastToken) {
            return ("0".equals(s) && Token.START.equals(lastToken))
                           || ("0".equals(s) && Token.LEADING_ZERO.equals(lastToken));
        }

        @Override
        public boolean matches(char c, Token lastToken) {
            return this.matches(String.valueOf(c), lastToken);
        }
    }

    private static class NumNoLeadingZerosToken extends Token {
        NumNoLeadingZerosToken() {
            super(Pattern.compile("([1-9]+[0-9]*)"));
        }

        @Override
        public boolean matches(String s, Token lastToken) {
            if ("0".equals(s)) {
                return !Token.LEADING_ZERO.equals(lastToken);
            } else {
                return isPositiveNumeric(s);
            }
        }

        @Override
        public boolean matches(char c, Token lastToken) {
            if (c == '0') {
                return !Token.LEADING_ZERO.equals(lastToken);
            } else {
                return Character.isDigit(c);
            }
        }

        private static boolean isPositiveNumeric(String str) {
            for (int i = 0; i < str.length(); i++) {
                if (!Character.isDigit(str.charAt(i))) return false;
            }
            return true;
        }
    }

    private static class NumToken extends Token {
        NumToken() {
            super(Pattern.compile("[0-9]+"));
        }
    }

    private static class AlphaNumToken extends Token {
        AlphaNumToken() {
            super(Pattern.compile("[a-zA-Z0-9]+"));
        }

        @Override
        public String toString() {
            return "AN";
        }
    }

    private static class AlphaNumNoLeadingZerosToken extends Token {
        AlphaNumNoLeadingZerosToken() {
            super(Pattern.compile("([a-zA-Z1-9]+[0-9]*[a-zA-Z]*)"));
        }

        @Override
        public String toString() {
            return "AN-0";
        }
    }

    private static class SpaceToken extends Token {
        SpaceToken() {
            super(Pattern.compile("[\\s]+"));
        }

        @Override
        public String toString() {
            return "s+";
        }
    }

    private static class DotToken extends Token {
        DotToken() {
            super(Pattern.compile("[\\.]+"));
        }

        @Override
        public String toString() {
            return ".";
        }
    }

    private static class ColonToken extends Token {
        ColonToken() {
            super(Pattern.compile("[:]+"));
        }
    }

    private static class CommaToken extends Token {
        CommaToken() {
            super(Pattern.compile("[\\,]+"));
        }
    }

    private static class SemiColonToken extends Token {
        SemiColonToken() {
            super(Pattern.compile("[;]+"));
        }
    }

    private static class BackSlashToken extends Token {
        BackSlashToken() {
            super(Pattern.compile("[\\\\]+"));
        }
    }

    private static class ForwardSlashToken extends Token {
        ForwardSlashToken() {
            super(Pattern.compile("[/]+"));
        }
    }

    private static class HyphenToken extends Token {
        HyphenToken() {
            super(Pattern.compile("[-]+"));
        }
    }

    private static class UnderscoreToken extends Token {
        UnderscoreToken() {
            super(Pattern.compile("[_]+"));
        }
    }
}