commit - /dev/null
commit + 2228b436e294002e5f8b47f275655042c1ea6512
blob - /dev/null
blob + db4f930ea0b1dd5b430845e9ee25393b08910c6c (mode 644)
--- /dev/null
+++ Makefile
+.POSIX:
+
+# Installation prefix; override on the command line, e.g. `make PREFIX=/usr install`.
+PREFIX = /usr/local
+# The manual page is installed below ${MANPREFIX}/man1.
+MANPREFIX = ${PREFIX}/man
+CFLAGS = -std=c99 -O2 -pipe -Wall -Wextra
+# Files copied into the release tarball by the `dist` target.
+DIST = Makefile lexicgen.c lexicgen.1 example.lx
+
+# Default target. No explicit rule for lexicgen is given in this Makefile, so
+# it relies on make's built-in rule for building a program from a .c file.
+all: lexicgen
+
+# Remove the binary, the release tarball and core dumps.
+clean:
+ rm -f lexicgen lexicgen.tgz *.core
+
+# View the manual page straight from the source tree.
+man:
+ man ./lexicgen.1
+
+# Build lexicgen and run it on the example, writing the generated code to stdout.
+run: lexicgen example.lx
+ ./lexicgen -o - example.lx
+
+# Install the binary and the manual page; ${DESTDIR} is prepended to both paths.
+install: lexicgen
+ mkdir -p ${DESTDIR}${PREFIX}/bin ${DESTDIR}${MANPREFIX}/man1
+ cp -f lexicgen ${DESTDIR}${PREFIX}/bin/
+ cp -f lexicgen.1 ${DESTDIR}${MANPREFIX}/man1/
+
+# Pack ${DIST} into lexicgen.tgz under a top-level lexicgen/ directory,
+# using _dist as a scratch directory that is removed afterwards.
+dist:
+ rm -rf _dist
+ mkdir -p _dist/lexicgen
+ cp -f ${DIST} _dist/lexicgen/
+ tar -C _dist -czf lexicgen.tgz lexicgen
+ rm -rf _dist
+
+
blob - /dev/null
blob + d709235c3aeb1d28adba8813efebff0e99bfe54e (mode 644)
--- /dev/null
+++ example.lx
+// Example lexicgen input: a lexer that recognizes integers and identifiers.
+// Declarations
+// Each %import line is emitted as an import statement in the generated file.
+%import eu.bandm.tools.lexic.*
+%import java.io.IOException
+%import java.io.InputStreamReader
+%import java.nio.charset.StandardCharsets
+
+// Declare an end-of-file token called EOF.
+%eof EOF
+
+// Declare an error token called Error.
+%error Error
+
+// Variables: named syntax expressions that token definitions can refer to.
+Integer = "0" | [1-9][0-9]*;
+Ident = [a-zA-Z_][a-zA-Z_0-9]*;
+
+// A line consisting only of %% ends the current section.
+%%
+// Definitions
+
+// One or more spaces, newlines, tabs, carriage returns or form feeds.
+Whitespace : [ \n\t\r\f]+;
+// A File can be either an integer, or an identifier.
+File : Integer | Ident;
+
+%%
+// Code
+// Everything below is copied into the body of the generated class.
+
+public static void main(String[] args) {
+ // The construct() function is generated by lexicgen and returns a Lexer.
+ var lexer = construct();
+
+ try (var rdr = new InputStreamReader(System.in, StandardCharsets.UTF_8)) {
+ // Lex standard input and drop the Whitespace tokens from the stream.
+ TokenSource<String, TokenType> tokens = lexer
+ .lex(CodePointSource.read(rdr, e -> {}))
+ .removeTypes(TokenType.Whitespace);
+ // Print every token until the EOF token is reached.
+ while (true) {
+ var token = tokens.get();
+ if (token.getType() == TokenType.EOF)
+ break;
+ System.out.println(token);
+ }
+ // I/O errors are silently ignored in this example.
+ } catch (IOException e) {}
+}
blob - /dev/null
blob + 8d93f8228b1cff5be3a6ff158972b4d59b460fdb (mode 644)
--- /dev/null
+++ lexicgen.1
+.\"
+.\" Copyright (c) 2023 Benjamin Stürz
+.\"
+.\" Permission to use, copy, modify, and distribute this software for any
+.\" purpose with or without fee is hereby granted, provided that the above
+.\" copyright notice and this permission notice appear in all copies.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+.\" WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+.\" MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+.\" ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+.\" WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+.\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+.\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+.\"
+.Dd $Mdocdate$
+.Dt LEXICGEN 1
+.Os
+.Sh NAME
+.Nm lexicgen
+.Nd lexic generator
+.Sh SYNOPSIS
+.Nm
+.Op Fl v Ar visibility
+.Op Fl p Ar package
+.Op Fl o Ar output-file
+.Ar input-file
+.Sh DESCRIPTION
+.Nm
+is a clone of
+.Xr lex 1
+for generating lexers using Lexic.
+See the FORMAT section for information about the format.
+.Pp
+The options are as follows:
+.Bl -tag -width -Ds
+.It Fl v Ar visibility
+Change the visibility of the lexer-internal data structures and functions.
+.Ar visibility
+must be one of
+.Cm private ,
+.Cm public ,
+or
+.Cm package .
+The default is
+.Cm private .
+.It Fl p Ar package
+Set a Java package for the generated class.
+.It Fl o Ar output-file
+Write the output to
+.Ar output-file
+instead of a generated path.
+If
+.Ar output-file
+is '-', then write to the standard output.
+.El
+.Sh FORMAT
+A valid file consists of three sections:
+.Bl -bullet -compact
+.It
+Declarations
+.It
+Definitions
+.It
+and Code (optional)
+.El
+
+The declaration section can contain the following statements:
+.Bl -tag -width -Ds
+.It Sy %import Ar package
+Import
+.Ar package .
+.It Sy %eof Ar name
+Declare an end of file token called
+.Ar name .
+
+Note: Should only be specified once.
+.It Sy %error Ar name
+Declare an error token called
+.Ar name .
+
+Note: Should only be specified once.
+.It Sy name No = Sy syntax No ;
+Define a variable called
+.Ar name
+containing
+.Ar syntax
+that can be referred to from token definitions.
+.El
+
+The definition section contains token definitions.
+A token definition consists of a
+.Ar name
+and a
+.Ar syntax expression ,
+written on a single line as:
+.Pp
+.Dl name : syntax ;
+.Pp
+The name must consist of alphabetic characters.
+
+The following syntax expressions are supported:
+.Bl -tag -width -Ds
+.It Sy ident
+An identifier is a sequence of one or more alphabetic characters.
+Match if the rule identified by
+.Ar ident
+matches.
+.It Qq Sy string
+Match if the input matches all characters of
+.Ar string .
+.It Bq Sy range
+Match, if any of the fragments match.
+If
+.Ar range
+starts with the
+.Dq ^
+character, then negate the matching result.
+The range consists of fragments.
+A fragment can be a single character, an escape sequence or a character range.
+Only single-character escape sequences are supported.
+A character range has the syntax a-b,
+where a is the first character and b is the last.
+.It Pq expr
+Grouping.
+.It expr?
+Zero or one occurrence of
+.Ar expr .
+.It expr+
+One or more occurrences of
+.Ar expr .
+.It expr*
+Zero or more occurrences of
+.Ar expr .
+.It expr1 expr2
+.Ar expr1
+followed by
+.Ar expr2 .
+.It expr1 & expr2
+Only match if both
+.Ar expr1
+and
+.Ar expr2
+match.
+.It expr1 \e expr2
+Match only if
+.Ar expr1
+matches, and
+.Ar expr2
+doesn't.
+.It expr1 | expr2
+Match if either or both of
+.Ar expr1
+or
+.Ar expr2
+match.
+.El
+
+The code section contains arbitrary code that is copied into the body of the generated class.
+
+.Sh EXAMPLES
+The following code is an example of a lexicgen file:
+.Bd -literal
+// Declarations
+%import eu.bandm.tools.lexic.*
+%import java.io.IOException
+%import java.io.InputStreamReader
+%import java.nio.charset.StandardCharsets
+
+// Declare an end-of-file token called EOF.
+%eof EOF
+
+// Declare an error token called Error.
+%error Error
+
+Integer = "0" | [1-9][0-9]*;
+Ident = [a-zA-Z_][a-zA-Z_0-9]*;
+
+%%
+// Definitions
+
+Whitespace : [ \en\et\er\ef]+;
+// A File can be either an integer, or an identifier.
+File : Integer | Ident;
+
+%%
+// Code
+
+public static void main(String[] args) {
+ // The construct() function is generated by lexicgen and returns a Lexer.
+ var lexer = construct();
+
+ try (var rdr = new InputStreamReader(System.in, StandardCharsets.UTF_8)) {
+ TokenSource<String, TokenType> tokens = lexer
+ .lex(CodePointSource.read(rdr, e -> {}))
+ .removeTypes(TokenType.Whitespace);
+ while (true) {
+ var token = tokens.get();
+ if (token.getType() == TokenType.EOF)
+ break;
+ System.out.println(token);
+ }
+ } catch (IOException e) {}
+}
+.Ed
+.Sh EXIT STATUS
+.Ex -std
+.Sh SEE ALSO
+.Xr lex 1
+.Sh AUTHORS
+.An Benjamin Stürz Aq Mt benni@stuerz.xyz
blob - /dev/null
blob + ae4d18076db38c922ca23c7c2b616c476942ce81 (mode 644)
--- /dev/null
+++ lexicgen.c
+#ifdef __OpenBSD__
+# define _BSD_SOURCE
+#elif defined(__GNUC__)
+# define _DEFAULT_SOURCE
+# define _GNU_SOURCE
+#endif
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <err.h>
+
+/* Allocate one zero-initialized object of type T with calloc(); evaluates to NULL if calloc() fails. */
+#define new(T) ((T *)calloc (1, sizeof (T)))
+
+/* Kind of a node in the parsed syntax-expression tree (see struct syntax). */
+enum syntax_type {
+ S_SUB, /* parenthesized sub-expression: ( expr ) */
+ S_STRING, /* double-quoted string literal */
+ S_PLUS, /* expr+ */
+ S_STAR, /* expr* */
+ S_OPT, /* expr? */
+ S_BINARY, /* two operands joined by a named operator (andThen, butOnly, butNot, orElse) */
+ S_ANYOF, /* character class: [ ... ] */
+ S_EXCEPT, /* negated character class: [^ ... ] */
+ S_EPSILON, /* empty expression, produced where an operand is expected but ';', ')', '|' or '&' follows */
+ S_IDENT, /* reference to another definition by name */
+};
+
+/* One node of the parsed syntax-expression tree; `type` selects which union member is in use. */
+struct syntax {
+ enum syntax_type type;
+ union {
+ struct syntax *syn; /* operand of S_SUB, S_PLUS, S_STAR and S_OPT */
+ char *string; /* text of S_STRING and S_IDENT; member characters of S_ANYOF and S_EXCEPT */
+ struct { /* S_BINARY */
+ const char *op; /* name of the method emitted for the operator, e.g. "andThen" */
+ struct syntax *left, *right;
+ };
+ struct {
+ char begin, end; /* not referenced by any code visible in this file */
+ };
+
+ };
+};
+
+/* A named syntax expression: either a variable (name = syntax;) or a token (name : syntax;). */
+struct definition {
+ char *name; /* heap-allocated copy of the name (strndup) */
+ struct syntax *syn; /* parsed expression tree */
+};
+
+/* Visibility selected with the -v option; used as an index into visibilities[]. */
+enum Visibility {
+ V_PRIVATE,
+ V_PUBLIC,
+ V_PACKAGE,
+};
+
+/*
+ * Modifier text emitted in front of the generated declarations, indexed by
+ * enum Visibility.  Each non-empty entry carries its own trailing space; the
+ * V_PACKAGE entry is empty, so no modifier is written.
+ */
+static const char *visibilities[] = {
+ "private ",
+ "public ",
+ "",
+};
+
+/*
+ * Skip leading whitespace.
+ *
+ * Returns a pointer to the first character of `s` that is not whitespace
+ * according to isspace(); this is the terminating NUL if `s` is blank.
+ */
+static const char *strip_ws (const char *s)
+{
+	/*
+	 * The <ctype.h> functions are only defined for EOF and values
+	 * representable as unsigned char, so a plain (possibly negative) char
+	 * must be converted first.  isspace('\0') is false, which ends the
+	 * loop at the terminator.
+	 */
+	while (isspace ((unsigned char)*s))
+		++s;
+	return s;
+}
+
+/* Report whether the string `s` begins with the string `prefix`. */
+static bool starts_with (const char *s, const char *prefix)
+{
+	/* Walk both strings in step until the prefix is used up. */
+	for (; *prefix != '\0'; ++s, ++prefix) {
+		if (*s != *prefix)
+			return false;
+	}
+	return true;
+}
+
+/*
+ * Read the body of a string literal.
+ *
+ * *s points just behind the opening delimiter.  Characters are copied
+ * verbatim (escape sequences are kept as written) until an unescaped `end`
+ * character is found; *s is then left just behind that delimiter.
+ *
+ * Returns a heap-allocated, NUL-terminated copy of the body that the caller
+ * must free, or NULL if the input ends before the closing delimiter.
+ * Running out of memory terminates the program.
+ */
+static char *parse_string (const char **s, char end)
+{
+	char ch, *str, *tmp;
+	size_t len = 0, cap = 10;
+	bool esc = false;
+
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	while (1) {
+		ch = **s;
+		if (ch == end && !esc) {
+			++*s;
+			break;
+		}
+		if (ch == '\0') {
+			/* Unterminated literal; *s stays on the terminator. */
+			free (str);
+			return NULL;
+		}
+		++*s;
+
+		/*
+		 * A backslash starts an escape only if it is not itself
+		 * escaped; otherwise "x\\" would never be terminated.
+		 */
+		esc = !esc && ch == '\\';
+
+		if (len == cap) {
+			cap *= 2;
+			tmp = realloc (str, cap + 1);
+			if (tmp == NULL)
+				err (1, NULL);
+			str = tmp;
+		}
+
+		str[len++] = ch;
+	}
+	str[len] = '\0';
+	return str;
+}
+
+/*
+ * Read an identifier: the longest run of alphabetic characters at *s.
+ *
+ * *s is advanced past the identifier.  Returns a heap-allocated,
+ * NUL-terminated copy that the caller must free; the copy is empty if *s
+ * does not start with an alphabetic character.  Running out of memory
+ * terminates the program.
+ */
+static char *parse_ident (const char **s)
+{
+	char *str, *tmp;
+	size_t len = 0, cap = 10;
+
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	/* <ctype.h> functions require a value representable as unsigned char. */
+	while (isalpha ((unsigned char)**s)) {
+		if (len == cap) {
+			cap *= 2;
+			tmp = realloc (str, cap + 1);
+			if (tmp == NULL)
+				err (1, NULL);
+			str = tmp;
+		}
+		str[len++] = *(*s)++;
+	}
+	str[len] = '\0';
+	return str;
+}
+
+/*
+ * Append `ch` to the growable buffer `str` (current length *len, capacity
+ * *cap plus one byte for the terminator) and return the possibly moved
+ * buffer.  Running out of memory terminates the program.
+ */
+static char *range_append (char *str, size_t *len, size_t *cap, char ch)
+{
+	if (*len == *cap) {
+		char *tmp;
+
+		*cap *= 2;
+		tmp = realloc (str, *cap + 1);
+		if (tmp == NULL)
+			err (1, NULL);
+		str = tmp;
+	}
+	str[(*len)++] = ch;
+	return str;
+}
+
+/*
+ * Parse the inside of a character class.
+ *
+ * *s points just behind the opening '['.  A leading '^' makes the node
+ * S_EXCEPT, otherwise it is S_ANYOF.  The class is expanded into the list of
+ * its member characters, stored as a heap-allocated string in syn->string:
+ *   - "a-b" adds every character from a to b,
+ *   - a '-' directly before ']' is a literal '-',
+ *   - "\x" is a single-character escape sequence,
+ *   - anything else stands for itself.
+ * On success *s is left just behind the closing ']' and `syn` is returned.
+ *
+ * Returns NULL, without freeing `syn`, if the class is empty, starts with
+ * '-', contains a descending range, or is not closed before the end of the
+ * input.
+ */
+static struct syntax *parse_range (struct syntax *syn, const char **s)
+{
+	size_t len = 0, cap = 10;
+	char ch, prev, *str;
+
+	if (**s == '^') {
+		syn->type = S_EXCEPT;
+		++*s;
+	} else {
+		syn->type = S_ANYOF;
+	}
+
+	str = malloc (cap + 1);
+	if (str == NULL)
+		err (1, NULL);
+
+	while (1) {
+		ch = *(*s)++;
+		switch (ch) {
+		case '\0':
+			goto eol;
+		case ']':
+			/* An empty class has no member to emit. */
+			if (len == 0)
+				goto fail;
+			goto end;
+		case '-':
+			if (len == 0)
+				goto fail;
+			ch = *(*s)++;
+			if (ch == '\0')
+				goto eol;
+
+			if (ch == ']') {
+				str = range_append (str, &len, &cap, '-');
+				goto end;
+			}
+
+			prev = str[len - 1];
+			if (prev > ch)
+				goto fail;
+
+			/*
+			 * Count in int: a char counter would overflow and
+			 * never terminate when the range ends at CHAR_MAX.
+			 */
+			for (int c = prev + 1; c <= ch; ++c)
+				str = range_append (str, &len, &cap, (char)c);
+			break;
+		case '\\':
+			ch = *(*s)++;
+			switch (ch) {
+			case '\0':
+				/* A lone backslash at the end of the input. */
+				goto eol;
+			case 'a': ch = '\a'; break;
+			case 'b': ch = '\b'; break;
+			case 'f': ch = '\f'; break;
+			case 'n': ch = '\n'; break;
+			case 'r': ch = '\r'; break;
+			case 't': ch = '\t'; break;
+			case 'v': ch = '\v'; break;
+			case '\\': ch = '\\'; break;
+			case '\'': ch = '\''; break;
+			case '"': ch = '\"'; break;
+			default: break;
+			}
+
+			/* FALLTHROUGH */
+		default:
+			str = range_append (str, &len, &cap, ch);
+			break;
+		}
+	}
+end:
+	str[len] = '\0';
+	syn->string = str;
+	return syn;
+eol:
+	/* Leave *s on the terminator rather than one byte behind it. */
+	--*s;
+fail:
+	free (str);
+	return NULL;
+}
+
+static struct syntax *parse_syn (const char **s);
+
+/*
+ * Parse a single operand of a syntax expression.
+ *
+ * After skipping whitespace, one of the following is accepted:
+ *   ( expr )   parenthesized sub-expression  (S_SUB)
+ *   [ ... ]    character class               (S_ANYOF / S_EXCEPT)
+ *   "..."      string literal                (S_STRING)
+ *   name       reference to a definition     (S_IDENT)
+ * If the next character is ';', ')', '|' or '&', an S_EPSILON node is
+ * returned and the character is left unconsumed.
+ *
+ * Returns the new node, or NULL on a syntax error; the node allocated here
+ * is released on failure.  Running out of memory terminates the program.
+ */
+static struct syntax *syn_atom (const char **s)
+{
+	struct syntax *syn;
+	char ch;
+
+	/* <ctype.h> functions require a value representable as unsigned char. */
+	while (isspace ((unsigned char)**s))
+		++*s;
+
+	syn = new (struct syntax);
+	if (syn == NULL)
+		err (1, NULL);
+
+	ch = *(*s)++;
+
+	switch (ch) {
+	case '(':
+		syn->type = S_SUB;
+		syn->syn = parse_syn (s);
+		if (syn->syn == NULL || **s != ')')
+			goto fail;
+		++*s;
+		break;
+	case '[':
+		if (parse_range (syn, s) == NULL)
+			goto fail;
+		break;
+	case '"':
+		syn->type = S_STRING;
+		syn->string = parse_string (s, '"');
+		if (syn->string == NULL)
+			goto fail;
+		break;
+	case ';':
+	case ')':
+	case '|':
+	case '&':
+		syn->type = S_EPSILON;
+		--*s;
+		break;
+	case '\0':
+		/* Stay on the terminator instead of stepping behind it. */
+		--*s;
+		warnx ("unexpected end of line");
+		goto fail;
+	default:
+		if (!isalpha ((unsigned char)ch)) {
+			warnx ("invalid input: '%c'", ch);
+			goto fail;
+		}
+		--*s;
+		syn->type = S_IDENT;
+		syn->string = parse_ident (s);
+		if (syn->string == NULL)
+			goto fail;
+		break;
+	}
+
+	return syn;
+fail:
+	free (syn);
+	return NULL;
+}
+
+/*
+ * Parse an operand followed by any number of postfix operators.
+ *
+ * Each '*', '+' or '?' directly behind the operand wraps the expression
+ * parsed so far in an S_STAR, S_PLUS or S_OPT node.
+ *
+ * Returns the resulting node, or NULL if the operand could not be parsed.
+ * Running out of memory terminates the program.
+ */
+static struct syntax *syn_suffix (const char **s)
+{
+	struct syntax *syn, *n;
+	enum syntax_type type;
+
+	syn = syn_atom (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		switch (**s) {
+		case '*':
+			type = S_STAR;
+			break;
+		case '+':
+			type = S_PLUS;
+			break;
+		case '?':
+			type = S_OPT;
+			break;
+		default:
+			return syn;
+		}
+		++*s;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = type;
+		n->syn = syn;
+		syn = n;
+	}
+}
+
+/*
+ * Parse a sequence of juxtaposed operands: expr1 expr2 ...
+ *
+ * Consecutive operands are combined left to right into S_BINARY nodes with
+ * the operator "andThen".  The sequence ends at ';', '|', ')', '&' or '\',
+ * which is left unconsumed for the caller.
+ *
+ * Returns the resulting node, or NULL on a syntax error.  Running out of
+ * memory terminates the program.
+ */
+static struct syntax *syn_andthen (const char **s)
+{
+	struct syntax *syn, *n;
+
+	syn = syn_suffix (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions require a value representable as unsigned char. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+		/*
+		 * '&' and '\' belong to the callers (syn_butonly, syn_butnot).
+		 * Without stopping here, '&' would yield an endless chain of
+		 * epsilon operands and '\' would be rejected as invalid input.
+		 */
+		if (**s == ';' || **s == '|' || **s == ')' || **s == '&' || **s == '\\')
+			break;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = "andThen";
+		n->left = syn;
+		n->right = syn_suffix (s);
+		if (n->right == NULL) {
+			free (n);
+			return NULL;
+		}
+		syn = n;
+	}
+
+	return syn;
+}
+
+/*
+ * Parse one left-associative binary operator level.
+ *
+ * Reads `operand (opch operand)*` and folds the operands left to right into
+ * S_BINARY nodes whose operator name is `opname`.  Whitespace in front of
+ * the operator character is skipped.
+ *
+ * Returns the resulting node, or NULL if an operand could not be parsed.
+ * Running out of memory terminates the program.
+ */
+static struct syntax *syn_binary_level (const char **s, char opch, const char *opname,
+    struct syntax *(*operand) (const char **))
+{
+	struct syntax *syn, *n;
+
+	syn = operand (s);
+	if (syn == NULL)
+		return NULL;
+
+	while (1) {
+		/* <ctype.h> functions require a value representable as unsigned char. */
+		while (isspace ((unsigned char)**s))
+			++*s;
+		if (**s != opch)
+			break;
+		++*s;
+
+		n = new (struct syntax);
+		if (n == NULL)
+			err (1, NULL);
+		n->type = S_BINARY;
+		n->op = opname;
+		n->left = syn;
+		n->right = operand (s);
+		if (n->right == NULL) {
+			free (n);
+			return NULL;
+		}
+		syn = n;
+	}
+
+	return syn;
+}
+
+/* Parse `expr1 & expr2 ...`: operands are sequences, the operator is "butOnly". */
+static struct syntax *syn_butonly (const char **s)
+{
+	return syn_binary_level (s, '&', "butOnly", syn_andthen);
+}
+
+/* Parse `expr1 \ expr2 ...`: operands are '&' expressions, the operator is "butNot". */
+static struct syntax *syn_butnot (const char **s)
+{
+	return syn_binary_level (s, '\\', "butNot", syn_butonly);
+}
+
+/* Parse `expr1 | expr2 ...`: operands are '\' expressions, the operator is "orElse". */
+static struct syntax *syn_or (const char **s)
+{
+	return syn_binary_level (s, '|', "orElse", syn_butnot);
+}
+
+/*
+ * Entry point of the syntax-expression parser.
+ *
+ * Delegates to syn_or(), the outermost level of the call chain
+ * syn_or -> syn_butnot -> syn_butonly -> syn_andthen -> syn_suffix -> syn_atom.
+ * Returns the parsed tree or NULL on a syntax error.
+ */
+static struct syntax *parse_syn (const char **s)
+{
+ return syn_or (s);
+}
+
+/*
+ * Write `ch` to `file` as a Java character literal, including the quotes.
+ *
+ * Backspace, form feed, newline, carriage return, tab, backslash and the
+ * single quote use their Java escape sequences.  Any other control
+ * character is written as a \uXXXX escape: Java has no \a or \v escape, and
+ * raw control characters make the generated source hard to read.  All
+ * remaining characters are written unchanged.
+ */
+static void print_char (FILE *file, char ch)
+{
+	const char *esc = NULL;
+
+	switch (ch) {
+	case '\b': esc = "\\b"; break;
+	case '\f': esc = "\\f"; break;
+	case '\n': esc = "\\n"; break;
+	case '\r': esc = "\\r"; break;
+	case '\t': esc = "\\t"; break;
+	case '\\': esc = "\\\\"; break;
+	case '\'': esc = "\\'"; break;
+	default: break;
+	}
+
+	if (esc != NULL)
+		fprintf (file, "'%s'", esc);
+	else if (iscntrl ((unsigned char)ch))
+		fprintf (file, "'\\u%04x'", (unsigned int)(unsigned char)ch);
+	else
+		fprintf (file, "'%c'", ch);
+}
+
+/*
+ * Write the Java expression that builds the fragment described by `syn`.
+ *
+ * Binary operators are emitted as chained method calls, each on a line of
+ * its own; `nesting` is the number of tabs the enclosing expression is
+ * indented by, and each operator line is indented one tab deeper.
+ */
+static void print_syntax (FILE *file, const struct syntax *syn, int nesting)
+{
+	switch (syn->type) {
+	case S_EPSILON:
+		fprintf (file, "TokenFragment.epsilon()");
+		break;
+	case S_SUB:
+		print_syntax (file, syn->syn, nesting);
+		break;
+	case S_STRING:
+		/* The text is emitted as written, escape sequences included. */
+		fprintf (file, "TokenFragment.of(\"%s\")", syn->string);
+		break;
+	case S_ANYOF:
+	case S_EXCEPT:
+		fprintf (file, "TokenFragment.%s(", syn->type == S_ANYOF ? "anyOf" : "except");
+		/*
+		 * Start at index 0 so that an empty member list is never read
+		 * beyond its terminator.
+		 */
+		for (size_t i = 0; syn->string[i] != '\0'; ++i) {
+			if (i > 0)
+				fputs (", ", file);
+			print_char (file, syn->string[i]);
+		}
+		fputc (')', file);
+		break;
+	case S_PLUS:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".plus()");
+		break;
+	case S_STAR:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".star()");
+		break;
+	case S_OPT:
+		print_syntax (file, syn->syn, nesting);
+		fprintf (file, ".optional()");
+		break;
+	case S_BINARY:
+		print_syntax (file, syn->left, nesting);
+		++nesting;
+		fputc ('\n', file);
+		for (int i = 0; i < nesting; ++i)
+			fputc ('\t', file);
+		fprintf (file, ".%s(", syn->op);
+		print_syntax (file, syn->right, nesting);
+		fprintf (file, ")");
+		break;
+	case S_IDENT:
+		/* A definition is referred to by its name with a lower-case first letter. */
+		fprintf (file, "%c%s", tolower ((unsigned char)syn->string[0]), syn->string + 1);
+		break;
+	}
+}
+
+/*
+ * Parse a statement of the form `name <sep> syntax ;` into `def`.
+ *
+ * The name is the leading run of alphabetic characters of `s`; whitespace
+ * between the name and the separator is skipped.  Text following the
+ * terminating ';' is ignored.
+ *
+ * Returns 0 on success and 1 if the statement is malformed; on failure
+ * def->name is released and reset to NULL.  Running out of memory
+ * terminates the program.
+ */
+static int parse_named_syntax (const char *s, struct definition *def, char sep)
+{
+	size_t i;
+
+	/* <ctype.h> functions require a value representable as unsigned char. */
+	for (i = 0; isalpha ((unsigned char)s[i]); ++i)
+		continue;
+	if (i == 0)
+		return 1;
+
+	def->name = strndup (s, i);
+	if (def->name == NULL)
+		err (1, NULL);
+	s += i;
+
+	while (isspace ((unsigned char)*s))
+		++s;
+
+	if (*s != sep)
+		goto fail;
+	++s;
+
+	def->syn = parse_syn (&s);
+	if (def->syn == NULL)
+		goto fail;
+
+	if (*s != ';')
+		goto fail;
+
+	return 0;
+fail:
+	free (def->name);
+	def->name = NULL;
+	return 1;
+}
+
+/* Parse a token definition, `name : syntax ;`.  Returns 0 on success, 1 on error. */
+static int parse_def (const char *s, struct definition *def)
+{
+	return parse_named_syntax (s, def, ':');
+}
+
+/* Parse a variable definition, `name = syntax ;`.  Returns 0 on success, 1 on error. */
+static int parse_vardef (const char *s, struct definition *def)
+{
+	return parse_named_syntax (s, def, '=');
+}
+
+/*
+ * Double the capacity *cap of the definition array `defs` and return the
+ * possibly moved array.  Running out of memory terminates the program.
+ */
+static struct definition *grow_definitions (struct definition *defs, size_t *cap)
+{
+	struct definition *tmp;
+
+	tmp = reallocarray (defs, *cap * 2, sizeof (struct definition));
+	if (tmp == NULL)
+		err (1, NULL);
+	*cap *= 2;
+	return tmp;
+}
+
+/*
+ * Cut the line terminator and any trailing whitespace (including the '\r' of
+ * a CRLF line ending) off `line` and return a pointer to its first
+ * non-blank character.
+ */
+static const char *trim_line (char *line)
+{
+	size_t len = strcspn (line, "\n");
+
+	while (len > 0 && isspace ((unsigned char)line[len - 1]))
+		--len;
+	line[len] = '\0';
+	return strip_ws (line);
+}
+
+/*
+ * Translate the lexicgen file read from `infile` into a Java class written
+ * to `outfile`.
+ *
+ * The input has three sections separated by lines consisting of "%%":
+ * declarations (%import, %eof, %error and variable definitions), token
+ * definitions, and code that is copied into the class body with one tab of
+ * indentation.  Blank lines and lines starting with "//" are skipped in the
+ * first two sections.  Statements are read line by line through a buffer of
+ * 1024 bytes, so each statement must fit on one line of that size.
+ *
+ * infilename  name used in diagnostics
+ * vis         modifier text put in front of the generated declarations
+ * package     Java package name, or NULL for none
+ * classname   name of the generated class
+ *
+ * Returns 0 on success and 1 after printing a diagnostic on error.  Running
+ * out of memory terminates the program.  The parsed definitions are not
+ * freed here.
+ */
+static int compile (FILE *infile, const char *infilename, FILE *outfile, const char *vis, const char *package, const char *classname)
+{
+	size_t linenum = 0, ntok = 0, tokcap = 10, nvar = 0, varcap = 10;
+	char line[1024];
+	const char *s;
+	struct definition *tokens, *vars;
+	char *eof = NULL;
+	char *error = NULL;
+	bool at_bol = true;
+
+	if (package != NULL)
+		fprintf (outfile, "package %s;\n\n", package);
+
+	vars = calloc (varcap, sizeof (struct definition));
+	if (vars == NULL)
+		err (1, NULL);
+
+	// Parse declarations
+	while (1) {
+		++linenum;
+		if (fgets (line, sizeof (line), infile) == NULL) {
+			warnx ("%s: %zu: unexpected end of file", infilename, linenum);
+			return 1;
+		}
+
+		s = trim_line (line);
+		if (*s == '\0' || starts_with (s, "//"))
+			continue;
+
+		if (strcmp (s, "%%") == 0)
+			break;
+
+		if (starts_with (s, "%import ")) {
+			/* Drop the '%' to turn the directive into a Java import. */
+			fprintf (outfile, "%s;\n", s + 1);
+		} else if (starts_with (s, "%eof ")) {
+			/* A repeated directive replaces the earlier name. */
+			free (eof);
+			eof = strdup (strip_ws (s + 4));
+			if (eof == NULL)
+				err (1, NULL);
+		} else if (starts_with (s, "%error ")) {
+			free (error);
+			error = strdup (strip_ws (s + 6));
+			if (error == NULL)
+				err (1, NULL);
+		} else {
+			if (nvar == varcap)
+				vars = grow_definitions (vars, &varcap);
+
+			if (parse_vardef (s, &vars[nvar++]) != 0) {
+				warnx ("%s: %zu: unrecognized statement: %s", infilename, linenum, s);
+				return 1;
+			}
+		}
+	}
+
+	/* Missing %eof / %error declarations are reported but not fatal. */
+	if (eof == NULL)
+		warnx ("%s: %zu: end-of-file token not defined. Use `%%eof NAME` to define an EOF token.", infilename, linenum);
+	if (error == NULL)
+		warnx ("%s: %zu: error token not defined. Use `%%error NAME` to define an error token.", infilename, linenum);
+
+	fprintf (outfile, "\npublic class %s {\n", classname);
+
+	tokens = calloc (tokcap, sizeof (struct definition));
+	if (tokens == NULL)
+		err (1, NULL);
+
+	// Parse definitions
+	while (fgets (line, sizeof (line), infile) != NULL) {
+		++linenum;
+		s = trim_line (line);
+		if (*s == '\0' || starts_with (s, "//"))
+			continue;
+
+		if (strcmp (s, "%%") == 0)
+			break;
+
+		if (ntok == tokcap)
+			tokens = grow_definitions (tokens, &tokcap);
+
+		if (parse_def (s, &tokens[ntok++]) != 0) {
+			warnx ("%s: %zu: invalid token definition", infilename, linenum);
+			return 1;
+		}
+	}
+
+	if (ntok == 0) {
+		warnx ("%s: %zu: no token definitions", infilename, linenum);
+		return 1;
+	}
+
+	/* The token type enum: EOF and error tokens first, then one constant per token. */
+	fprintf (outfile, "\t%sstatic enum TokenType {\n", vis);
+	if (eof != NULL)
+		fprintf (outfile, "\t\t%s,\n", eof);
+	if (error != NULL)
+		fprintf (outfile, "\t\t%s,\n", error);
+	for (size_t i = 0; i < ntok; ++i)
+		fprintf (outfile, "\t\t%s,\n", tokens[i].name);
+	fprintf (outfile, "\t}\n\n");
+
+	/* Variables and tokens become fields named after the definition, first letter in lower case. */
+	if (nvar > 0) {
+		fprintf (outfile, "\t// Variable definitions\n");
+		for (size_t i = 0; i < nvar; ++i) {
+			const struct definition *var = &vars[i];
+
+			fprintf (outfile,
+			    "\t%sstatic final TokenFragment %c%s = \n\t\t",
+			    vis,
+			    tolower ((unsigned char)var->name[0]),
+			    var->name + 1);
+			print_syntax (outfile, var->syn, 2);
+			fprintf (outfile, ";\n");
+		}
+		fputc ('\n', outfile);
+	}
+
+	fprintf (outfile, "\t// Token Definitions\n");
+	for (size_t i = 0; i < ntok; ++i) {
+		const struct definition *tok = &tokens[i];
+
+		fprintf (outfile,
+		    "\t%sstatic final TokenRule<TokenType> %c%s = new TokenRule<>(TokenType.%s,\n\t\t",
+		    vis,
+		    tolower ((unsigned char)tok->name[0]),
+		    tok->name + 1,
+		    tok->name);
+		print_syntax (outfile, tok->syn, 2);
+		fprintf (outfile, "\n\t);\n");
+	}
+	fputc ('\n', outfile);
+
+	/* construct() collects the rules in definition order and builds the lexer. */
+	fprintf (outfile, "\t%sstatic final Lexer construct() {\n", vis);
+	fprintf (outfile, "\t\tfinal var rules = new java.util.ArrayList<TokenRule<TokenType>>();\n");
+	for (size_t i = 0; i < ntok; ++i) {
+		const struct definition *tok = &tokens[i];
+
+		fprintf (outfile, "\t\trules.add(%c%s);\n", tolower ((unsigned char)tok->name[0]), tok->name + 1);
+	}
+
+	fprintf (outfile, "\t\treturn new Lexer(new TokenRuleSet(rules))");
+	if (eof != NULL)
+		fprintf (outfile, "\n\t\t\t.setEndType(TokenType.%s)", eof);
+	if (error != NULL)
+		fprintf (outfile, "\n\t\t\t.setErrorType(TokenType.%s)", error);
+	fprintf (outfile, ";\n\t}\n\n");
+
+	/*
+	 * Copy the code section.  The indentation tab is written only at the
+	 * start of an input line: a line longer than the buffer arrives in
+	 * several pieces and must not get a tab in its middle.
+	 */
+	while (fgets (line, sizeof (line), infile) != NULL) {
+		if (at_bol)
+			fputc ('\t', outfile);
+		fputs (line, outfile);
+		at_bol = strchr (line, '\n') != NULL;
+	}
+	/* Keep the closing brace off a final line that lacks a newline. */
+	if (!at_bol)
+		fputc ('\n', outfile);
+
+	fputs ("}\n", outfile);
+
+	if (ferror (outfile)) {
+		warnx ("%s: error while writing the output", infilename);
+		return 1;
+	}
+
+	return 0;
+}
+
+
+
+
+/* Print the command-line synopsis to standard error; returns 1 for use as the exit status. */
+static int usage (void)
+{
+	static const char synopsis[] =
+	    "usage: lexicgen [-v VISIBILITY] [-p PACKAGE] [-o FILE] file\n";
+
+	fprintf (stderr, "%s", synopsis);
+	return 1;
+}
+
+/*
+ * Return a heap-allocated string made of the first `len` bytes of `src`
+ * followed by `suffix`.  Running out of memory terminates the program.
+ */
+static char *concat_n (const char *src, size_t len, const char *suffix)
+{
+	size_t slen = strlen (suffix);
+	char *out = malloc (len + slen + 1);
+
+	if (out == NULL)
+		err (1, NULL);
+	memcpy (out, src, len);
+	memcpy (out + len, suffix, slen + 1);
+	return out;
+}
+
+/*
+ * Derive the class name and the default output path from the input path.
+ *
+ * The class name is the last path component of `filename` up to its first
+ * '.'; the output path is `filename` cut at that same '.' with ".java"
+ * appended.  The class name is not checked for being a valid Java
+ * identifier.
+ *
+ * outfilename  receives the output path; may be NULL if it is not wanted
+ * classname    receives the class name
+ *
+ * Both results are heap-allocated and owned by the caller.  Returns 0 on
+ * success, or -1 without touching the outputs if the class name would be
+ * empty (e.g. "", "dir/" or ".lx").
+ */
+static int gen_names (const char *filename, char **outfilename, char **classname)
+{
+	const char *base, *dot;
+
+	/* Start of the last path component. */
+	base = strrchr (filename, '/');
+	base = (base != NULL) ? base + 1 : filename;
+
+	dot = strchr (base, '.');
+	if (dot == NULL)
+		dot = base + strlen (base);
+
+	if (dot == base)
+		return -1;
+
+	if (outfilename != NULL)
+		*outfilename = concat_n (filename, (size_t)(dot - filename), ".java");
+	*classname = concat_n (base, (size_t)(dot - base), "");
+	return 0;
+}
+
+
+/*
+ * lexicgen [-v visibility] [-p package] [-o output-file] input-file
+ *
+ * Parses the options, opens the input and output files and runs compile().
+ * Without -o the output path is derived from the input path by gen_names();
+ * an output path of "-" selects standard output.  If compilation or writing
+ * fails, a partially written output file is removed.
+ *
+ * Returns 0 on success and non-zero on error.
+ */
+int main (int argc, char *argv[])
+{
+	int option;
+	char *package = NULL;
+	char *infilename = NULL, *outfilename = NULL, *classname;
+	FILE *infile, *outfile;
+	enum Visibility vis = V_PRIVATE;
+	bool to_stdout;
+	int ret;
+
+#ifdef __OpenBSD__
+	if (pledge ("stdio rpath wpath cpath", NULL) != 0)
+		err (1, "pledge()");
+#endif
+
+	while ((option = getopt (argc, argv, "p:v:o:")) != -1) {
+		switch (option) {
+		case 'p':
+			package = optarg;
+			break;
+		case 'v':
+			if (strcmp (optarg, "public") == 0) {
+				vis = V_PUBLIC;
+			} else if (strcmp (optarg, "private") == 0) {
+				vis = V_PRIVATE;
+			} else if (strcmp (optarg, "package") == 0) {
+				vis = V_PACKAGE;
+			} else {
+				errx (1, "Invalid visibility: '%s', expected any of: private, public, package", optarg);
+			}
+			break;
+		case 'o':
+			outfilename = optarg;
+			break;
+		default:
+			return usage ();
+		}
+	}
+
+	argv += optind;
+	argc -= optind;
+
+	if (argc != 1)
+		return usage ();
+
+	infilename = argv[0];
+	/* Only ask for a generated output path when -o was not given. */
+	if (gen_names (infilename, outfilename ? NULL : &outfilename, &classname) != 0)
+		errx (1, "invalid file name: %s", infilename);
+
+	infile = fopen (infilename, "r");
+	if (infile == NULL)
+		err (1, "fopen(\"%s\")", infilename);
+
+	to_stdout = strcmp (outfilename, "-") == 0;
+	if (to_stdout) {
+		outfile = stdout;
+	} else {
+		outfile = fopen (outfilename, "w");
+		if (outfile == NULL)
+			err (1, "fopen(\"%s\")", outfilename);
+	}
+
+#ifdef __OpenBSD__
+	/*
+	 * All files are open, so file access can be dropped.  "cpath" is kept
+	 * when a real output file exists, because removing it on the failure
+	 * path below needs that promise.
+	 */
+	if (pledge (to_stdout ? "stdio" : "stdio cpath", NULL) != 0)
+		err (1, "pledge()");
+#endif
+
+	ret = compile (infile, infilename, outfile, visibilities[vis], package, classname);
+
+	/* fclose() flushes the buffered output, so a write error can surface here. */
+	if (fclose (outfile) != 0 && ret == 0) {
+		warn ("%s", outfilename);
+		ret = 1;
+	}
+	fclose (infile);
+	/* Never try to remove a file named "-" when writing to standard output. */
+	if (ret != 0 && !to_stdout)
+		remove (outfilename);
+	return ret;
+}
+