tagger benchmark
[IRC.git] / Robust / src / Benchmarks / mlp / tagger / original-java / src / tagger / StandardEngine.java
diff --git a/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/StandardEngine.java b/Robust/src/Benchmarks/mlp/tagger/original-java/src/tagger/StandardEngine.java
new file mode 100755 (executable)
index 0000000..b6a600d
--- /dev/null
@@ -0,0 +1,377 @@
+/**\r
+ * StandardEngine class\r
+ * Standard registration of actions\r
+ * Implemented as a subclass of Engine for no good reason\r
+ *\r
+ * @author  Daniel Jackson\r
+ * @version 0, 07/08/01\r
+ */\r
+\r
+package tagger;\r
+import java.io.*;\r
+import java.util.*;\r
+\r
+public class StandardEngine extends Engine {\r
+\r
+       // the most recently constructed StandardEngine; assigned on the last line of the constructor\r
+       static Engine STANDARD;\r
+\r
+       // reserved words for property files\r
+\r
+       // character table\r
+       // property names looked up for a character: "index" is required wherever a\r
+       // character is emitted, "font" is optional (see the CHARCOMMAND action)\r
+       static final String index_prop_name = "index";\r
+       static final String font_prop_name = "font";\r
+\r
+       // character-table entry names passed to put_special_char for punctuation tokens\r
+       // note: apostrophe and closing single quote both resolve to "quoteright"\r
+       static final String apostrophe_char_name = "quoteright";\r
+       static final String prime_char_name = "prime";\r
+       static final String opensinglequote_char_name = "quoteleft";\r
+       static final String closesinglequote_char_name = "quoteright";\r
+       static final String opendoublequote_char_name = "quotedblleft";\r
+       static final String closedoublequote_char_name = "quotedblright";\r
+       // a HYPHENS token of length 1, 2 or 3 selects hyphen, endash or emdash respectively\r
+       static final String hyphen_char_name = "hyphen";\r
+       static final String endash_char_name = "endash";\r
+       static final String emdash_char_name = "emdash";\r
+       // a DOTS token of length 2 or 3 selects twodotleader or ellipsis; length 1 is emitted as plain "."\r
+       // (period_char_name is not referenced anywhere in this class)\r
+       static final String period_char_name = "period";\r
+       static final String twodotleader_char_name = "twodotleader";\r
+       static final String ellipsis_char_name = "ellipsis";\r
+\r
+       // argument values of a FORMATCOMMAND token, each mapped to a Generator format constant\r
+       static final String ROMAN_COMMANDNAME = "roman";\r
+       static final String BOLD_COMMANDNAME = "bold";\r
+       static final String ITALICS_COMMANDNAME = "italic";\r
+       static final String SUBSCRIPT_COMMANDNAME = "sub";\r
+       static final String SUPERSCRIPT_COMMANDNAME = "sup";\r
+\r
+       // style sheet\r
+       // "next" names the style property that gives the style of the following paragraph;\r
+       // "body" is the paragraph style in effect before any style command or paragraph break\r
+       static final String next_style_prop_name = "next";\r
+       static final String default_style_name = "body";\r
+\r
+       public StandardEngine (\r
+                       final Generator generator,\r
+                       final PropertyMap style_map,\r
+                       final PrintStream error_stream, final PrintStream index_stream\r
+                       ) {\r
+\r
+               final PropertyMap char_map = new PropertyMap ();\r
+               final Numbering numbering = new Numbering (style_map, error_stream);\r
+\r
+               // a hack to work around lack of proper closures in Java\r
+               // can't assign to local variable within actions\r
+               class StringBox {\r
+                       String string;\r
+                       StringBox (String s) {string = s;}\r
+                       void set (String s) {string = s;}\r
+               }\r
+               final StringBox current_para_style = new StringBox (default_style_name);\r
+\r
+               // special action for start of paragraph\r
+               // created once, but dynamically inserted and removed\r
+               // so that it's performed once at the start of each paragraph\r
+               final Action paragraph_action = new Action () {\r
+                       boolean first_para = true;\r
+                       public void perform (Token t, Iterator iter) {\r
+                               if (t.type != Token.PARASTYLECOMMAND) {\r
+                                       if (!first_para) generator.linefeed ();\r
+                                       generator.new_para (current_para_style.string);\r
+                                       String numstr = numbering.get_numbering_string (current_para_style.string);\r
+                                       if (numstr.length() != 0) {\r
+                                               // display numbering as evidence of progress\r
+                                               error_stream.println (numstr);\r
+                                               /*\r
+                                               // this doesn't work. not sure why.\r
+                                               // because it becomes a recursive call!\r
+                                               // need an impoverished engine specially for this, without paras?\r
+\r
+                                               Reader numreader = new StringReader (numstr);\r
+                                               try {\r
+                                               Tagger.consume_source (StandardEngine.STANDARD, style_map, numreader);\r
+                                               }\r
+                                               catch (IOException e) {Assert.unreachable ();}\r
+                                               */\r
+                                               generator.plaintext (numstr);\r
+                                               }\r
+\r
+                                       iter.remove ();\r
+                                       first_para = false;\r
+                                       }\r
+                               }};\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.plaintext (t.arg);\r
+                               }},\r
+                       Token.ALPHABETIC);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.plaintext (t.arg);\r
+                               }},\r
+                       Token.NUMERIC);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.plaintext (t.arg);\r
+                               }},\r
+                       Token.WHITESPACE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.new_line ();\r
+                               }},\r
+                       Token.LINEBREAK);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, apostrophe_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.APOSTROPHE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, prime_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.PRIME);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, opensinglequote_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.OPENSINGLEQUOTE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, closesinglequote_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.CLOSESINGLEQUOTE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, opendoublequote_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.OPENDOUBLEQUOTE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               put_special_char (generator, char_map, closedoublequote_char_name, error_stream, t.line);\r
+                               }},\r
+                       Token.CLOSEDOUBLEQUOTE);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               int len = t.arg.length ();\r
+                               if (len == 1)\r
+                                       put_special_char (generator, char_map, hyphen_char_name, error_stream, t.line);\r
+                               else if (len == 2)\r
+                                       put_special_char (generator, char_map, endash_char_name, error_stream, t.line);\r
+                               else if (len == 3)\r
+                                       put_special_char (generator, char_map, emdash_char_name, error_stream, t.line);\r
+                               else\r
+                                       error_stream.println (t.line + ": Too many hyphens: " + t.arg);\r
+                               }},\r
+                       Token.HYPHENS);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               int len = t.arg.length ();\r
+                               if (len == 1)\r
+                                       generator.plaintext (".");\r
+                               else if (len == 2)\r
+                                       put_special_char (generator, char_map, twodotleader_char_name, error_stream, t.line);\r
+                               else if (len == 3)\r
+                                       put_special_char (generator, char_map, ellipsis_char_name, error_stream, t.line);\r
+                               else\r
+                                       error_stream.println (t.line + ": Too many dots: " + t.arg);\r
+                               }},\r
+                       Token.DOTS);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               // open file with given name and load char map from it\r
+                               String file_name = t.arg;\r
+                               try {\r
+                                       File f = new File (file_name);\r
+                                       FileInputStream s = new FileInputStream (f);\r
+                                       InputStreamReader r = new InputStreamReader (s);\r
+                                       PropertyParser p = new PropertyParser (r, error_stream);\r
+                                       char_map.incorporate (p);\r
+                                       } catch (IOException e) {\r
+                                       error_stream.println (t.line + ": Can't open char map file: " + file_name);\r
+                                       }\r
+                               }},\r
+                       Token.LOADCHARMAPCOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               // open file with given name and load char map from it\r
+                               String file_name = t.arg;\r
+                               try {\r
+                                       File f = new File (file_name);\r
+                                       FileInputStream s = new FileInputStream (f);\r
+                                       InputStreamReader r = new InputStreamReader (s);\r
+                                       PropertyParser p = new PropertyParser (r, error_stream);\r
+                                       style_map.incorporate (p);\r
+                                       numbering.incorporate ();\r
+                                       } catch (IOException e) {\r
+                                       error_stream.println (t.line + ": Can't open style sheet file: " + file_name);\r
+                                       }\r
+                               }},\r
+                       Token.LOADSTYLESHEETCOMMAND);\r
+\r
+               final Action unsuppress_action = new Action () {\r
+                       public void perform (Token t, Iterator i) {\r
+                               generator.suppress_off ();\r
+                               i.remove ();\r
+                       }};\r
+\r
+               // preamble command switches on output suppression\r
+               // registers action to turn suppression off with paragraph break command\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.suppress_on ();\r
+                               register_by_type (unsuppress_action, Token.PARABREAK);\r
+                               }},\r
+                       Token.PREAMBLECOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               String next_style = style_map.get_property (current_para_style.string, next_style_prop_name);\r
+                               if (next_style == null) {\r
+                                       error_stream.println (t.line + ": No next style property given for style: " + current_para_style.string);\r
+                                       return;\r
+                                       }\r
+                               current_para_style.set (next_style);\r
+                               register_for_all (paragraph_action);\r
+                               }},\r
+                       Token.PARABREAK);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               current_para_style.set (t.arg);\r
+                               }},\r
+                       Token.PARASTYLECOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               String index = char_map.get_property (t.arg, index_prop_name);\r
+                               if (index == null) {\r
+                                       error_stream.println (t.line + ": No index property given for character: " + t.arg);\r
+                                       return;\r
+                                       }\r
+                               String font = char_map.get_property (t.arg, font_prop_name);\r
+                               // if no font is listed, generate special character in standard font\r
+                               if (font == null)\r
+                                       generator.special_char (index);\r
+                               else\r
+                                       generator.special_char (font, index);\r
+                               }},\r
+                       Token.CHARCOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       boolean italic_mode_on = false;\r
+                       public void perform (Token t) {\r
+                               if (italic_mode_on) {\r
+                                       italic_mode_on = false;\r
+                                       generator.pop_format ();\r
+                                       }\r
+                               else {\r
+                                       italic_mode_on = true;\r
+                                       generator.push_format (Generator.ITALICS);\r
+                                       }\r
+                               }},\r
+                       Token.UNDERSCORE);\r
+\r
+               // used to italicize alphabetic tokens in math mode\r
+               final Action push_italics_action = new Action () {\r
+                       public void perform (Token t, Iterator iter) {\r
+                               Assert.assert (t.type == Token.ALPHABETIC);\r
+                               generator.push_format (Generator.ITALICS);\r
+                               }};\r
+               final Action pop_italics_action = new Action () {\r
+                       public void perform (Token t, Iterator iter) {\r
+                               Assert.assert (t.type == Token.ALPHABETIC);\r
+                               generator.pop_format ();\r
+                               }};\r
+\r
+               register_by_type (new Action () {\r
+                       boolean math_mode_on = false;\r
+                       public void perform (Token t) {\r
+                               if (math_mode_on) {\r
+                                       math_mode_on = false;\r
+                                       unregister_by_type (push_italics_action, Token.ALPHABETIC);\r
+                                       unregister_by_type (pop_italics_action, Token.ALPHABETIC);\r
+                                       }\r
+                               else {\r
+                                       math_mode_on = true;\r
+                                       register_by_type_back (pop_italics_action, Token.ALPHABETIC);\r
+                                       register_by_type_front (push_italics_action, Token.ALPHABETIC);\r
+                                       }\r
+                               }},\r
+                       Token.DOLLAR);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               if (t.arg.equals (ROMAN_COMMANDNAME))\r
+                                       generator.push_format (Generator.ROMAN);\r
+                               else if (t.arg.equals (BOLD_COMMANDNAME))\r
+                                       generator.push_format (Generator.BOLD);\r
+                               else if (t.arg.equals (ITALICS_COMMANDNAME))\r
+                                       generator.push_format (Generator.ITALICS);\r
+                               else if (t.arg.equals (SUBSCRIPT_COMMANDNAME))\r
+                                       generator.push_format (Generator.SUBSCRIPT);\r
+                               else if (t.arg.equals (SUPERSCRIPT_COMMANDNAME))\r
+                                       generator.push_format (Generator.SUPERSCRIPT);\r
+                               }},\r
+                       Token.FORMATCOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.pop_format ();\r
+                               }},\r
+                       Token.POPFORMATCOMMAND);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               generator.plaintext (t.arg);\r
+                               }},\r
+                       Token.OTHER);\r
+\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               error_stream.println ("... done");\r
+                               }},\r
+                       Token.ENDOFSTREAM);\r
+\r
+               STANDARD = this;\r
+       }\r
+\r
+       /* no actions for these token types:\r
+       COMMENT\r
+       SEPARATORCOMMAND\r
+       */\r
+\r
+/*\r
+       not yet coded:\r
+\r
+       public static final int REFCOMMAND = 32;\r
+       public static final int TAGCOMMAND = 33;\r
+       public static final int CITECOMMAND = 34;\r
+*/\r
+\r
+\r
+       /* general form of action registration is this:\r
+               register_by_type (new Action () {\r
+                       public void perform (Token t) {\r
+                               // put code to be executed for token type here\r
+                               }},\r
+                       Token.TYPENAME);\r
+       */\r
+\r
+       void put_special_char (Generator generator, PropertyMap char_map,\r
+               String char_name, PrintStream error_stream, int line) {\r
+                       String index = char_map.get_property (char_name, index_prop_name);\r
+                       if (index == null) {\r
+                               error_stream.println (line + ": Unresolved character: " + char_name);\r
+                               }\r
+                       else\r
+                               generator.special_char (index);\r
+               }\r
+\r
+}
\ No newline at end of file