java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Sep 2014 23:44:26 +0200
changeset 94 f5b287fa69b6
parent 93 0d8e8903d638
child 95 c03497563ce3
permissions -rw-r--r--
in-ini: regex clean-up
     1 /**
     2  * Alt2XML
     3  * Copyright © 2014 František Kučera (frantovo.cz)
     4  *
     5  * This program is free software: you can redistribute it and/or modify
     6  * it under the terms of the GNU General Public License as published by
     7  * the Free Software Foundation, either version 3 of the License, or
     8  * (at your option) any later version.
     9  *
    10  * This program is distributed in the hope that it will be useful,
    11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  * GNU General Public License for more details.
    14  *
    15  * You should have received a copy of the GNU General Public License
    16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package cz.frantovo.alt2xml.in.ini;
    19 
    20 import cz.frantovo.alt2xml.AbstractAlt2XmlReader;
    21 import cz.frantovo.alt2xml.in.Alt2ContentHandler;
    22 import cz.frantovo.alt2xml.in.Functions;
    23 import java.io.BufferedReader;
    24 import java.io.IOException;
    25 import java.io.InputStreamReader;
    26 import java.util.ArrayList;
    27 import java.util.List;
    28 import java.util.logging.Level;
    29 import java.util.logging.Logger;
    30 import java.util.regex.Matcher;
    31 import java.util.regex.Pattern;
    32 import org.xml.sax.InputSource;
    33 import org.xml.sax.SAXException;
    34 import org.xml.sax.helpers.AttributesImpl;
    35 
    36 /**
    37  *
    38  * @author Ing. František Kučera (frantovo.cz)
    39  */
    40 public class Reader extends AbstractAlt2XmlReader {
    41 
    42 	public static final String ROOT_ELEMENT = "ini";
    43 	private static final Logger log = Logger.getLogger(Reader.class.getName());
    44 
    45 	@Override
    46 	public void parse(InputSource input) throws IOException, SAXException {
    47 		outputStart();
    48 
    49 		try (BufferedReader br = new BufferedReader(new InputStreamReader(input.getByteStream()))) {
    50 			FileContext fc = new FileContext(contentHandler);
    51 			for (String currentLine = br.readLine(); currentLine != null; currentLine = br.readLine()) {
    52 				fc.lineNumber++;
    53 				boolean lineProcessed = false;
    54 				for (LINE_TYPE lineType : LINE_TYPE.values()) {
    55 					lineProcessed = lineType.processLine(currentLine, fc);
    56 					if (lineProcessed) {
    57 						break;
    58 					}
    59 				}
    60 				if (!lineProcessed) {
    61 					log.log(Level.SEVERE, "Invalid line in INI file: {0}", currentLine);
    62 				}
    63 			}
    64 			fc.outputEndSection(fc.lastSection);
    65 
    66 		}
    67 
    68 		outputEnd();
    69 	}
    70 
    71 	private void outputStart() throws SAXException {
    72 		contentHandler.startDocument();
    73 		contentHandler.lineBreak();
    74 		contentHandler.startElement(null, null, ROOT_ELEMENT, null);
    75 		contentHandler.lineBreak();
    76 	}
    77 
    78 	private void outputEnd() throws SAXException {
    79 		contentHandler.endElement(null, null, "ini");
    80 		contentHandler.lineBreak();
    81 		contentHandler.endDocument();
    82 	}
    83 
    84 	private static class FileContext {
    85 
    86 		private final Alt2ContentHandler contentHandler;
    87 		private String lastSection;
    88 		private int lineNumber;
    89 
    90 		public FileContext(Alt2ContentHandler contentHandler) {
    91 			this.contentHandler = contentHandler;
    92 		}
    93 
    94 		protected void outputStartSection(String name) throws SAXException {
    95 			contentHandler.indentation(1);
    96 			contentHandler.startElement(null, null, name, null);
    97 			contentHandler.lineBreak();
    98 		}
    99 
   100 		protected void outputEndSection(String name) throws SAXException {
   101 			if (name != null) {
   102 				contentHandler.indentation(1);
   103 				contentHandler.endElement(null, null, name);
   104 				contentHandler.lineBreak();
   105 			}
   106 		}
   107 	}
   108 
   109 	private static String encodeXmlName(String originalName, int lineNumber) {
   110 		String encodedName = Functions.encodeXmlName(originalName);
   111 		if (!encodedName.equals(originalName)) {
   112 			log.log(Level.FINE, "Line {0}: name „{1} was encoded to „{2}““", new Object[]{lineNumber, originalName, encodedName});
   113 		}
   114 		return encodedName;
   115 	}
   116 
   117 	private static class LineContext {
   118 
   119 		private final Matcher matcher;
   120 
   121 		public LineContext(Matcher matcher) {
   122 			this.matcher = matcher;
   123 		}
   124 	}
   125 
   126 	private enum LINE_TYPE {
   127 
   128 		BLANK_LINE("\\s*") {
   129 					@Override
   130 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   131 						log.log(Level.FINEST, "Line {0}: skipping blank line", fc.lineNumber);
   132 					}
   133 				},
   134 		COMMENT("\\s*(;|#)\\s*(?<comment>.*)") {
   135 					@Override
   136 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   137 						// TODO: comment → LexicalHandler
   138 						log.log(Level.FINER, "Line {0}: comment: {1}", new Object[]{fc.lineNumber, lc.matcher.group("comment")});
   139 					}
   140 
   141 				},
   142 		SECTION("\\s*\\[\\s*(?<name>[^\\]\\]]+)\\s*\\]\\s*") {
   143 					@Override
   144 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   145 						String name = encodeXmlName(lc.matcher.group("name"), fc.lineNumber);
   146 						fc.outputEndSection(fc.lastSection);
   147 						fc.outputStartSection(name);
   148 						fc.lastSection = name;
   149 					}
   150 
   151 				},
   152 		ENTRY(
   153 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*\"(?<value>[^']+)\"\\s*((;|#)\\s*(?<comment>.*))?", // quoted value → include spaces + might have comment
   154 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*'(?<value>[^']+)'\\s*((;|#)\\s*(?<comment>.*))?", // apostrophed value → include spaces + might have comment
   155 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*(?<value>.+)" // unquoted value → strip spaces + no comments
   156 		) {
   157 					@Override
   158 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   159 						String key = encodeXmlName(lc.matcher.group("key"), fc.lineNumber);
   160 						String value = lc.matcher.group("value");
   161 
   162 						if (lc.matcher.groupCount() > 4) {
   163 							String comment = lc.matcher.group("comment");
   164 							// TODO: comment → LexicalHandler
   165 							log.log(Level.FINER, "Line {0}: comment for entry „{1}“ is: {2}", new Object[]{fc.lineNumber, key, comment});
   166 						}
   167 
   168 						AttributesImpl attributes = null;
   169 						String subkey = lc.matcher.group("subkey");
   170 						if (subkey != null) {
   171 							attributes = new AttributesImpl();
   172 							attributes.addAttribute(null, "sub", "sub", "xs:string", subkey);
   173 						}
   174 
   175 						fc.contentHandler.indentation(fc.lastSection == null ? 1 : 2);
   176 						fc.contentHandler.textElement(value, null, null, key, attributes);
   177 						fc.contentHandler.lineBreak();
   178 
   179 					}
   180 
   181 				},;
   182 
   183 		private LINE_TYPE(String... patterns) {
   184 			for (String pattern : patterns) {
   185 				this.patterns.add(Pattern.compile(pattern));
   186 			}
   187 		}
   188 
   189 		private final List<Pattern> patterns = new ArrayList<>();
   190 
   191 		protected boolean processLine(String currentLine, FileContext fc) throws SAXException {
   192 			for (Pattern pattern : patterns) {
   193 				Matcher m = pattern.matcher(currentLine);
   194 				if (m.matches()) {
   195 					log.log(Level.FINEST, "Line {0}: pattern „{1}“ matches „{2}“", new Object[]{fc.lineNumber, pattern, currentLine});
   196 					processLine(new LineContext(m), fc);
   197 					return true;
   198 				}
   199 			}
   200 			return false;
   201 		}
   202 
   203 		public abstract void processLine(LineContext lc, FileContext fc) throws SAXException;
   204 	}
   205 
   206 }