java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java
author František Kučera <franta-hg@frantovo.cz>
Tue, 28 Oct 2014 00:33:17 +0100
changeset 102 a7f7b9094cc3
parent 95 c03497563ce3
child 111 e4900596abdb
permissions -rw-r--r--
in-ini: use constant for element name
     1 /**
     2  * Alt2XML
     3  * Copyright © 2014 František Kučera (frantovo.cz)
     4  *
     5  * This program is free software: you can redistribute it and/or modify
     6  * it under the terms of the GNU General Public License as published by
     7  * the Free Software Foundation, either version 3 of the License, or
     8  * (at your option) any later version.
     9  *
    10  * This program is distributed in the hope that it will be useful,
    11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  * GNU General Public License for more details.
    14  *
    15  * You should have received a copy of the GNU General Public License
    16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package cz.frantovo.alt2xml.in.ini;
    19 
    20 import cz.frantovo.alt2xml.AbstractAlt2XmlReader;
    21 import cz.frantovo.alt2xml.in.Alt2ContentHandler;
    22 import cz.frantovo.alt2xml.in.Functions;
    23 import java.io.BufferedReader;
    24 import java.io.IOException;
    25 import java.io.InputStreamReader;
    26 import java.util.ArrayList;
    27 import java.util.List;
    28 import java.util.logging.Level;
    29 import java.util.logging.Logger;
    30 import java.util.regex.Matcher;
    31 import java.util.regex.Pattern;
    32 import org.xml.sax.InputSource;
    33 import org.xml.sax.SAXException;
    34 import org.xml.sax.helpers.AttributesImpl;
    35 
    36 /**
    37  * Reads INI files with sections and entries.
    38  * Example:
    39  * <pre>; this is comment
    40  *random=value outside of any groups
    41  *
    42  *[some_section]
    43  *
    44  *; simple entry:
    45  *key=value
    46  *
    47  *; entry starting/ending with whitespace
    48  *white="  spaces everywhere  " ; might have comment
    49  *alternative='  spaces everywhere  ' ; same
    50  *
    51  *; entries with subkeys:
    52  *key[subkey_a]=value
    53  *key[subkey_b]=value
    54  *
    55  *# alternative way to comment
    56  *
    57  *[another secion]
    58  *yes=there might be spaces in names
    59  *because=they are encoded before putting into XML element names
    60  * </pre>
    61  *
    62  * @author Ing. František Kučera (frantovo.cz)
    63  */
    64 public class Reader extends AbstractAlt2XmlReader {
    65 
    66 	public static final String ROOT_ELEMENT = "ini";
    67 	private static final Logger log = Logger.getLogger(Reader.class.getName());
    68 
    69 	@Override
    70 	public void parse(InputSource input) throws IOException, SAXException {
    71 		outputStart();
    72 
    73 		try (BufferedReader br = new BufferedReader(new InputStreamReader(input.getByteStream()))) {
    74 			FileContext fc = new FileContext(contentHandler);
    75 			for (String currentLine = br.readLine(); currentLine != null; currentLine = br.readLine()) {
    76 				fc.lineNumber++;
    77 				boolean lineProcessed = false;
    78 				for (LINE_TYPE lineType : LINE_TYPE.values()) {
    79 					lineProcessed = lineType.processLine(currentLine, fc);
    80 					if (lineProcessed) {
    81 						break;
    82 					}
    83 				}
    84 				if (!lineProcessed) {
    85 					log.log(Level.SEVERE, "Invalid line in INI file: {0}", currentLine);
    86 				}
    87 			}
    88 			fc.outputEndSection(fc.lastSection);
    89 
    90 		}
    91 
    92 		outputEnd();
    93 	}
    94 
    95 	private void outputStart() throws SAXException {
    96 		contentHandler.startDocument();
    97 		contentHandler.lineBreak();
    98 		contentHandler.startElement(null, null, ROOT_ELEMENT, null);
    99 		contentHandler.lineBreak();
   100 	}
   101 
   102 	private void outputEnd() throws SAXException {
   103 		contentHandler.endElement(null, null, ROOT_ELEMENT);
   104 		contentHandler.lineBreak();
   105 		contentHandler.endDocument();
   106 	}
   107 
   108 	private static class FileContext {
   109 
   110 		private final Alt2ContentHandler contentHandler;
   111 		private String lastSection;
   112 		private int lineNumber;
   113 
   114 		public FileContext(Alt2ContentHandler contentHandler) {
   115 			this.contentHandler = contentHandler;
   116 		}
   117 
   118 		protected void outputStartSection(String name) throws SAXException {
   119 			contentHandler.indentation(1);
   120 			contentHandler.startElement(null, null, name, null);
   121 			contentHandler.lineBreak();
   122 		}
   123 
   124 		protected void outputEndSection(String name) throws SAXException {
   125 			if (name != null) {
   126 				contentHandler.indentation(1);
   127 				contentHandler.endElement(null, null, name);
   128 				contentHandler.lineBreak();
   129 			}
   130 		}
   131 	}
   132 
   133 	private static String encodeXmlName(String originalName, int lineNumber) {
   134 		String encodedName = Functions.encodeXmlName(originalName);
   135 		if (!encodedName.equals(originalName)) {
   136 			log.log(Level.FINE, "Line {0}: name „{1} was encoded to „{2}““", new Object[]{lineNumber, originalName, encodedName});
   137 		}
   138 		return encodedName;
   139 	}
   140 
   141 	private static class LineContext {
   142 
   143 		private final Matcher matcher;
   144 
   145 		public LineContext(Matcher matcher) {
   146 			this.matcher = matcher;
   147 		}
   148 	}
   149 
   150 	private enum LINE_TYPE {
   151 
   152 		BLANK_LINE("\\s*") {
   153 					@Override
   154 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   155 						log.log(Level.FINEST, "Line {0}: skipping blank line", fc.lineNumber);
   156 					}
   157 				},
   158 		COMMENT("\\s*(;|#)\\s*(?<comment>.*)") {
   159 					@Override
   160 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   161 						// TODO: comment → LexicalHandler
   162 						log.log(Level.FINER, "Line {0}: comment: {1}", new Object[]{fc.lineNumber, lc.matcher.group("comment")});
   163 					}
   164 
   165 				},
   166 		SECTION("\\s*\\[\\s*(?<name>[^\\]]+)\\s*\\]\\s*") {
   167 					@Override
   168 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   169 						String name = encodeXmlName(lc.matcher.group("name"), fc.lineNumber);
   170 						fc.outputEndSection(fc.lastSection);
   171 						fc.outputStartSection(name);
   172 						fc.lastSection = name;
   173 					}
   174 
   175 				},
   176 		ENTRY(
   177 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*\"(?<value>[^']+)\"\\s*((;|#)\\s*(?<comment>.*))?", // quoted value → include spaces + might have comment
   178 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*'(?<value>[^']+)'\\s*((;|#)\\s*(?<comment>.*))?", // apostrophed value → include spaces + might have comment
   179 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*(?<value>.+)" // unquoted value → strip spaces + no comments
   180 		) {
   181 					@Override
   182 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   183 						String key = encodeXmlName(lc.matcher.group("key"), fc.lineNumber);
   184 						String value = lc.matcher.group("value");
   185 
   186 						if (lc.matcher.groupCount() > 4) {
   187 							String comment = lc.matcher.group("comment");
   188 							// TODO: comment → LexicalHandler
   189 							log.log(Level.FINER, "Line {0}: comment for entry „{1}“ is: {2}", new Object[]{fc.lineNumber, key, comment});
   190 						}
   191 
   192 						AttributesImpl attributes = null;
   193 						String subkey = lc.matcher.group("subkey");
   194 						if (subkey != null) {
   195 							attributes = new AttributesImpl();
   196 							attributes.addAttribute(null, "sub", "sub", "xs:string", subkey);
   197 						}
   198 
   199 						fc.contentHandler.indentation(fc.lastSection == null ? 1 : 2);
   200 						fc.contentHandler.textElement(value, null, null, key, attributes);
   201 						fc.contentHandler.lineBreak();
   202 
   203 					}
   204 
   205 				},;
   206 
   207 		/**
   208 		 * @param patterns regular expression (or expressions) that describes this line type
   209 		 */
   210 		private LINE_TYPE(String... patterns) {
   211 			for (String pattern : patterns) {
   212 				this.patterns.add(Pattern.compile(pattern));
   213 			}
   214 		}
   215 
   216 		private final List<Pattern> patterns = new ArrayList<>();
   217 
   218 		/**
   219 		 *
   220 		 * @param currentLine input line to be parsed
   221 		 * @param fc
   222 		 * @return whether line matches and was thus processed
   223 		 * @throws SAXException
   224 		 */
   225 		protected boolean processLine(String currentLine, FileContext fc) throws SAXException {
   226 			for (Pattern pattern : patterns) {
   227 				Matcher m = pattern.matcher(currentLine);
   228 				if (m.matches()) {
   229 					log.log(Level.FINEST, "Line {0}: pattern „{1}“ matches „{2}“", new Object[]{fc.lineNumber, pattern, currentLine});
   230 					processLine(new LineContext(m), fc);
   231 					return true;
   232 				}
   233 			}
   234 			return false;
   235 		}
   236 
   237 		public abstract void processLine(LineContext lc, FileContext fc) throws SAXException;
   238 	}
   239 }