java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java
author František Kučera <franta-hg@frantovo.cz>
Thu, 24 Oct 2019 21:56:03 +0200
changeset 111 e4900596abdb
parent 102 a7f7b9094cc3
child 113 871c05ca7118
permissions -rw-r--r--
fix license version: GNU GPLv3
     1 /**
     2  * Alt2XML
     3  * Copyright © 2014 František Kučera (frantovo.cz)
     4  *
     5  * This program is free software: you can redistribute it and/or modify
     6  * it under the terms of the GNU General Public License as published by
     7  * the Free Software Foundation, version 3 of the License.
     8  *
     9  * This program is distributed in the hope that it will be useful,
    10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    12  * GNU General Public License for more details.
    13  *
    14  * You should have received a copy of the GNU General Public License
    15  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    16  */
    17 package cz.frantovo.alt2xml.in.ini;
    18 
    19 import cz.frantovo.alt2xml.AbstractAlt2XmlReader;
    20 import cz.frantovo.alt2xml.in.Alt2ContentHandler;
    21 import cz.frantovo.alt2xml.in.Functions;
    22 import java.io.BufferedReader;
    23 import java.io.IOException;
    24 import java.io.InputStreamReader;
    25 import java.util.ArrayList;
    26 import java.util.List;
    27 import java.util.logging.Level;
    28 import java.util.logging.Logger;
    29 import java.util.regex.Matcher;
    30 import java.util.regex.Pattern;
    31 import org.xml.sax.InputSource;
    32 import org.xml.sax.SAXException;
    33 import org.xml.sax.helpers.AttributesImpl;
    34 
    35 /**
    36  * Reads INI files with sections and entries.
    37  * Example:
    38  * <pre>; this is comment
    39  *random=value outside of any groups
    40  *
    41  *[some_section]
    42  *
    43  *; simple entry:
    44  *key=value
    45  *
    46  *; entry starting/ending with whitespace
    47  *white="  spaces everywhere  " ; might have comment
    48  *alternative='  spaces everywhere  ' ; same
    49  *
    50  *; entries with subkeys:
    51  *key[subkey_a]=value
    52  *key[subkey_b]=value
    53  *
    54  *# alternative way to comment
    55  *
    56  *[another secion]
    57  *yes=there might be spaces in names
    58  *because=they are encoded before putting into XML element names
    59  * </pre>
    60  *
    61  * @author Ing. František Kučera (frantovo.cz)
    62  */
    63 public class Reader extends AbstractAlt2XmlReader {
    64 
    65 	public static final String ROOT_ELEMENT = "ini";
    66 	private static final Logger log = Logger.getLogger(Reader.class.getName());
    67 
    68 	@Override
    69 	public void parse(InputSource input) throws IOException, SAXException {
    70 		outputStart();
    71 
    72 		try (BufferedReader br = new BufferedReader(new InputStreamReader(input.getByteStream()))) {
    73 			FileContext fc = new FileContext(contentHandler);
    74 			for (String currentLine = br.readLine(); currentLine != null; currentLine = br.readLine()) {
    75 				fc.lineNumber++;
    76 				boolean lineProcessed = false;
    77 				for (LINE_TYPE lineType : LINE_TYPE.values()) {
    78 					lineProcessed = lineType.processLine(currentLine, fc);
    79 					if (lineProcessed) {
    80 						break;
    81 					}
    82 				}
    83 				if (!lineProcessed) {
    84 					log.log(Level.SEVERE, "Invalid line in INI file: {0}", currentLine);
    85 				}
    86 			}
    87 			fc.outputEndSection(fc.lastSection);
    88 
    89 		}
    90 
    91 		outputEnd();
    92 	}
    93 
    94 	private void outputStart() throws SAXException {
    95 		contentHandler.startDocument();
    96 		contentHandler.lineBreak();
    97 		contentHandler.startElement(null, null, ROOT_ELEMENT, null);
    98 		contentHandler.lineBreak();
    99 	}
   100 
   101 	private void outputEnd() throws SAXException {
   102 		contentHandler.endElement(null, null, ROOT_ELEMENT);
   103 		contentHandler.lineBreak();
   104 		contentHandler.endDocument();
   105 	}
   106 
   107 	private static class FileContext {
   108 
   109 		private final Alt2ContentHandler contentHandler;
   110 		private String lastSection;
   111 		private int lineNumber;
   112 
   113 		public FileContext(Alt2ContentHandler contentHandler) {
   114 			this.contentHandler = contentHandler;
   115 		}
   116 
   117 		protected void outputStartSection(String name) throws SAXException {
   118 			contentHandler.indentation(1);
   119 			contentHandler.startElement(null, null, name, null);
   120 			contentHandler.lineBreak();
   121 		}
   122 
   123 		protected void outputEndSection(String name) throws SAXException {
   124 			if (name != null) {
   125 				contentHandler.indentation(1);
   126 				contentHandler.endElement(null, null, name);
   127 				contentHandler.lineBreak();
   128 			}
   129 		}
   130 	}
   131 
   132 	private static String encodeXmlName(String originalName, int lineNumber) {
   133 		String encodedName = Functions.encodeXmlName(originalName);
   134 		if (!encodedName.equals(originalName)) {
   135 			log.log(Level.FINE, "Line {0}: name „{1} was encoded to „{2}““", new Object[]{lineNumber, originalName, encodedName});
   136 		}
   137 		return encodedName;
   138 	}
   139 
   140 	private static class LineContext {
   141 
   142 		private final Matcher matcher;
   143 
   144 		public LineContext(Matcher matcher) {
   145 			this.matcher = matcher;
   146 		}
   147 	}
   148 
   149 	private enum LINE_TYPE {
   150 
   151 		BLANK_LINE("\\s*") {
   152 					@Override
   153 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   154 						log.log(Level.FINEST, "Line {0}: skipping blank line", fc.lineNumber);
   155 					}
   156 				},
   157 		COMMENT("\\s*(;|#)\\s*(?<comment>.*)") {
   158 					@Override
   159 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   160 						// TODO: comment → LexicalHandler
   161 						log.log(Level.FINER, "Line {0}: comment: {1}", new Object[]{fc.lineNumber, lc.matcher.group("comment")});
   162 					}
   163 
   164 				},
   165 		SECTION("\\s*\\[\\s*(?<name>[^\\]]+)\\s*\\]\\s*") {
   166 					@Override
   167 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   168 						String name = encodeXmlName(lc.matcher.group("name"), fc.lineNumber);
   169 						fc.outputEndSection(fc.lastSection);
   170 						fc.outputStartSection(name);
   171 						fc.lastSection = name;
   172 					}
   173 
   174 				},
   175 		ENTRY(
   176 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*\"(?<value>[^']+)\"\\s*((;|#)\\s*(?<comment>.*))?", // quoted value → include spaces + might have comment
   177 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*'(?<value>[^']+)'\\s*((;|#)\\s*(?<comment>.*))?", // apostrophed value → include spaces + might have comment
   178 				"\\s*(?<key>[^=\\]]+?[^=\\s\\]]*)(\\[(?<subkey>[^\\]]+)\\])?\\s*=\\s*(?<value>.+)" // unquoted value → strip spaces + no comments
   179 		) {
   180 					@Override
   181 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   182 						String key = encodeXmlName(lc.matcher.group("key"), fc.lineNumber);
   183 						String value = lc.matcher.group("value");
   184 
   185 						if (lc.matcher.groupCount() > 4) {
   186 							String comment = lc.matcher.group("comment");
   187 							// TODO: comment → LexicalHandler
   188 							log.log(Level.FINER, "Line {0}: comment for entry „{1}“ is: {2}", new Object[]{fc.lineNumber, key, comment});
   189 						}
   190 
   191 						AttributesImpl attributes = null;
   192 						String subkey = lc.matcher.group("subkey");
   193 						if (subkey != null) {
   194 							attributes = new AttributesImpl();
   195 							attributes.addAttribute(null, "sub", "sub", "xs:string", subkey);
   196 						}
   197 
   198 						fc.contentHandler.indentation(fc.lastSection == null ? 1 : 2);
   199 						fc.contentHandler.textElement(value, null, null, key, attributes);
   200 						fc.contentHandler.lineBreak();
   201 
   202 					}
   203 
   204 				},;
   205 
   206 		/**
   207 		 * @param patterns regular expression (or expressions) that describes this line type
   208 		 */
   209 		private LINE_TYPE(String... patterns) {
   210 			for (String pattern : patterns) {
   211 				this.patterns.add(Pattern.compile(pattern));
   212 			}
   213 		}
   214 
   215 		private final List<Pattern> patterns = new ArrayList<>();
   216 
   217 		/**
   218 		 *
   219 		 * @param currentLine input line to be parsed
   220 		 * @param fc
   221 		 * @return whether line matches and was thus processed
   222 		 * @throws SAXException
   223 		 */
   224 		protected boolean processLine(String currentLine, FileContext fc) throws SAXException {
   225 			for (Pattern pattern : patterns) {
   226 				Matcher m = pattern.matcher(currentLine);
   227 				if (m.matches()) {
   228 					log.log(Level.FINEST, "Line {0}: pattern „{1}“ matches „{2}“", new Object[]{fc.lineNumber, pattern, currentLine});
   229 					processLine(new LineContext(m), fc);
   230 					return true;
   231 				}
   232 			}
   233 			return false;
   234 		}
   235 
   236 		public abstract void processLine(LineContext lc, FileContext fc) throws SAXException;
   237 	}
   238 }