java/alt2xml-in-ini/src/cz/frantovo/alt2xml/in/ini/Reader.java
author František Kučera <franta-hg@frantovo.cz>
Sat, 06 Sep 2014 21:15:47 +0200
changeset 88 9860586b3b87
parent 86 1d31d9cd28c8
child 89 46c7cc4863c1
permissions -rw-r--r--
in-ini: encode section names with spaces
     1 /**
     2  * Alt2XML
     3  * Copyright © 2014 František Kučera (frantovo.cz)
     4  *
     5  * This program is free software: you can redistribute it and/or modify
     6  * it under the terms of the GNU General Public License as published by
     7  * the Free Software Foundation, either version 3 of the License, or
     8  * (at your option) any later version.
     9  *
    10  * This program is distributed in the hope that it will be useful,
    11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    13  * GNU General Public License for more details.
    14  *
    15  * You should have received a copy of the GNU General Public License
    16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package cz.frantovo.alt2xml.in.ini;
    19 
    20 import cz.frantovo.alt2xml.AbstractAlt2XmlReader;
    21 import cz.frantovo.alt2xml.in.Alt2ContentHandler;
    22 import static cz.frantovo.alt2xml.in.Functions.encodeXmlName;
    23 import java.io.BufferedReader;
    24 import java.io.IOException;
    25 import java.io.InputStreamReader;
    26 import java.util.ArrayList;
    27 import java.util.List;
    28 import java.util.logging.Level;
    29 import java.util.logging.Logger;
    30 import java.util.regex.Matcher;
    31 import java.util.regex.Pattern;
    32 import org.xml.sax.InputSource;
    33 import org.xml.sax.SAXException;
    34 
    35 /**
    36  *
    37  * @author Ing. František Kučera (frantovo.cz)
    38  */
    39 public class Reader extends AbstractAlt2XmlReader {
    40 
    41 	public static final String ROOT_ELEMENT = "ini";
    42 	private static final Logger log = Logger.getLogger(Reader.class.getName());
    43 
    44 	@Override
    45 	public void parse(InputSource input) throws IOException, SAXException {
    46 		outputStart();
    47 
    48 		try (BufferedReader br = new BufferedReader(new InputStreamReader(input.getByteStream()))) {
    49 			FileContext fc = new FileContext(contentHandler);
    50 			for (String currentLine = br.readLine(); currentLine != null; currentLine = br.readLine()) {
    51 				fc.lineNumber++;
    52 				boolean lineProcessed = false;
    53 				for (LINE_TYPE lineType : LINE_TYPE.values()) {
    54 					lineProcessed = lineType.processLine(currentLine, fc);
    55 					if (lineProcessed) {
    56 						break;
    57 					}
    58 				}
    59 				if (!lineProcessed) {
    60 					log.log(Level.SEVERE, "Invalid line in INI file: {0}", currentLine);
    61 				}
    62 			}
    63 			fc.outputEndSection(fc.lastSection);
    64 
    65 		}
    66 
    67 		outputEnd();
    68 	}
    69 
    70 	private void outputStart() throws SAXException {
    71 		contentHandler.startDocument();
    72 		contentHandler.lineBreak();
    73 		contentHandler.startElement(null, null, ROOT_ELEMENT, null);
    74 		contentHandler.lineBreak();
    75 	}
    76 
    77 	private void outputEnd() throws SAXException {
    78 		contentHandler.endElement(null, null, "ini");
    79 		contentHandler.lineBreak();
    80 		contentHandler.endDocument();
    81 	}
    82 
    83 	private static class FileContext {
    84 
    85 		private final Alt2ContentHandler contentHandler;
    86 		private String lastSection;
    87 		private int lineNumber;
    88 
    89 		public FileContext(Alt2ContentHandler contentHandler) {
    90 			this.contentHandler = contentHandler;
    91 		}
    92 
    93 		protected void outputStartSection(String name) throws SAXException {
    94 			contentHandler.indentation(1);
    95 			contentHandler.startElement(null, null, name, null);
    96 			contentHandler.lineBreak();
    97 		}
    98 
    99 		protected void outputEndSection(String name) throws SAXException {
   100 			if (name != null) {
   101 				contentHandler.indentation(1);
   102 				contentHandler.endElement(null, null, name);
   103 				contentHandler.lineBreak();
   104 			}
   105 		}
   106 	}
   107 
   108 	private static class LineContext {
   109 
   110 		private final Matcher matcher;
   111 
   112 		public LineContext(Matcher matcher) {
   113 			this.matcher = matcher;
   114 		}
   115 	}
   116 
   117 	private enum LINE_TYPE {
   118 
   119 		BLANK_LINE("\\s*") {
   120 					@Override
   121 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   122 						log.log(Level.FINEST, "Line {0}: skipping blank line", fc.lineNumber);
   123 					}
   124 				},
   125 		COMMENT("\\s*(;|#)\\s*(?<comment>.*)") {
   126 					@Override
   127 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   128 						// TODO: comment → LexicalHandler
   129 						log.log(Level.FINER, "Line {0}: comment: {1}", new Object[]{fc.lineNumber, lc.matcher.group("comment")});
   130 					}
   131 
   132 				},
   133 		SECTION("\\s*\\[\\s*(?<name>[^\\]\\]]+)\\s*\\]\\s*") {
   134 					@Override
   135 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   136 						String originalName = lc.matcher.group("name");
   137 						String encodedName = encodeXmlName(originalName);
   138 
   139 						if (!encodedName.equals(originalName)) {
   140 							log.log(Level.FINE, "Line {0}: section name „{1} was encoded to „{2}““", new Object[]{fc.lineNumber, originalName, encodedName});
   141 						}
   142 
   143 						fc.outputEndSection(fc.lastSection);
   144 						fc.outputStartSection(encodedName);
   145 						fc.lastSection = encodedName;
   146 					}
   147 
   148 				},
   149 		ENTRY(
   150 				"\\s*(?<key>[^=\\s]+)\\s*=\\s*\"(?<value>[^']+)\"\\s*((;|#)\\s*(?<comment>.*)){0,1}", // quoted value → include spaces + might have comment
   151 				"\\s*(?<key>[^=\\s]+)\\s*=\\s*'(?<value>[^']+)'\\s*((;|#)\\s*(?<comment>.*)){0,1}", // apostrophed value → include spaces + might have comment
   152 				"\\s*(?<key>[^=\\s]+)\\s*=\\s*(?<value>.+)" // unquoted value → strip spaces + no comments
   153 		) {
   154 					@Override
   155 					public void processLine(LineContext lc, FileContext fc) throws SAXException {
   156 						String key = lc.matcher.group("key");
   157 						String value = lc.matcher.group("value");
   158 
   159 						if (lc.matcher.groupCount() > 2) {
   160 							String comment = lc.matcher.group("comment");
   161 							// TODO: comment → LexicalHandler
   162 							log.log(Level.FINER, "Line {0}: comment for entry „{1}“ is: {2}", new Object[]{fc.lineNumber, key, comment});
   163 
   164 						}
   165 
   166 						fc.contentHandler.indentation(fc.lastSection == null ? 1 : 2);
   167 						fc.contentHandler.textElement(value, null, null, key, null);
   168 						fc.contentHandler.lineBreak();
   169 
   170 					}
   171 
   172 				},;
   173 
   174 		private LINE_TYPE(String... patterns) {
   175 			for (String pattern : patterns) {
   176 				this.patterns.add(Pattern.compile(pattern));
   177 			}
   178 		}
   179 
   180 		private final List<Pattern> patterns = new ArrayList<>();
   181 
   182 		protected boolean processLine(String currentLine, FileContext fc) throws SAXException {
   183 			for (Pattern pattern : patterns) {
   184 				Matcher m = pattern.matcher(currentLine);
   185 				if (m.matches()) {
   186 					log.log(Level.FINEST, "Line {0}: pattern „{1}“ matches „{2}“", new Object[]{fc.lineNumber, pattern, currentLine});
   187 					processLine(new LineContext(m), fc);
   188 					return true;
   189 				}
   190 			}
   191 			return false;
   192 		}
   193 
   194 		public abstract void processLine(LineContext lc, FileContext fc) throws SAXException;
   195 	}
   196 
   197 }