diff -r d78b74a2abe9 -r 93208f791318 java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Mon Jul 08 23:38:22 2013 +0200 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Tue Jul 09 18:59:07 2013 +0200 @@ -17,13 +17,189 @@ */ package cz.frantovo.telco.dictionary; +import static cz.frantovo.telco.dictionary.Xmlns.*; +import static cz.frantovo.telco.dictionary.Functions.*; +import java.io.BufferedWriter; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.xml.parsers.DocumentBuilder; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerConfigurationException; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; +import javax.xml.transform.stream.StreamSource; +import javax.xml.xpath.XPath; +import javax.xml.xpath.XPathConstants; +import javax.xml.xpath.XPathExpression; +import javax.xml.xpath.XPathExpressionException; +import javax.xml.xpath.XPathFactory; +import org.w3c.dom.Document; +import org.w3c.dom.Node; +import org.w3c.dom.NodeList; +import org.xml.sax.SAXException; + /** + *
+ * Generates dictionary files in StarDict format from source in our XML format. + *
+ * + * <p> + * Number format should be: 32-bit unsigned number in network byte order + * </p>
* * @author Ing. František Kučera (frantovo.cz) */ public class Generator { + private static final Logger log = Logger.getLogger(Generator.class.getName()); + private static final String EMAIL_TOKEN = "ixumhht68"; + private final DocumentBuilderFactory documentBuilderFactory; + private final DocumentBuilder documentBuilder; + private final XPathFactory xpathFactory; + private final XPath xpath; + private final TransformerFactory xslFactory; + private final Transformer xsl; + private final Charset utf8; + + /** + * Prepares the XML tooling used by this generator: the UTF-8 charset, a namespace-aware DOM builder, an XSL transformer created from the stylesheet "concept.xsl" and an XPath evaluator configured with the namespace context returned by the statically imported getNamespaceContext(). + * NOTE(review): "concept.xsl" is passed as a bare system ID, so it is presumably resolved relative to the working directory; confirm. + * + * @throws ParserConfigurationException declared because newDocumentBuilder() may fail to create a builder + * @throws TransformerConfigurationException declared because newTransformer() may fail to build a transformer from the stylesheet + */ + public Generator() throws ParserConfigurationException, TransformerConfigurationException { + utf8 = Charset.forName("UTF-8"); + + documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setNamespaceAware(true); + documentBuilder = documentBuilderFactory.newDocumentBuilder(); + + xslFactory = TransformerFactory.newInstance(); + xsl = xslFactory.newTransformer(new StreamSource("concept.xsl")); + + xpathFactory = XPathFactory.newInstance(); + xpath = xpathFactory.newXPath(); + xpath.setNamespaceContext(getNamespaceContext()); + } + + private void generate(File folder, String filePrefix) { + File infoFile = new File(folder, filePrefix + ".ifo"); + File dictFile = new File(folder, filePrefix + ".dict"); + File indexFile = new File(folder, filePrefix + ".idx"); + File synonymFile = new File(folder, filePrefix + ".syn"); + + FileOutputStream dictOutputStream = null; + DataOutputStream synonymOutputStream = null; + DataOutputStream indexOutputStream = null; + BufferedWriter infoWriter = null; + + try { + dictOutputStream = new FileOutputStream(dictFile); + synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile)); + indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile)); + infoWriter = new BufferedWriter(new FileWriter(infoFile)); + + Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml"); + XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation"); 
+ // TODO: tags - labels/descriptions + xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0)); + + /** + * TODO: sorting + */ + long offset = 0; + long conceptIndex = 0; + long synonymCount = 0; + for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) { + ByteArrayOutputStream conceptXhtml = new ByteArrayOutputStream(); + xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml)); + int length = conceptXhtml.size(); + dictOutputStream.write(conceptXhtml.toByteArray()); + + NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET); + List