# HG changeset patch # User František Kučera # Date 1373482098 -7200 # Node ID e7c9a8722f76a2a735aa79b41060ad753e3fea93 # Parent aecdfc3b19500db48d839994aaca57449f2daeda generator: support multiple modes – XHTML (h) and plain text (m) diff -r aecdfc3b1950 -r e7c9a8722f76 java/dictionary-generator/concept.h.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/java/dictionary-generator/concept.h.xsl Wed Jul 10 20:48:18 2013 +0200 @@ -0,0 +1,75 @@ + + + + + + + + +
+ + + + + + + + + + + +
+ +

+ + +

+ Tags: + + + + + , + +

+
+
+
+ +
diff -r aecdfc3b1950 -r e7c9a8722f76 java/dictionary-generator/concept.m.xsl --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/java/dictionary-generator/concept.m.xsl Wed Jul 10 20:48:18 2013 +0200 @@ -0,0 +1,48 @@ + + + + + + + + + + + : + + + + + + + + + + + + + + + Tags: + + + + , + + + + + diff -r aecdfc3b1950 -r e7c9a8722f76 java/dictionary-generator/concept.xsl --- a/java/dictionary-generator/concept.xsl Wed Jul 10 14:32:45 2013 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,72 +0,0 @@ - - - - - - - -
- - - - - - - - - - - -
- -

- - -

- Tags: - - - - - , - -

-
-
-
- -
diff -r aecdfc3b1950 -r e7c9a8722f76 java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 14:32:45 2013 +0200 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 20:48:18 2013 +0200 @@ -66,9 +66,10 @@ * @author Ing. František Kučera (frantovo.cz) */ public class Generator { - + private static final Logger log = Logger.getLogger(Generator.class.getName()); private static final String EML_TO_KEN = "ixumhht68"; + private String mode; private final DocumentBuilderFactory documentBuilderFactory; private final DocumentBuilder documentBuilder; private final XPathFactory xpathFactory; @@ -76,45 +77,53 @@ private final TransformerFactory xslFactory; private final Transformer xsl; private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd"); - - public Generator() throws ParserConfigurationException, TransformerConfigurationException { - documentBuilderFactory = DocumentBuilderFactory.newInstance(); - documentBuilderFactory.setNamespaceAware(true); - documentBuilder = documentBuilderFactory.newDocumentBuilder(); - - xslFactory = TransformerFactory.newInstance(); - xsl = xslFactory.newTransformer(new StreamSource("concept.xsl")); - - xpathFactory = XPathFactory.newInstance(); - xpath = xpathFactory.newXPath(); - xpath.setNamespaceContext(getNamespaceContext()); + + public Generator(String mode) throws ParserConfigurationException, TransformerConfigurationException { + this.mode = mode; + + File templateFile = new File("concept." + mode + ".xsl"); + if (templateFile.exists()) { + + documentBuilderFactory = DocumentBuilderFactory.newInstance(); + documentBuilderFactory.setNamespaceAware(true); + documentBuilder = documentBuilderFactory.newDocumentBuilder(); + + xslFactory = TransformerFactory.newInstance(); + xsl = xslFactory.newTransformer(new StreamSource(templateFile)); + + xpathFactory = XPathFactory.newInstance(); + xpath = xpathFactory.newXPath(); + xpath.setNamespaceContext(getNamespaceContext()); + } else { + throw new IllegalArgumentException("Invalid mode: " + mode + ". File " + templateFile + " does not exist"); + } } - + private void generate(File folder, String filePrefix) { File infoFile = new File(folder, filePrefix + ".ifo"); File dictFile = new File(folder, filePrefix + ".dict"); File indexFile = new File(folder, filePrefix + ".idx"); File synonymFile = new File(folder, filePrefix + ".syn"); - + FileOutputStream dictOutputStream = null; DataOutputStream synonymOutputStream = null; DataOutputStream indexOutputStream = null; BufferedWriter infoWriter = null; - + SortedSet indexEntries = new TreeSet<>(); SortedSet synonymsEntries = new TreeSet<>(); - + try { dictOutputStream = new FileOutputStream(dictFile); synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile)); indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile)); infoWriter = new BufferedWriter(new FileWriter(infoFile)); - + Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml"); XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation"); // TODO: tags - labels/descriptions xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0)); - + long offset = 0; long conceptIndex = 0; for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) { @@ -122,35 +131,35 @@ xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml)); int length = conceptXhtml.size(); dictOutputStream.write(conceptXhtml.toByteArray()); - + NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET); List names = new ArrayList<>(); - + for (Node nameNode : nodeIterable(nameNodes)) { String name = nameNode.getTextContent().trim(); if (!name.isEmpty()) { names.add(name); } } - + String baseName = names.get(0); IndexEntry indexEntry = new IndexEntry(baseName, offset, length); indexEntries.add(indexEntry); - + for (int i = 1; i < names.size(); i++) { String name = names.get(i); if (!baseName.equals(name)) { synonymsEntries.add(new SynonymsEntry(indexEntry, name)); } } - + offset = offset + length; conceptIndex++; } - + writeIndex(indexOutputStream, indexEntries); writeSynonyms(synonymOutputStream, synonymsEntries); - + indexOutputStream.flush(); writeInfo(infoWriter, sourceDocument, conceptIndex, synonymsEntries.size(), indexFile.length()); } catch (SAXException | IOException | TransformerException | XPathExpressionException e) { @@ -162,7 +171,7 @@ close(infoWriter); } } - + private void writeIndex(DataOutputStream indexOutputStream, SortedSet indexEntries) throws IOException { long ordinal = 0; for (IndexEntry e : indexEntries) { @@ -170,13 +179,13 @@ e.setOrdinal(ordinal++); } } - + private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet synonymsEntries) throws IOException { for (SynonymsEntry s : synonymsEntries) { s.serialize(synonymOutputStream); } } - + private void writeInfo(BufferedWriter infoWriter, Document sourceDocument, long wordcount, long synwourdcount, long idxfilesize) throws IOException { // TODO: values from document metadata infoWriter.write("StarDict's dict ifo file\n"); @@ -191,18 +200,26 @@ infoWriter.write("website=https://telco.frantovo.cz\n"); infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n"); infoWriter.write("date=" + dateFormat.format(new Date()) + "\n"); - infoWriter.write("sametypesequence=h\n"); + infoWriter.write("sametypesequence=" + mode + "\n"); } - + public static void main(String[] args) { File outputFolder = new File("../../delivery/free-telco-dictionary"); outputFolder.mkdir(); - + try { - Generator g = new Generator(); + Generator g = new Generator(parseMode(args)); g.generate(outputFolder, "telco"); } catch (ParserConfigurationException | TransformerConfigurationException e) { log.log(Level.SEVERE, "error during initialization", e); } } + + private static String parseMode(String[] args) { + if (args.length == 1) { + return args[0]; + } else { + return "h"; + } + } }