1.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 14:32:45 2013 +0200
1.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 20:48:18 2013 +0200
1.3 @@ -66,9 +66,10 @@
1.4 * @author Ing. František Kučera (frantovo.cz)
1.5 */
1.6 public class Generator {
1.7 -
1.8 +
1.9 private static final Logger log = Logger.getLogger(Generator.class.getName());
1.10 private static final String EML_TO_KEN = "ixumhht68";
1.11 + private String mode;
1.12 private final DocumentBuilderFactory documentBuilderFactory;
1.13 private final DocumentBuilder documentBuilder;
1.14 private final XPathFactory xpathFactory;
1.15 @@ -76,45 +77,53 @@
1.16 private final TransformerFactory xslFactory;
1.17 private final Transformer xsl;
1.18 private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
1.19 -
1.20 - public Generator() throws ParserConfigurationException, TransformerConfigurationException {
1.21 - documentBuilderFactory = DocumentBuilderFactory.newInstance();
1.22 - documentBuilderFactory.setNamespaceAware(true);
1.23 - documentBuilder = documentBuilderFactory.newDocumentBuilder();
1.24 -
1.25 - xslFactory = TransformerFactory.newInstance();
1.26 - xsl = xslFactory.newTransformer(new StreamSource("concept.xsl"));
1.27 -
1.28 - xpathFactory = XPathFactory.newInstance();
1.29 - xpath = xpathFactory.newXPath();
1.30 - xpath.setNamespaceContext(getNamespaceContext());
1.31 +
1.32 + public Generator(String mode) throws ParserConfigurationException, TransformerConfigurationException {
1.33 + this.mode = mode;
1.34 +
1.35 + File templateFile = new File("concept." + mode + ".xsl");
1.36 + if (templateFile.exists()) {
1.37 +
1.38 + documentBuilderFactory = DocumentBuilderFactory.newInstance();
1.39 + documentBuilderFactory.setNamespaceAware(true);
1.40 + documentBuilder = documentBuilderFactory.newDocumentBuilder();
1.41 +
1.42 + xslFactory = TransformerFactory.newInstance();
1.43 + xsl = xslFactory.newTransformer(new StreamSource(templateFile));
1.44 +
1.45 + xpathFactory = XPathFactory.newInstance();
1.46 + xpath = xpathFactory.newXPath();
1.47 + xpath.setNamespaceContext(getNamespaceContext());
1.48 + } else {
1.49 + throw new IllegalArgumentException("Invalid mode: " + mode + ". File " + templateFile + " does not exist");
1.50 + }
1.51 }
1.52 -
1.53 +
1.54 private void generate(File folder, String filePrefix) {
1.55 File infoFile = new File(folder, filePrefix + ".ifo");
1.56 File dictFile = new File(folder, filePrefix + ".dict");
1.57 File indexFile = new File(folder, filePrefix + ".idx");
1.58 File synonymFile = new File(folder, filePrefix + ".syn");
1.59 -
1.60 +
1.61 FileOutputStream dictOutputStream = null;
1.62 DataOutputStream synonymOutputStream = null;
1.63 DataOutputStream indexOutputStream = null;
1.64 BufferedWriter infoWriter = null;
1.65 -
1.66 +
1.67 SortedSet<IndexEntry> indexEntries = new TreeSet<>();
1.68 SortedSet<SynonymsEntry> synonymsEntries = new TreeSet<>();
1.69 -
1.70 +
1.71 try {
1.72 dictOutputStream = new FileOutputStream(dictFile);
1.73 synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile));
1.74 indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile));
1.75 infoWriter = new BufferedWriter(new FileWriter(infoFile));
1.76 -
1.77 +
1.78 Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml");
1.79 XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation");
1.80 // TODO: tags - labels/descriptions
1.81 xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0));
1.82 -
1.83 +
1.84 long offset = 0;
1.85 long conceptIndex = 0;
1.86 for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) {
1.87 @@ -122,35 +131,35 @@
1.88 xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml));
1.89 int length = conceptXhtml.size();
1.90 dictOutputStream.write(conceptXhtml.toByteArray());
1.91 -
1.92 +
1.93 NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET);
1.94 List<String> names = new ArrayList<>();
1.95 -
1.96 +
1.97 for (Node nameNode : nodeIterable(nameNodes)) {
1.98 String name = nameNode.getTextContent().trim();
1.99 if (!name.isEmpty()) {
1.100 names.add(name);
1.101 }
1.102 }
1.103 -
1.104 +
1.105 String baseName = names.get(0);
1.106 IndexEntry indexEntry = new IndexEntry(baseName, offset, length);
1.107 indexEntries.add(indexEntry);
1.108 -
1.109 +
1.110 for (int i = 1; i < names.size(); i++) {
1.111 String name = names.get(i);
1.112 if (!baseName.equals(name)) {
1.113 synonymsEntries.add(new SynonymsEntry(indexEntry, name));
1.114 }
1.115 }
1.116 -
1.117 +
1.118 offset = offset + length;
1.119 conceptIndex++;
1.120 }
1.121 -
1.122 +
1.123 writeIndex(indexOutputStream, indexEntries);
1.124 writeSynonyms(synonymOutputStream, synonymsEntries);
1.125 -
1.126 +
1.127 indexOutputStream.flush();
1.128 writeInfo(infoWriter, sourceDocument, conceptIndex, synonymsEntries.size(), indexFile.length());
1.129 } catch (SAXException | IOException | TransformerException | XPathExpressionException e) {
1.130 @@ -162,7 +171,7 @@
1.131 close(infoWriter);
1.132 }
1.133 }
1.134 -
1.135 +
1.136 private void writeIndex(DataOutputStream indexOutputStream, SortedSet<IndexEntry> indexEntries) throws IOException {
1.137 long ordinal = 0;
1.138 for (IndexEntry e : indexEntries) {
1.139 @@ -170,13 +179,13 @@
1.140 e.setOrdinal(ordinal++);
1.141 }
1.142 }
1.143 -
1.144 +
1.145 private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet<SynonymsEntry> synonymsEntries) throws IOException {
1.146 for (SynonymsEntry s : synonymsEntries) {
1.147 s.serialize(synonymOutputStream);
1.148 }
1.149 }
1.150 -
1.151 +
1.152 private void writeInfo(BufferedWriter infoWriter, Document sourceDocument, long wordcount, long synwourdcount, long idxfilesize) throws IOException {
1.153 // TODO: values from document metadata
1.154 infoWriter.write("StarDict's dict ifo file\n");
1.155 @@ -191,18 +200,26 @@
1.156 infoWriter.write("website=https://telco.frantovo.cz\n");
1.157 infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n");
1.158 infoWriter.write("date=" + dateFormat.format(new Date()) + "\n");
1.159 - infoWriter.write("sametypesequence=h\n");
1.160 + infoWriter.write("sametypesequence=" + mode + "\n");
1.161 }
1.162 -
1.163 +
1.164 public static void main(String[] args) {
1.165 File outputFolder = new File("../../delivery/free-telco-dictionary");
1.166 outputFolder.mkdir();
1.167 -
1.168 +
1.169 try {
1.170 - Generator g = new Generator();
1.171 + Generator g = new Generator(parseMode(args));
1.172 g.generate(outputFolder, "telco");
1.173 } catch (ParserConfigurationException | TransformerConfigurationException e) {
1.174 log.log(Level.SEVERE, "error during initialization", e);
1.175 }
1.176 }
1.177 +
1.178 + private static String parseMode(String[] args) {
1.179 + if (args.length == 1) {
1.180 + return args[0];
1.181 + } else {
1.182 + return "h";
1.183 + }
1.184 + }
1.185 }