1.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Tue Jul 09 22:41:49 2013 +0200
1.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Tue Jul 09 22:42:32 2013 +0200
1.3 @@ -26,10 +26,10 @@
1.4 import java.io.FileOutputStream;
1.5 import java.io.FileWriter;
1.6 import java.io.IOException;
1.7 -import java.nio.ByteBuffer;
1.8 -import java.nio.charset.Charset;
1.9 import java.util.ArrayList;
1.10 import java.util.List;
1.11 +import java.util.SortedSet;
1.12 +import java.util.TreeSet;
1.13 import java.util.logging.Level;
1.14 import java.util.logging.Logger;
1.15 import javax.xml.parsers.DocumentBuilder;
1.16 @@ -66,18 +66,15 @@
1.17 public class Generator {
1.18
1.19 private static final Logger log = Logger.getLogger(Generator.class.getName());
1.20 - private static final String EMAIL_TOKEN = "ixumhht68";
1.21 + private static final String EML_TO_KEN = "ixumhht68";
1.22 private final DocumentBuilderFactory documentBuilderFactory;
1.23 private final DocumentBuilder documentBuilder;
1.24 private final XPathFactory xpathFactory;
1.25 private final XPath xpath;
1.26 private final TransformerFactory xslFactory;
1.27 private final Transformer xsl;
1.28 - private final Charset utf8;
1.29
1.30 public Generator() throws ParserConfigurationException, TransformerConfigurationException {
1.31 - utf8 = Charset.forName("UTF-8");
1.32 -
1.33 documentBuilderFactory = DocumentBuilderFactory.newInstance();
1.34 documentBuilderFactory.setNamespaceAware(true);
1.35 documentBuilder = documentBuilderFactory.newDocumentBuilder();
1.36 @@ -101,6 +98,9 @@
1.37 DataOutputStream indexOutputStream = null;
1.38 BufferedWriter infoWriter = null;
1.39
1.40 + SortedSet<IndexEntry> indexEntries = new TreeSet<>();
1.41 + SortedSet<SynonymsEntry> synonymsEntries = new TreeSet<>();
1.42 +
1.43 try {
1.44 dictOutputStream = new FileOutputStream(dictFile);
1.45 synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile));
1.46 @@ -112,12 +112,8 @@
1.47 // TODO: tags - labels/descriptions
1.48 xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0));
1.49
1.50 - /**
1.51 - * TODO: sorting
1.52 - */
1.53 long offset = 0;
1.54 long conceptIndex = 0;
1.55 - long synonymCount = 0;
1.56 for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) {
1.57 ByteArrayOutputStream conceptXhtml = new ByteArrayOutputStream();
1.58 xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml));
1.59 @@ -134,15 +130,23 @@
1.60 }
1.61 }
1.62
1.63 - synonymCount = +writeSynonyms(synonymOutputStream, names, conceptIndex);
1.64 - writeIndex(indexOutputStream, names.get(0), offset, length);
1.65 + IndexEntry indexEntry = new IndexEntry(names.get(0), offset, length);
1.66 + indexEntries.add(indexEntry);
1.67 +
1.68 + for (int i = 1; i < names.size(); i++) {
1.69 + String name = names.get(i);
1.70 + synonymsEntries.add(new SynonymsEntry(indexEntry, name));
1.71 + }
1.72
1.73 offset = offset + length;
1.74 conceptIndex++;
1.75 }
1.76
1.77 + writeIndex(indexOutputStream, indexEntries);
1.78 + writeSynonyms(synonymOutputStream, synonymsEntries);
1.79 +
1.80 indexOutputStream.flush();
1.81 - writeInfo(infoWriter, sourceDocument, conceptIndex + 1, synonymCount, indexFile.length());
1.82 + writeInfo(infoWriter, sourceDocument, conceptIndex + 1, synonymsEntries.size(), indexFile.length());
1.83 } catch (SAXException | IOException | TransformerException | XPathExpressionException e) {
1.84 log.log(Level.SEVERE, "unable to generate", e);
1.85 } finally {
1.86 @@ -153,24 +157,17 @@
1.87 }
1.88 }
1.89
1.90 - private void writeIndex(DataOutputStream indexOutputStream, String name, long offset, long length) throws IOException {
1.91 - indexOutputStream.write(name.getBytes(utf8));
1.92 - indexOutputStream.write(0);
1.93 - indexOutputStream.writeInt((int) offset); // unsigned int 32
1.94 - indexOutputStream.writeInt((int) length); // unsigned int 32
1.95 + private void writeIndex(DataOutputStream indexOutputStream, SortedSet<IndexEntry> indexEntries) throws IOException {
1.96 + long ordinal = 0;
1.97 + for (IndexEntry e : indexEntries) {
1.98 + e.serialize(indexOutputStream);
1.99 + e.setOrdinal(ordinal++);
1.100 + }
1.101 }
1.102
1.103 - private int writeSynonyms(DataOutputStream synonymOutputStream, List<String> names, long baseIndex) throws IOException {
1.104 - if (names.size() > 1) {
1.105 - for (int i = 1; i < names.size(); i++) {
1.106 - String name = names.get(i);
1.107 - synonymOutputStream.write(name.getBytes(utf8));
1.108 - synonymOutputStream.write(0);
1.109 - synonymOutputStream.writeInt((int) baseIndex); // unsigned int 32
1.110 - }
1.111 - return names.size() - 1;
1.112 - } else {
1.113 - return 0;
1.114 + private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet<SynonymsEntry> synonymsEntries) throws IOException {
1.115 + for (SynonymsEntry s : synonymsEntries) {
1.116 + s.serialize(synonymOutputStream);
1.117 }
1.118 }
1.119
1.120 @@ -184,7 +181,7 @@
1.121 infoWriter.write("idxfilesize=" + idxfilesize + "\n");
1.122 infoWriter.write("idxoffsetbits=32\n");
1.123 infoWriter.write("author=František Kučera\n");
1.124 - infoWriter.write("email=telco-dictionary." + EMAIL_TOKEN + "@" + "frantovo.cz\n");
1.125 + infoWriter.write("em" + "ail=telco" + "-dictionary." + EML_TO_KEN + "@" + "fran" + "tovo.cz\n");
1.126 infoWriter.write("website=https://telco.frantovo.cz\n");
1.127 infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n");
1.128 infoWriter.write("date=2013.07.09\n");
2.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/IndexEntry.java Tue Jul 09 22:41:49 2013 +0200
2.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/IndexEntry.java Tue Jul 09 22:42:32 2013 +0200
2.3 @@ -17,14 +17,69 @@
2.4 */
2.5 package cz.frantovo.telco.dictionary;
2.6
2.7 +import java.io.DataOutputStream;
2.8 +import java.io.IOException;
2.9 +import java.nio.charset.StandardCharsets;
2.10 +import java.util.Objects;
2.11 +
2.12 /**
2.13 *
2.14 * @author Ing. František Kučera (frantovo.cz)
2.15 */
2.16 public class IndexEntry implements Comparable<IndexEntry> {
2.17
2.18 + private String name;
2.19 + private long offset;
2.20 + private long length;
2.21 + private long ordinal;
2.22 +
2.23 + public IndexEntry(String name, long offset, long length) {
2.24 + this.name = name;
2.25 + this.offset = offset;
2.26 + this.length = length;
2.27 + }
2.28 +
2.29 + public void serialize(DataOutputStream indexOutputStream) throws IOException {
2.30 + indexOutputStream.write(name.getBytes(StandardCharsets.UTF_8));
2.31 + indexOutputStream.write(0);
2.32 + indexOutputStream.writeInt((int) offset); // unsigned int 32
2.33 + indexOutputStream.writeInt((int) length); // unsigned int 32
2.34 + }
2.35 +
2.36 + public void setOrdinal(long ordinal) {
2.37 + this.ordinal = ordinal;
2.38 + }
2.39 +
2.40 + public long getOrdinal() {
2.41 + return ordinal;
2.42 + }
2.43 +
2.44 @Override
2.45 public int compareTo(IndexEntry o) {
2.46 - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
2.47 + int nameDiff = name.compareTo(o.name);
2.48 + if (nameDiff == 0) {
2.49 + int offsetDiff = ((Long) offset).compareTo(o.offset);
2.50 + if (offsetDiff == 0) {
2.51 + return ((Long) length).compareTo(o.length);
2.52 + } else {
2.53 + return offsetDiff;
2.54 + }
2.55 + } else {
2.56 + return nameDiff;
2.57 + }
2.58 + }
2.59 +
2.60 + @Override
2.61 + public boolean equals(Object o) {
2.62 + return o instanceof IndexEntry && compareTo((IndexEntry) o) == 0;
2.63 + }
2.64 +
2.65 + @Override
2.66 + public int hashCode() {
2.67 + int hash = 5;
2.68 + hash = 53 * hash + Objects.hashCode(this.name);
2.69 + hash = 53 * hash + (int) (this.offset ^ (this.offset >>> 32));
2.70 + hash = 53 * hash + (int) (this.length ^ (this.length >>> 32));
2.71 + return hash;
2.72 }
2.73 }
3.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/SynonymsEntry.java Tue Jul 09 22:41:49 2013 +0200
3.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/SynonymsEntry.java Tue Jul 09 22:42:32 2013 +0200
3.3 @@ -17,14 +17,51 @@
3.4 */
3.5 package cz.frantovo.telco.dictionary;
3.6
3.7 +import java.io.DataOutputStream;
3.8 +import java.io.IOException;
3.9 +import java.nio.charset.StandardCharsets;
3.10 +import java.util.Objects;
3.11 +
3.12 /**
3.13 *
3.14 * @author Ing. František Kučera (frantovo.cz)
3.15 */
3.16 public class SynonymsEntry implements Comparable<SynonymsEntry> {
3.17
3.18 + private IndexEntry base;
3.19 + private String name;
3.20 +
3.21 + public SynonymsEntry(IndexEntry base, String name) {
3.22 + this.base = base;
3.23 + this.name = name;
3.24 + }
3.25 +
3.26 + public void serialize(DataOutputStream synonymOutputStream) throws IOException {
3.27 + synonymOutputStream.write(name.getBytes(StandardCharsets.UTF_8));
3.28 + synonymOutputStream.write(0);
3.29 + synonymOutputStream.writeInt((int) base.getOrdinal()); // unsigned int 32
3.30 + }
3.31 +
3.32 @Override
3.33 public int compareTo(SynonymsEntry o) {
3.34 - throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates.
3.35 + int nameDiff = name.compareTo(o.name);
3.36 + if (nameDiff == 0) {
3.37 + return base.compareTo(o.base);
3.38 + } else {
3.39 + return nameDiff;
3.40 + }
3.41 + }
3.42 +
3.43 + @Override
3.44 + public boolean equals(Object o) { // equal iff o is a SynonymsEntry that compareTo() ranks as identical (same name, same base entry)
3.45 + return o instanceof SynonymsEntry && compareTo((SynonymsEntry) o) == 0; // guard on our own type: the cast below would throw for an IndexEntry
3.46 + }
3.47 +
3.48 + @Override
3.49 + public int hashCode() {
3.50 + int hash = 3;
3.51 + hash = 47 * hash + Objects.hashCode(this.base);
3.52 + hash = 47 * hash + Objects.hashCode(this.name);
3.53 + return hash;
3.54 }
3.55 }