1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/java/dictionary-generator/concept.h.xsl Wed Jul 10 20:48:18 2013 +0200
1.3 @@ -0,0 +1,75 @@
1.4 +<?xml version="1.0" encoding="UTF-8"?>
1.5 +<xsl:stylesheet version="1.0"
1.6 + xmlns="http://www.w3.org/1999/xhtml"
1.7 + xmlns:h="http://www.w3.org/1999/xhtml"
1.8 + xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
1.9 + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
1.10 + xmlns:fn="http://www.w3.org/2005/xpath-functions"
1.11 + xmlns:xs="http://www.w3.org/2001/XMLSchema"
1.12 + exclude-result-prefixes="fn h d xs">
1.13 + <xsl:output
1.14 + method="xml"
1.15 + indent="no"
1.16 + encoding="UTF-8"
1.17 + omit-xml-declaration="yes"/>
1.18 +
1.19 + <xsl:param name="tags"/>
1.20 +
1.21 + <!--
1.22 + XHTML template: renders one d:concept as a div holding inline CSS, a table of its terms, its explanation and its tags.
1.23 + -->
1.24 + <xsl:template match="d:concept">
1.25 + <div>
1.26 + <!--
1.27 + The inline style element below is repeated for every concept, so this template
1.28 + should be shortened if used for dictionaries containing many words.
1.29 + -->
1.30 + <style type="text/css">
1.31 + table {
1.32 + border-collapse:collapse;
1.33 + box-shadow: 3px 3px 3px grey;
1.34 + margin-top: 10px;
1.35 + margin-bottom: 10px;
1.36 + }
1.37 + td, th {
1.38 + border: 1px solid black;
1.39 + padding-top: 4px;
1.40 + padding-bottom: 4px;
1.41 + padding-left: 6px;
1.42 + padding-right: 6px;
1.43 + font-weight: normal;
1.44 + }
1.45 + p.tags {
1.46 + font-size: 80%;
1.47 + }
1.48 + </style>
1.49 + <table style="color: red;"> <!-- NOTE(review): this inline style turns the table text red; confirm it is intended -->
1.50 + <tbody>
1.51 + <xsl:for-each select="d:term"> <!-- one table row per term: abbreviation cell, then complete-form cell -->
1.52 + <tr>
1.53 + <td><xsl:value-of select="@abbreviation"/></td>
1.54 + <td><xsl:value-of select="@completeForm"/></td>
1.55 + </tr>
1.56 + </xsl:for-each>
1.57 + </tbody>
1.58 + </table>
1.59 +
1.60 + <p><xsl:apply-templates select="d:explanation"/></p> <!-- the paragraph is emitted even when the concept has no d:explanation -->
1.61 +
1.62 + <xsl:if test="d:tag"> <!-- the tag paragraph is written only when at least one d:tag exists -->
1.63 + <p class="tags">
1.64 + <xsl:text>Tags: </xsl:text>
1.65 + <xsl:for-each select="d:tag">
1.66 + <xsl:variable name="tagID" select="text()"/> <!-- tagID is only referenced by the commented-out lookup below -->
1.67 + <xsl:apply-templates/> <!-- processes the tag's child nodes; this stylesheet defines no tag-specific template -->
1.68 + <!--
1.69 + <xsl:value-of select="$tags/d:tag[@id=$tagID]/@name"/>
1.70 + -->
1.71 + <xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if> <!-- comma after every tag except the last -->
1.72 + </xsl:for-each>
1.73 + </p>
1.74 + </xsl:if>
1.75 + </div>
1.76 + </xsl:template>
1.77 +
1.78 +</xsl:stylesheet>
2.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
2.2 +++ b/java/dictionary-generator/concept.m.xsl Wed Jul 10 20:48:18 2013 +0200
2.3 @@ -0,0 +1,48 @@
2.4 +<?xml version="1.0" encoding="UTF-8"?>
2.5 +<xsl:stylesheet version="1.0"
2.6 + xmlns="http://www.w3.org/1999/xhtml"
2.7 + xmlns:h="http://www.w3.org/1999/xhtml"
2.8 + xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
2.9 + xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
2.10 + xmlns:fn="http://www.w3.org/2005/xpath-functions"
2.11 + xmlns:xs="http://www.w3.org/2001/XMLSchema"
2.12 + exclude-result-prefixes="fn h d xs">
2.13 + <xsl:output
2.14 + method="text"
2.15 + indent="no"
2.16 + encoding="UTF-8"
2.17 + omit-xml-declaration="yes"/>
2.18 +
2.19 + <xsl:param name="tags"/>
2.20 +
2.21 + <!--
2.22 + Plain text template: renders one d:concept as text (its terms, then its non-blank explanations, then its tags).
2.23 + -->
2.24 + <xsl:template match="d:concept">
2.25 + <xsl:for-each select="d:term"> <!-- each term is written as abbreviation, optional separator, complete form -->
2.26 + <xsl:value-of select="@abbreviation"/>
2.27 + <xsl:if test="normalize-space(@abbreviation) and normalize-space(@completeForm)">: </xsl:if> <!-- separator only when both attributes are non-blank -->
2.28 + <xsl:value-of select="@completeForm"/>
2.29 + <xsl:text> </xsl:text>
2.30 + </xsl:for-each>
2.31 +
2.32 + <xsl:for-each select="d:explanation">
2.33 + <xsl:if test="normalize-space(.)"> <!-- explanations that contain only whitespace are skipped -->
2.34 + <xsl:text> </xsl:text>
2.35 + <xsl:apply-templates/>
2.36 + <xsl:text> </xsl:text>
2.37 + </xsl:if>
2.38 + </xsl:for-each>
2.39 +
2.40 + <xsl:if test="d:tag"> <!-- the tag list is written only when at least one d:tag exists -->
2.41 + <xsl:text> </xsl:text>
2.42 + <xsl:text>Tags: </xsl:text>
2.43 + <xsl:for-each select="d:tag">
2.44 + <xsl:variable name="tagID" select="text()"/> <!-- NOTE(review): tagID is never referenced in this stylesheet -->
2.45 + <xsl:apply-templates/> <!-- processes the tag's child nodes; this stylesheet defines no tag-specific template -->
2.46 + <xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if> <!-- comma after every tag except the last -->
2.47 + </xsl:for-each>
2.48 + </xsl:if>
2.49 + </xsl:template>
2.50 +
2.51 +</xsl:stylesheet>
3.1 --- a/java/dictionary-generator/concept.xsl Wed Jul 10 14:32:45 2013 +0200
3.2 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000
3.3 @@ -1,72 +0,0 @@
3.4 -<?xml version="1.0" encoding="UTF-8"?>
3.5 -<xsl:stylesheet version="1.0"
3.6 - xmlns="http://www.w3.org/1999/xhtml"
3.7 - xmlns:h="http://www.w3.org/1999/xhtml"
3.8 - xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
3.9 - xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
3.10 - xmlns:fn="http://www.w3.org/2005/xpath-functions"
3.11 - xmlns:xs="http://www.w3.org/2001/XMLSchema"
3.12 - exclude-result-prefixes="fn h d xs">
3.13 - <xsl:output
3.14 - method="xml"
3.15 - indent="no"
3.16 - encoding="UTF-8"
3.17 - omit-xml-declaration="yes"/>
3.18 -
3.19 - <xsl:param name="tags"/>
3.20 -
3.21 - <xsl:template match="d:concept">
3.22 - <div>
3.23 - <!--
3.24 - This template should be shortened,
3.25 - if used for dictionaries containing many words.
3.26 - -->
3.27 - <style type="text/css">
3.28 - table {
3.29 - border-collapse:collapse;
3.30 - box-shadow: 3px 3px 3px grey;
3.31 - margin-top: 10px;
3.32 - margin-bottom: 10px;
3.33 - }
3.34 - td, th {
3.35 - border: 1px solid black;
3.36 - padding-top: 4px;
3.37 - padding-bottom: 4px;
3.38 - padding-left: 6px;
3.39 - padding-right: 6px;
3.40 - font-weight: normal;
3.41 - }
3.42 - p.tags {
3.43 - font-size: 80%;
3.44 - }
3.45 - </style>
3.46 - <table>
3.47 - <tbody>
3.48 - <xsl:for-each select="d:term">
3.49 - <tr>
3.50 - <td><xsl:value-of select="@abbreviation"/></td>
3.51 - <td><xsl:value-of select="@completeForm"/></td>
3.52 - </tr>
3.53 - </xsl:for-each>
3.54 - </tbody>
3.55 - </table>
3.56 -
3.57 - <p><xsl:apply-templates select="d:explanation"/></p>
3.58 -
3.59 - <xsl:if test="d:tag">
3.60 - <p class="tags">
3.61 - <xsl:text>Tags: </xsl:text>
3.62 - <xsl:for-each select="d:tag">
3.63 - <xsl:variable name="tagID" select="text()"/>
3.64 - <xsl:apply-templates/>
3.65 - <!--
3.66 - <xsl:value-of select="$tags/d:tag[@id=$tagID]/@name"/>
3.67 - -->
3.68 - <xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if>
3.69 - </xsl:for-each>
3.70 - </p>
3.71 - </xsl:if>
3.72 - </div>
3.73 - </xsl:template>
3.74 -
3.75 -</xsl:stylesheet>
4.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 14:32:45 2013 +0200
4.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java Wed Jul 10 20:48:18 2013 +0200
4.3 @@ -66,9 +66,10 @@
4.4 * @author Ing. František Kučera (frantovo.cz)
4.5 */
4.6 public class Generator {
4.7 -
4.8 +
4.9 private static final Logger log = Logger.getLogger(Generator.class.getName());
4.10 private static final String EML_TO_KEN = "ixumhht68";
4.11 + private String mode;
4.12 private final DocumentBuilderFactory documentBuilderFactory;
4.13 private final DocumentBuilder documentBuilder;
4.14 private final XPathFactory xpathFactory;
4.15 @@ -76,45 +77,53 @@
4.16 private final TransformerFactory xslFactory;
4.17 private final Transformer xsl;
4.18 private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
4.19 -
4.20 - public Generator() throws ParserConfigurationException, TransformerConfigurationException {
4.21 - documentBuilderFactory = DocumentBuilderFactory.newInstance();
4.22 - documentBuilderFactory.setNamespaceAware(true);
4.23 - documentBuilder = documentBuilderFactory.newDocumentBuilder();
4.24 -
4.25 - xslFactory = TransformerFactory.newInstance();
4.26 - xsl = xslFactory.newTransformer(new StreamSource("concept.xsl"));
4.27 -
4.28 - xpathFactory = XPathFactory.newInstance();
4.29 - xpath = xpathFactory.newXPath();
4.30 - xpath.setNamespaceContext(getNamespaceContext());
4.31 + /** Creates a generator for {@code mode}; throws IllegalArgumentException when the stylesheet file for that mode does not exist. */
4.32 + public Generator(String mode) throws ParserConfigurationException, TransformerConfigurationException {
4.33 + this.mode = mode;
4.34 +
4.35 + // Each mode maps to a stylesheet file named "concept.<mode>.xsl" (relative path); reject the mode early if it is missing.
4.36 + File stylesheetFile = new File("concept." + mode + ".xsl");
4.37 + if (!stylesheetFile.exists()) {
4.38 + throw new IllegalArgumentException("Invalid mode: " + mode + ". File " + stylesheetFile + " does not exist");
4.39 + }
4.40 +
4.41 + documentBuilderFactory = DocumentBuilderFactory.newInstance();
4.42 + documentBuilderFactory.setNamespaceAware(true);
4.43 + documentBuilder = documentBuilderFactory.newDocumentBuilder();
4.44 +
4.45 + xslFactory = TransformerFactory.newInstance();
4.46 + xsl = xslFactory.newTransformer(new StreamSource(stylesheetFile));
4.47 +
4.48 + xpathFactory = XPathFactory.newInstance();
4.49 + xpath = xpathFactory.newXPath();
4.50 + xpath.setNamespaceContext(getNamespaceContext());
4.51 }
4.52 -
4.53 +
4.54 private void generate(File folder, String filePrefix) {
4.55 File infoFile = new File(folder, filePrefix + ".ifo");
4.56 File dictFile = new File(folder, filePrefix + ".dict");
4.57 File indexFile = new File(folder, filePrefix + ".idx");
4.58 File synonymFile = new File(folder, filePrefix + ".syn");
4.59 -
4.60 +
4.61 FileOutputStream dictOutputStream = null;
4.62 DataOutputStream synonymOutputStream = null;
4.63 DataOutputStream indexOutputStream = null;
4.64 BufferedWriter infoWriter = null;
4.65 -
4.66 +
4.67 SortedSet<IndexEntry> indexEntries = new TreeSet<>();
4.68 SortedSet<SynonymsEntry> synonymsEntries = new TreeSet<>();
4.69 -
4.70 +
4.71 try {
4.72 dictOutputStream = new FileOutputStream(dictFile);
4.73 synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile));
4.74 indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile));
4.75 infoWriter = new BufferedWriter(new FileWriter(infoFile));
4.76 -
4.77 +
4.78 Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml");
4.79 XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation");
4.80 // TODO: tags - labels/descriptions
4.81 xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0));
4.82 -
4.83 +
4.84 long offset = 0;
4.85 long conceptIndex = 0;
4.86 for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) {
4.87 @@ -122,35 +131,35 @@
4.88 xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml));
4.89 int length = conceptXhtml.size();
4.90 dictOutputStream.write(conceptXhtml.toByteArray());
4.91 -
4.92 +
4.93 NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET);
4.94 List<String> names = new ArrayList<>();
4.95 -
4.96 +
4.97 for (Node nameNode : nodeIterable(nameNodes)) {
4.98 String name = nameNode.getTextContent().trim();
4.99 if (!name.isEmpty()) {
4.100 names.add(name);
4.101 }
4.102 }
4.103 -
4.104 +
4.105 String baseName = names.get(0);
4.106 IndexEntry indexEntry = new IndexEntry(baseName, offset, length);
4.107 indexEntries.add(indexEntry);
4.108 -
4.109 +
4.110 for (int i = 1; i < names.size(); i++) {
4.111 String name = names.get(i);
4.112 if (!baseName.equals(name)) {
4.113 synonymsEntries.add(new SynonymsEntry(indexEntry, name));
4.114 }
4.115 }
4.116 -
4.117 +
4.118 offset = offset + length;
4.119 conceptIndex++;
4.120 }
4.121 -
4.122 +
4.123 writeIndex(indexOutputStream, indexEntries);
4.124 writeSynonyms(synonymOutputStream, synonymsEntries);
4.125 -
4.126 +
4.127 indexOutputStream.flush();
4.128 writeInfo(infoWriter, sourceDocument, conceptIndex, synonymsEntries.size(), indexFile.length());
4.129 } catch (SAXException | IOException | TransformerException | XPathExpressionException e) {
4.130 @@ -162,7 +171,7 @@
4.131 close(infoWriter);
4.132 }
4.133 }
4.134 -
4.135 +
4.136 private void writeIndex(DataOutputStream indexOutputStream, SortedSet<IndexEntry> indexEntries) throws IOException {
4.137 long ordinal = 0;
4.138 for (IndexEntry e : indexEntries) {
4.139 @@ -170,13 +179,13 @@
4.140 e.setOrdinal(ordinal++);
4.141 }
4.142 }
4.143 -
4.144 +
4.145 private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet<SynonymsEntry> synonymsEntries) throws IOException {
4.146 for (SynonymsEntry s : synonymsEntries) {
4.147 s.serialize(synonymOutputStream);
4.148 }
4.149 }
4.150 -
4.151 +
4.152 private void writeInfo(BufferedWriter infoWriter, Document sourceDocument, long wordcount, long synwourdcount, long idxfilesize) throws IOException {
4.153 // TODO: values from document metadata
4.154 infoWriter.write("StarDict's dict ifo file\n");
4.155 @@ -191,18 +200,26 @@
4.156 infoWriter.write("website=https://telco.frantovo.cz\n");
4.157 infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n");
4.158 infoWriter.write("date=" + dateFormat.format(new Date()) + "\n");
4.159 - infoWriter.write("sametypesequence=h\n");
4.160 + infoWriter.write("sametypesequence=" + mode + "\n");
4.161 }
4.162 -
4.163 +
4.164 public static void main(String[] args) {
4.165 File outputFolder = new File("../../delivery/free-telco-dictionary");
4.166 outputFolder.mkdir();
4.167 -
4.168 + // NOTE(review): the IllegalArgumentException thrown by the constructor for an unknown mode is not caught below.
4.169 try {
4.170 - Generator g = new Generator();
4.171 + Generator g = new Generator(parseMode(args)); // mode is "h" unless exactly one command-line argument is given
4.172 g.generate(outputFolder, "telco");
4.173 } catch (ParserConfigurationException | TransformerConfigurationException e) {
4.174 log.log(Level.SEVERE, "error during initialization", e);
4.175 }
4.176 }
4.177 +
4.178 + private static String parseMode(String[] args) {
4.179 + // Exactly one argument selects the mode; any other argument count falls back to "h".
4.180 + if (args.length != 1) {
4.181 + return "h";
4.182 + }
4.183 + return args[0];
4.184 + }
4.185 }