generator: support multiple modes – XHTML (h) and plain text (m)
author František Kučera <franta-hg@frantovo.cz>
Wed, 10 Jul 2013 20:48:18 +0200
changeset 21 e7c9a8722f76
parent 20 aecdfc3b1950
child 22 e003e66c9752
generator: support multiple modes – XHTML (h) and plain text (m)
java/dictionary-generator/concept.h.xsl
java/dictionary-generator/concept.m.xsl
java/dictionary-generator/concept.xsl
java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/java/dictionary-generator/concept.h.xsl	Wed Jul 10 20:48:18 2013 +0200
     1.3 @@ -0,0 +1,75 @@
     1.4 +<?xml version="1.0" encoding="UTF-8"?>
     1.5 +<xsl:stylesheet version="1.0"
     1.6 +	xmlns="http://www.w3.org/1999/xhtml"
     1.7 +	xmlns:h="http://www.w3.org/1999/xhtml"
     1.8 +	xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
     1.9 +	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    1.10 +	xmlns:fn="http://www.w3.org/2005/xpath-functions"
    1.11 +	xmlns:xs="http://www.w3.org/2001/XMLSchema"
    1.12 +	exclude-result-prefixes="fn h d xs">
    1.13 +	<xsl:output 
    1.14 +		method="xml" 
    1.15 +		indent="no" 
    1.16 +		encoding="UTF-8"
    1.17 +		omit-xml-declaration="yes"/>
    1.18 +		
    1.19 +	<xsl:param name="tags"/>
    1.20 +	
    1.21 +	<!--
    1.22 +		XHTML template
    1.23 +	-->
    1.24 +	<xsl:template match="d:concept">
    1.25 +		<div>
    1.26 +			<!--
    1.27 +				This template should be shortened,
    1.28 +				if used for dictionaries containing many words.
    1.29 +			-->
    1.30 +			<style type="text/css">
    1.31 +			table {
    1.32 +				border-collapse:collapse;
    1.33 +				box-shadow: 3px 3px 3px grey;
    1.34 +				margin-top: 10px;
    1.35 +				margin-bottom: 10px;
    1.36 +			}
    1.37 +			td, th {
    1.38 +				border: 1px solid black;
    1.39 +				padding-top: 4px;
    1.40 +				padding-bottom: 4px;
    1.41 +				padding-left: 6px;
    1.42 +				padding-right: 6px;
    1.43 +				font-weight: normal;
    1.44 +			}
    1.45 +			p.tags {
    1.46 +				font-size: 80%;
    1.47 +			}
    1.48 +			</style>
    1.49 +			<table style="color: red;">
    1.50 +				<tbody>
    1.51 +					<xsl:for-each select="d:term">
    1.52 +						<tr>
    1.53 +							<td><xsl:value-of select="@abbreviation"/></td>
    1.54 +							<td><xsl:value-of select="@completeForm"/></td>
    1.55 +						</tr>
    1.56 +					</xsl:for-each>
    1.57 +				</tbody>
    1.58 +			</table>
    1.59 +			
    1.60 +			<p><xsl:apply-templates select="d:explanation"/></p>
    1.61 +			
    1.62 +			<xsl:if test="d:tag">
    1.63 +			<p class="tags">
    1.64 +				<xsl:text>Tags: </xsl:text>
    1.65 +				<xsl:for-each select="d:tag">
    1.66 +					<xsl:variable name="tagID" select="text()"/>
    1.67 +					<xsl:apply-templates/>
    1.68 +					<!--
    1.69 +					<xsl:value-of select="$tags/d:tag[@id=$tagID]/@name"/>
    1.70 +					-->
    1.71 +					<xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if>
    1.72 +				</xsl:for-each>
    1.73 +			</p>
    1.74 +			</xsl:if>
    1.75 +		</div>
    1.76 +	</xsl:template>
    1.77 +
    1.78 +</xsl:stylesheet>
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/java/dictionary-generator/concept.m.xsl	Wed Jul 10 20:48:18 2013 +0200
     2.3 @@ -0,0 +1,48 @@
     2.4 +<?xml version="1.0" encoding="UTF-8"?>
     2.5 +<xsl:stylesheet version="1.0"
     2.6 +	xmlns="http://www.w3.org/1999/xhtml"
     2.7 +	xmlns:h="http://www.w3.org/1999/xhtml"
     2.8 +	xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
     2.9 +	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    2.10 +	xmlns:fn="http://www.w3.org/2005/xpath-functions"
    2.11 +	xmlns:xs="http://www.w3.org/2001/XMLSchema"
    2.12 +	exclude-result-prefixes="fn h d xs">
    2.13 +	<xsl:output 
    2.14 +		method="text" 
    2.15 +		indent="no" 
    2.16 +		encoding="UTF-8"
    2.17 +		omit-xml-declaration="yes"/>
    2.18 +		
    2.19 +	<xsl:param name="tags"/>
    2.20 +	
    2.21 +	<!--
    2.22 +		Plain text template
    2.23 +	-->
    2.24 +	<xsl:template match="d:concept">
    2.25 +			<xsl:for-each select="d:term">
    2.26 +				<xsl:value-of select="@abbreviation"/>
    2.27 +				<xsl:if test="normalize-space(@abbreviation) and normalize-space(@completeForm)">: </xsl:if>
    2.28 +				<xsl:value-of select="@completeForm"/>
    2.29 +				<xsl:text>&#10;</xsl:text>
    2.30 +			</xsl:for-each>
    2.31 +
    2.32 +			<xsl:for-each select="d:explanation">
    2.33 +				<xsl:if test="normalize-space(.)">
    2.34 +					<xsl:text>&#10;</xsl:text>
    2.35 +					<xsl:apply-templates/>
    2.36 +					<xsl:text>&#10;</xsl:text>
    2.37 +				</xsl:if>
    2.38 +			</xsl:for-each>
    2.39 +			
    2.40 +			<xsl:if test="d:tag">
    2.41 +				<xsl:text>&#10;</xsl:text>
    2.42 +				<xsl:text>Tags: </xsl:text>
    2.43 +				<xsl:for-each select="d:tag">
    2.44 +					<xsl:variable name="tagID" select="text()"/>
    2.45 +					<xsl:apply-templates/>
    2.46 +					<xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if>
    2.47 +				</xsl:for-each>
    2.48 +			</xsl:if>
    2.49 +	</xsl:template>
    2.50 +
    2.51 +</xsl:stylesheet>
     3.1 --- a/java/dictionary-generator/concept.xsl	Wed Jul 10 14:32:45 2013 +0200
     3.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     3.3 @@ -1,72 +0,0 @@
     3.4 -<?xml version="1.0" encoding="UTF-8"?>
     3.5 -<xsl:stylesheet version="1.0"
     3.6 -	xmlns="http://www.w3.org/1999/xhtml"
     3.7 -	xmlns:h="http://www.w3.org/1999/xhtml"
     3.8 -	xmlns:d="https://telco.frantovo.cz/xmlns/dictionary"
     3.9 -	xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
    3.10 -	xmlns:fn="http://www.w3.org/2005/xpath-functions"
    3.11 -	xmlns:xs="http://www.w3.org/2001/XMLSchema"
    3.12 -	exclude-result-prefixes="fn h d xs">
    3.13 -	<xsl:output 
    3.14 -		method="xml" 
    3.15 -		indent="no" 
    3.16 -		encoding="UTF-8"
    3.17 -		omit-xml-declaration="yes"/>
    3.18 -		
    3.19 -	<xsl:param name="tags"/>
    3.20 -	
    3.21 -	<xsl:template match="d:concept">
    3.22 -		<div>
    3.23 -			<!--
    3.24 -				This template should be shortened,
    3.25 -				if used for dictionaries containing many words.
    3.26 -			-->
    3.27 -			<style type="text/css">
    3.28 -			table {
    3.29 -				border-collapse:collapse;
    3.30 -				box-shadow: 3px 3px 3px grey;
    3.31 -				margin-top: 10px;
    3.32 -				margin-bottom: 10px;
    3.33 -			}
    3.34 -			td, th {
    3.35 -				border: 1px solid black;
    3.36 -				padding-top: 4px;
    3.37 -				padding-bottom: 4px;
    3.38 -				padding-left: 6px;
    3.39 -				padding-right: 6px;
    3.40 -				font-weight: normal;
    3.41 -			}
    3.42 -			p.tags {
    3.43 -				font-size: 80%;
    3.44 -			}
    3.45 -			</style>
    3.46 -			<table>
    3.47 -				<tbody>
    3.48 -					<xsl:for-each select="d:term">
    3.49 -						<tr>
    3.50 -							<td><xsl:value-of select="@abbreviation"/></td>
    3.51 -							<td><xsl:value-of select="@completeForm"/></td>
    3.52 -						</tr>
    3.53 -					</xsl:for-each>
    3.54 -				</tbody>
    3.55 -			</table>
    3.56 -			
    3.57 -			<p><xsl:apply-templates select="d:explanation"/></p>
    3.58 -			
    3.59 -			<xsl:if test="d:tag">
    3.60 -			<p class="tags">
    3.61 -				<xsl:text>Tags: </xsl:text>
    3.62 -				<xsl:for-each select="d:tag">
    3.63 -					<xsl:variable name="tagID" select="text()"/>
    3.64 -					<xsl:apply-templates/>
    3.65 -					<!--
    3.66 -					<xsl:value-of select="$tags/d:tag[@id=$tagID]/@name"/>
    3.67 -					-->
    3.68 -					<xsl:if test="not(position() = last())"><xsl:text>, </xsl:text></xsl:if>
    3.69 -				</xsl:for-each>
    3.70 -			</p>
    3.71 -			</xsl:if>
    3.72 -		</div>
    3.73 -	</xsl:template>
    3.74 -
    3.75 -</xsl:stylesheet>
     4.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java	Wed Jul 10 14:32:45 2013 +0200
     4.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java	Wed Jul 10 20:48:18 2013 +0200
     4.3 @@ -66,9 +66,10 @@
     4.4   * @author Ing. František Kučera (frantovo.cz)
     4.5   */
     4.6  public class Generator {
     4.7 -	
     4.8 +
     4.9  	private static final Logger log = Logger.getLogger(Generator.class.getName());
    4.10  	private static final String EML_TO_KEN = "ixumhht68";
    4.11 +	private String mode;
    4.12  	private final DocumentBuilderFactory documentBuilderFactory;
    4.13  	private final DocumentBuilder documentBuilder;
    4.14  	private final XPathFactory xpathFactory;
    4.15 @@ -76,45 +77,53 @@
    4.16  	private final TransformerFactory xslFactory;
    4.17  	private final Transformer xsl;
    4.18  	private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    4.19 -	
    4.20 -	public Generator() throws ParserConfigurationException, TransformerConfigurationException {
    4.21 -		documentBuilderFactory = DocumentBuilderFactory.newInstance();
    4.22 -		documentBuilderFactory.setNamespaceAware(true);
    4.23 -		documentBuilder = documentBuilderFactory.newDocumentBuilder();
    4.24 -		
    4.25 -		xslFactory = TransformerFactory.newInstance();
    4.26 -		xsl = xslFactory.newTransformer(new StreamSource("concept.xsl"));
    4.27 -		
    4.28 -		xpathFactory = XPathFactory.newInstance();
    4.29 -		xpath = xpathFactory.newXPath();
    4.30 -		xpath.setNamespaceContext(getNamespaceContext());
    4.31 +
    4.32 +	public Generator(String mode) throws ParserConfigurationException, TransformerConfigurationException {
    4.33 +		this.mode = mode;
    4.34 +
    4.35 +		File templateFile = new File("concept." + mode + ".xsl");
    4.36 +		if (templateFile.exists()) {
    4.37 +
    4.38 +			documentBuilderFactory = DocumentBuilderFactory.newInstance();
    4.39 +			documentBuilderFactory.setNamespaceAware(true);
    4.40 +			documentBuilder = documentBuilderFactory.newDocumentBuilder();
    4.41 +
    4.42 +			xslFactory = TransformerFactory.newInstance();
    4.43 +			xsl = xslFactory.newTransformer(new StreamSource(templateFile));
    4.44 +
    4.45 +			xpathFactory = XPathFactory.newInstance();
    4.46 +			xpath = xpathFactory.newXPath();
    4.47 +			xpath.setNamespaceContext(getNamespaceContext());
    4.48 +		} else {
    4.49 +			throw new IllegalArgumentException("Invalid mode: " + mode + ". File " + templateFile + " does not exist");
    4.50 +		}
    4.51  	}
    4.52 -	
    4.53 +
    4.54  	private void generate(File folder, String filePrefix) {
    4.55  		File infoFile = new File(folder, filePrefix + ".ifo");
    4.56  		File dictFile = new File(folder, filePrefix + ".dict");
    4.57  		File indexFile = new File(folder, filePrefix + ".idx");
    4.58  		File synonymFile = new File(folder, filePrefix + ".syn");
    4.59 -		
    4.60 +
    4.61  		FileOutputStream dictOutputStream = null;
    4.62  		DataOutputStream synonymOutputStream = null;
    4.63  		DataOutputStream indexOutputStream = null;
    4.64  		BufferedWriter infoWriter = null;
    4.65 -		
    4.66 +
    4.67  		SortedSet<IndexEntry> indexEntries = new TreeSet<>();
    4.68  		SortedSet<SynonymsEntry> synonymsEntries = new TreeSet<>();
    4.69 -		
    4.70 +
    4.71  		try {
    4.72  			dictOutputStream = new FileOutputStream(dictFile);
    4.73  			synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile));
    4.74  			indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile));
    4.75  			infoWriter = new BufferedWriter(new FileWriter(infoFile));
    4.76 -			
    4.77 +
    4.78  			Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml");
    4.79  			XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation");
    4.80  			// TODO: tags - labels/descriptions
    4.81  			xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0));
    4.82 -			
    4.83 +
    4.84  			long offset = 0;
    4.85  			long conceptIndex = 0;
    4.86  			for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) {
    4.87 @@ -122,35 +131,35 @@
    4.88  				xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml));
    4.89  				int length = conceptXhtml.size();
    4.90  				dictOutputStream.write(conceptXhtml.toByteArray());
    4.91 -				
    4.92 +
    4.93  				NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET);
    4.94  				List<String> names = new ArrayList<>();
    4.95 -				
    4.96 +
    4.97  				for (Node nameNode : nodeIterable(nameNodes)) {
    4.98  					String name = nameNode.getTextContent().trim();
    4.99  					if (!name.isEmpty()) {
   4.100  						names.add(name);
   4.101  					}
   4.102  				}
   4.103 -				
   4.104 +
   4.105  				String baseName = names.get(0);
   4.106  				IndexEntry indexEntry = new IndexEntry(baseName, offset, length);
   4.107  				indexEntries.add(indexEntry);
   4.108 -				
   4.109 +
   4.110  				for (int i = 1; i < names.size(); i++) {
   4.111  					String name = names.get(i);
   4.112  					if (!baseName.equals(name)) {
   4.113  						synonymsEntries.add(new SynonymsEntry(indexEntry, name));
   4.114  					}
   4.115  				}
   4.116 -				
   4.117 +
   4.118  				offset = offset + length;
   4.119  				conceptIndex++;
   4.120  			}
   4.121 -			
   4.122 +
   4.123  			writeIndex(indexOutputStream, indexEntries);
   4.124  			writeSynonyms(synonymOutputStream, synonymsEntries);
   4.125 -			
   4.126 +
   4.127  			indexOutputStream.flush();
   4.128  			writeInfo(infoWriter, sourceDocument, conceptIndex, synonymsEntries.size(), indexFile.length());
   4.129  		} catch (SAXException | IOException | TransformerException | XPathExpressionException e) {
   4.130 @@ -162,7 +171,7 @@
   4.131  			close(infoWriter);
   4.132  		}
   4.133  	}
   4.134 -	
   4.135 +
   4.136  	private void writeIndex(DataOutputStream indexOutputStream, SortedSet<IndexEntry> indexEntries) throws IOException {
   4.137  		long ordinal = 0;
   4.138  		for (IndexEntry e : indexEntries) {
   4.139 @@ -170,13 +179,13 @@
   4.140  			e.setOrdinal(ordinal++);
   4.141  		}
   4.142  	}
   4.143 -	
   4.144 +
   4.145  	private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet<SynonymsEntry> synonymsEntries) throws IOException {
   4.146  		for (SynonymsEntry s : synonymsEntries) {
   4.147  			s.serialize(synonymOutputStream);
   4.148  		}
   4.149  	}
   4.150 -	
   4.151 +
   4.152  	private void writeInfo(BufferedWriter infoWriter, Document sourceDocument, long wordcount, long synwourdcount, long idxfilesize) throws IOException {
   4.153  		// TODO: values from document metadata
   4.154  		infoWriter.write("StarDict's dict ifo file\n");
   4.155 @@ -191,18 +200,26 @@
   4.156  		infoWriter.write("website=https://telco.frantovo.cz\n");
   4.157  		infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n");
   4.158  		infoWriter.write("date=" + dateFormat.format(new Date()) + "\n");
   4.159 -		infoWriter.write("sametypesequence=h\n");
   4.160 +		infoWriter.write("sametypesequence=" + mode + "\n");
   4.161  	}
   4.162 -	
   4.163 +
   4.164  	public static void main(String[] args) {
   4.165  		File outputFolder = new File("../../delivery/free-telco-dictionary");
   4.166  		outputFolder.mkdir();
   4.167 -		
   4.168 +
   4.169  		try {
   4.170 -			Generator g = new Generator();
   4.171 +			Generator g = new Generator(parseMode(args));
   4.172  			g.generate(outputFolder, "telco");
   4.173  		} catch (ParserConfigurationException | TransformerConfigurationException e) {
   4.174  			log.log(Level.SEVERE, "error during initialization", e);
   4.175  		}
   4.176  	}
   4.177 +
   4.178 +	private static String parseMode(String[] args) {
   4.179 +		if (args.length == 1) {
   4.180 +			return args[0];
   4.181 +		} else {
   4.182 +			return "h";
   4.183 +		}
   4.184 +	}
   4.185  }