java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java
changeset 21 e7c9a8722f76
parent 20 aecdfc3b1950
child 23 f29d2ac58ed6
     1.1 --- a/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java	Wed Jul 10 14:32:45 2013 +0200
     1.2 +++ b/java/dictionary-generator/src/cz/frantovo/telco/dictionary/Generator.java	Wed Jul 10 20:48:18 2013 +0200
     1.3 @@ -66,9 +66,10 @@
     1.4   * @author Ing. František Kučera (frantovo.cz)
     1.5   */
     1.6  public class Generator {
     1.7 -	
     1.8 +
     1.9  	private static final Logger log = Logger.getLogger(Generator.class.getName());
    1.10  	private static final String EML_TO_KEN = "ixumhht68";
    1.11 +	private String mode;
    1.12  	private final DocumentBuilderFactory documentBuilderFactory;
    1.13  	private final DocumentBuilder documentBuilder;
    1.14  	private final XPathFactory xpathFactory;
    1.15 @@ -76,45 +77,53 @@
    1.16  	private final TransformerFactory xslFactory;
    1.17  	private final Transformer xsl;
    1.18  	private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy.MM.dd");
    1.19 -	
    1.20 -	public Generator() throws ParserConfigurationException, TransformerConfigurationException {
    1.21 -		documentBuilderFactory = DocumentBuilderFactory.newInstance();
    1.22 -		documentBuilderFactory.setNamespaceAware(true);
    1.23 -		documentBuilder = documentBuilderFactory.newDocumentBuilder();
    1.24 -		
    1.25 -		xslFactory = TransformerFactory.newInstance();
    1.26 -		xsl = xslFactory.newTransformer(new StreamSource("concept.xsl"));
    1.27 -		
    1.28 -		xpathFactory = XPathFactory.newInstance();
    1.29 -		xpath = xpathFactory.newXPath();
    1.30 -		xpath.setNamespaceContext(getNamespaceContext());
    1.31 +
    1.32 +	public Generator(String mode) throws ParserConfigurationException, TransformerConfigurationException {
    1.33 +		this.mode = mode;
    1.34 +
    1.35 +		File templateFile = new File("concept." + mode + ".xsl");
    1.36 +		if (templateFile.exists()) {
    1.37 +
    1.38 +			documentBuilderFactory = DocumentBuilderFactory.newInstance();
    1.39 +			documentBuilderFactory.setNamespaceAware(true);
    1.40 +			documentBuilder = documentBuilderFactory.newDocumentBuilder();
    1.41 +
    1.42 +			xslFactory = TransformerFactory.newInstance();
    1.43 +			xsl = xslFactory.newTransformer(new StreamSource(templateFile));
    1.44 +
    1.45 +			xpathFactory = XPathFactory.newInstance();
    1.46 +			xpath = xpathFactory.newXPath();
    1.47 +			xpath.setNamespaceContext(getNamespaceContext());
    1.48 +		} else {
    1.49 +			throw new IllegalArgumentException("Invalid mode: " + mode + ". File " + templateFile + " does not exist");
    1.50 +		}
    1.51  	}
    1.52 -	
    1.53 +
    1.54  	private void generate(File folder, String filePrefix) {
    1.55  		File infoFile = new File(folder, filePrefix + ".ifo");
    1.56  		File dictFile = new File(folder, filePrefix + ".dict");
    1.57  		File indexFile = new File(folder, filePrefix + ".idx");
    1.58  		File synonymFile = new File(folder, filePrefix + ".syn");
    1.59 -		
    1.60 +
    1.61  		FileOutputStream dictOutputStream = null;
    1.62  		DataOutputStream synonymOutputStream = null;
    1.63  		DataOutputStream indexOutputStream = null;
    1.64  		BufferedWriter infoWriter = null;
    1.65 -		
    1.66 +
    1.67  		SortedSet<IndexEntry> indexEntries = new TreeSet<>();
    1.68  		SortedSet<SynonymsEntry> synonymsEntries = new TreeSet<>();
    1.69 -		
    1.70 +
    1.71  		try {
    1.72  			dictOutputStream = new FileOutputStream(dictFile);
    1.73  			synonymOutputStream = new DataOutputStream(new FileOutputStream(synonymFile));
    1.74  			indexOutputStream = new DataOutputStream(new FileOutputStream(indexFile));
    1.75  			infoWriter = new BufferedWriter(new FileWriter(infoFile));
    1.76 -			
    1.77 +
    1.78  			Document sourceDocument = documentBuilder.parse("../../data/dictionary.xml");
    1.79  			XPathExpression termsXPath = xpath.compile("d:term/@completeForm|d:term/@abbreviation");
    1.80  			// TODO: tags - labels/descriptions
    1.81  			xsl.setParameter("tags", sourceDocument.getElementsByTagNameNS(DICTIONARY, "tags").item(0));
    1.82 -			
    1.83 +
    1.84  			long offset = 0;
    1.85  			long conceptIndex = 0;
    1.86  			for (Node conceptNode : nodeIterable(sourceDocument.getElementsByTagNameNS(DICTIONARY, "concept"))) {
    1.87 @@ -122,35 +131,35 @@
    1.88  				xsl.transform(new DOMSource(conceptNode), new StreamResult(conceptXhtml));
    1.89  				int length = conceptXhtml.size();
    1.90  				dictOutputStream.write(conceptXhtml.toByteArray());
    1.91 -				
    1.92 +
    1.93  				NodeList nameNodes = (NodeList) termsXPath.evaluate(conceptNode, XPathConstants.NODESET);
    1.94  				List<String> names = new ArrayList<>();
    1.95 -				
    1.96 +
    1.97  				for (Node nameNode : nodeIterable(nameNodes)) {
    1.98  					String name = nameNode.getTextContent().trim();
    1.99  					if (!name.isEmpty()) {
   1.100  						names.add(name);
   1.101  					}
   1.102  				}
   1.103 -				
   1.104 +
   1.105  				String baseName = names.get(0);
   1.106  				IndexEntry indexEntry = new IndexEntry(baseName, offset, length);
   1.107  				indexEntries.add(indexEntry);
   1.108 -				
   1.109 +
   1.110  				for (int i = 1; i < names.size(); i++) {
   1.111  					String name = names.get(i);
   1.112  					if (!baseName.equals(name)) {
   1.113  						synonymsEntries.add(new SynonymsEntry(indexEntry, name));
   1.114  					}
   1.115  				}
   1.116 -				
   1.117 +
   1.118  				offset = offset + length;
   1.119  				conceptIndex++;
   1.120  			}
   1.121 -			
   1.122 +
   1.123  			writeIndex(indexOutputStream, indexEntries);
   1.124  			writeSynonyms(synonymOutputStream, synonymsEntries);
   1.125 -			
   1.126 +
   1.127  			indexOutputStream.flush();
   1.128  			writeInfo(infoWriter, sourceDocument, conceptIndex, synonymsEntries.size(), indexFile.length());
   1.129  		} catch (SAXException | IOException | TransformerException | XPathExpressionException e) {
   1.130 @@ -162,7 +171,7 @@
   1.131  			close(infoWriter);
   1.132  		}
   1.133  	}
   1.134 -	
   1.135 +
   1.136  	private void writeIndex(DataOutputStream indexOutputStream, SortedSet<IndexEntry> indexEntries) throws IOException {
   1.137  		long ordinal = 0;
   1.138  		for (IndexEntry e : indexEntries) {
   1.139 @@ -170,13 +179,13 @@
   1.140  			e.setOrdinal(ordinal++);
   1.141  		}
   1.142  	}
   1.143 -	
   1.144 +
   1.145  	private void writeSynonyms(DataOutputStream synonymOutputStream, SortedSet<SynonymsEntry> synonymsEntries) throws IOException {
   1.146  		for (SynonymsEntry s : synonymsEntries) {
   1.147  			s.serialize(synonymOutputStream);
   1.148  		}
   1.149  	}
   1.150 -	
   1.151 +
   1.152  	private void writeInfo(BufferedWriter infoWriter, Document sourceDocument, long wordcount, long synwourdcount, long idxfilesize) throws IOException {
   1.153  		// TODO: values from document metadata
   1.154  		infoWriter.write("StarDict's dict ifo file\n");
   1.155 @@ -191,18 +200,26 @@
   1.156  		infoWriter.write("website=https://telco.frantovo.cz\n");
   1.157  		infoWriter.write("description=A dictionary for telecommunications licensed under GNU FDL\n");
   1.158  		infoWriter.write("date=" + dateFormat.format(new Date()) + "\n");
   1.159 -		infoWriter.write("sametypesequence=h\n");
   1.160 +		infoWriter.write("sametypesequence=" + mode + "\n");
   1.161  	}
   1.162 -	
   1.163 +
   1.164  	public static void main(String[] args) {
   1.165  		File outputFolder = new File("../../delivery/free-telco-dictionary");
   1.166  		outputFolder.mkdir();
   1.167 -		
   1.168 +
   1.169  		try {
   1.170 -			Generator g = new Generator();
   1.171 +			Generator g = new Generator(parseMode(args));
   1.172  			g.generate(outputFolder, "telco");
   1.173  		} catch (ParserConfigurationException | TransformerConfigurationException e) {
   1.174  			log.log(Level.SEVERE, "error during initialization", e);
   1.175  		}
   1.176  	}
   1.177 +
   1.178 +	private static String parseMode(String[] args) {
   1.179 +		if (args.length == 1) {
   1.180 +			return args[0];
   1.181 +		} else {
   1.182 +			return "h";
   1.183 +		}
   1.184 +	}
   1.185  }