Drupal: ignorování XML komentářů (nebudou dělit odstavce).
author František Kučera <franta-hg@frantovo.cz>
Fri, 21 Oct 2011 17:35:29 +0200
changeset 104 b4c8a2760d6f
parent 103 a788bf0e1080
child 105 d566d08c20d6
Drupal: ignorování XML komentářů (nebudou dělit odstavce).
src/org/sonews/storage/DrupalArticle.java
src/org/sonews/storage/DrupalMessage.java
     1.1 --- a/src/org/sonews/storage/DrupalArticle.java	Thu Oct 20 10:50:58 2011 +0200
     1.2 +++ b/src/org/sonews/storage/DrupalArticle.java	Fri Oct 21 17:35:29 2011 +0200
     1.3 @@ -24,7 +24,7 @@
     1.4  import javax.mail.internet.InternetHeaders;
     1.5  
     1.6  /**
     1.7 - *
     1.8 + * V Article je IMHO chyba, protože se hlavičky z msg zapíší dvakrát.
     1.9   * @author František Kučera (frantovo.cz)
    1.10   */
    1.11  public class DrupalArticle extends Article {
     2.1 --- a/src/org/sonews/storage/DrupalMessage.java	Thu Oct 20 10:50:58 2011 +0200
     2.2 +++ b/src/org/sonews/storage/DrupalMessage.java	Fri Oct 21 17:35:29 2011 +0200
     2.3 @@ -42,13 +42,19 @@
     2.4  import javax.mail.internet.MimeBodyPart;
     2.5  import javax.mail.internet.MimeMessage;
     2.6  import javax.mail.internet.MimeMultipart;
     2.7 +import javax.xml.parsers.DocumentBuilder;
     2.8 +import javax.xml.parsers.DocumentBuilderFactory;
     2.9 +import javax.xml.parsers.ParserConfigurationException;
    2.10  import javax.xml.transform.Transformer;
    2.11  import javax.xml.transform.TransformerException;
    2.12  import javax.xml.transform.TransformerFactory;
    2.13 +import javax.xml.transform.dom.DOMSource;
    2.14  import javax.xml.transform.stream.StreamResult;
    2.15  import javax.xml.transform.stream.StreamSource;
    2.16  import org.sonews.daemon.NNTPConnection;
    2.17  import org.sonews.util.io.Resource;
    2.18 +import org.w3c.dom.Document;
    2.19 +import org.xml.sax.SAXException;
    2.20  
    2.21  /**
    2.22   * This is MimeMessage which enables custom Message-ID header
    2.23 @@ -71,14 +77,32 @@
    2.24  	private String messageID;
    2.25  	private Long parentID;
    2.26  	private Long groupID;
    2.27 +	private TransformerFactory transformerFactory;
    2.28 +	private DocumentBuilderFactory documentBuilderFactory;
    2.29 +
    2.30 +	/**
    2.31 +	 * Initializes XML factories (Transformer, DocumentBuilder).
    2.32 +	 */
    2.33 +	private void initFactories() {
    2.34 +		transformerFactory = TransformerFactory.newInstance();
    2.35 +		documentBuilderFactory = DocumentBuilderFactory.newInstance();
    2.36 +		/**
    2.37 +		 * Komentáře nás nepotřebujeme 
    2.38 +		 * (a museli bychom je brát v úvahu při dělení odstavců:
    2.39 +		 * v současné verzi XSLT odstavcovače by nám případný komentář
    2.40 +		 * rozdělil text na dva odstavce, přestože to má být odstavec jede).
    2.41 +		 */
    2.42 +		documentBuilderFactory.setIgnoringComments(true);
    2.43 +	}
    2.44  
    2.45  	/**
    2.46  	 * Constructs MIME message from SQL result.
    2.47  	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
    2.48  	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
    2.49  	 */
    2.50 -	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException {
    2.51 +	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException, TransformerException, IOException, ParserConfigurationException, SAXException {
    2.52  		super(Session.getDefaultInstance(System.getProperties()));
    2.53 +		initFactories();
    2.54  
    2.55  		groupID = rs.getLong("group_id");
    2.56  		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
    2.57 @@ -135,9 +159,10 @@
    2.58  	 */
    2.59  	public DrupalMessage(Article article) throws MessagingException {
    2.60  		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
    2.61 +		initFactories();
    2.62  
    2.63  		String[] parentHeaders = getHeader("In-Reply-To");
    2.64 -		if (parentHeaders.length == 1) {
    2.65 +		if (parentHeaders != null && parentHeaders.length == 1) {
    2.66  			String parentMessageID = parentHeaders[0];
    2.67  			parentID = parseArticleID(parentMessageID);
    2.68  			groupID = parseGroupID(parentMessageID);
    2.69 @@ -162,8 +187,7 @@
    2.70  
    2.71  	private String readPlainText(ResultSet rs, String xhtmlText) {
    2.72  		try {
    2.73 -			TransformerFactory tf = TransformerFactory.newInstance();
    2.74 -			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
    2.75 +			Transformer textTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
    2.76  
    2.77  			StringReader input = new StringReader(xhtmlText);
    2.78  			StringWriter output = new StringWriter(xhtmlText.length());
    2.79 @@ -179,34 +203,37 @@
    2.80  		}
    2.81  	}
    2.82  
    2.83 -	private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException {
    2.84 +	private DOMSource readDOM(String xml) throws ParserConfigurationException, SAXException, IOException {
    2.85 +		DocumentBuilder db = documentBuilderFactory.newDocumentBuilder();
    2.86 +		Document d = db.parse(new ByteArrayInputStream(xml.getBytes("UTF-8")));
    2.87 +		return new DOMSource(d);
    2.88 +	}
    2.89 +
    2.90 +	private String readXhtmlText(String text, String subject, long parentId, String urlBase, String wwwRead, String wwwPost) throws TransformerException, IOException, ParserConfigurationException, SAXException {
    2.91  		/**
    2.92  		 * TODO: 
    2.93 -		 *		- znovupoužívat XSL transformér
    2.94 +		 *		- znovupoužívat XSL transformér (nejen v instanci)
    2.95  		 *		- používat cache, ukládat si vygenerované články
    2.96  		 */
    2.97  		String inputText = makeSimpleXHTML(text);
    2.98  
    2.99 -		TransformerFactory tf = TransformerFactory.newInstance();
   2.100 -		Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
   2.101 +		Transformer paragraphTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
   2.102  
   2.103  		String paragraphedText;
   2.104  		boolean tidyWasUsed = false;
   2.105  		try {
   2.106 -			StringReader input = new StringReader(inputText);
   2.107  			StringWriter output = new StringWriter(2 * inputText.length());
   2.108 -			paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   2.109 +			paragraphTransformer.transform(readDOM(inputText), new StreamResult(output));
   2.110  			paragraphedText = output.toString();
   2.111  		} catch (Exception e) {
   2.112  			log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
   2.113 -			StringReader input = new StringReader(tidyXhtml(inputText));
   2.114  			StringWriter output = new StringWriter(2 * inputText.length());
   2.115 -			paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   2.116 +			paragraphTransformer.transform(readDOM(tidyXhtml(inputText)), new StreamResult(output));
   2.117  			paragraphedText = output.toString();
   2.118  			tidyWasUsed = true;
   2.119  		}
   2.120  
   2.121 -		Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   2.122 +		Transformer xhtmlTransformer = transformerFactory.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   2.123  		xhtmlTransformer.setParameter("isRoot", (parentId == 0));
   2.124  		xhtmlTransformer.setParameter("title", subject);
   2.125  		xhtmlTransformer.setParameter("urlBase", urlBase);