src/org/sonews/storage/DrupalMessage.java
author František Kučera <franta-hg@frantovo.cz>
Thu, 20 Oct 2011 09:59:04 +0200
changeset 102 d843b4fee5dc
parent 100 08c9fb6fb017
child 103 a788bf0e1080
permissions -rw-r--r--
Drupal: posílání zpráv do skupiny.
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package org.sonews.storage;
    19 
    20 import java.io.BufferedReader;
    21 import java.io.ByteArrayInputStream;
    22 import java.io.ByteArrayOutputStream;
    23 import java.io.IOException;
    24 import java.io.InputStream;
    25 import java.io.InputStreamReader;
    26 import java.io.PrintStream;
    27 import java.io.StringReader;
    28 import java.io.StringWriter;
    29 import java.io.UnsupportedEncodingException;
    30 import java.sql.ResultSet;
    31 import java.sql.SQLException;
    32 import java.util.ArrayList;
    33 import java.util.Date;
    34 import java.util.Enumeration;
    35 import java.util.logging.Level;
    36 import java.util.logging.Logger;
    37 import javax.mail.Header;
    38 import javax.mail.MessagingException;
    39 import javax.mail.Multipart;
    40 import javax.mail.Session;
    41 import javax.mail.internet.InternetAddress;
    42 import javax.mail.internet.MimeBodyPart;
    43 import javax.mail.internet.MimeMessage;
    44 import javax.mail.internet.MimeMultipart;
    45 import javax.xml.transform.Transformer;
    46 import javax.xml.transform.TransformerFactory;
    47 import javax.xml.transform.stream.StreamResult;
    48 import javax.xml.transform.stream.StreamSource;
    49 import org.sonews.daemon.NNTPConnection;
    50 import org.sonews.util.io.Resource;
    51 
    52 /**
    53  * This is MimeMessage which enables custom Message-ID header
    54  * (this header will not be overwritten by the default one like in MimeMessage).
    55  * 
    56  * Also add header and body separate serialization.
    57  * 
    58  * And can be deserialized from SQL ResultSet
    59  * 
    60  * @author František Kučera (frantovo.cz)
    61  */
    62 public class DrupalMessage extends MimeMessage {
    63 
    64 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
    65 	private static final String MESSAGE_ID_HEADER = "Message-ID";
    66 	private static final String CRLF = "\r\n";
    67 	public static final String CHARSET = "UTF-8";
    68 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
    69 	private static final String ZNAKČKA_KONCE_ŘÁDKU = "◆";
    70 	private String messageID;
    71 	private Long parentID;
    72 	private Long groupID;
    73 
    74 	/**
    75 	 * Constructs MIME message from SQL result.
    76 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
    77 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
    78 	 */
    79 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException {
    80 		super(Session.getDefaultInstance(System.getProperties()));
    81 
    82 		groupID = rs.getLong("group_id");
    83 		addHeader("Message-id", constructMessageId(rs.getInt("id"), groupID, rs.getString("group_name"), myDomain));
    84 		addHeader("Newsgroups", rs.getString("group_name"));
    85 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
    86 		setSubject(rs.getString("subject"));
    87 		setSentDate(new Date(rs.getLong("created")));
    88 
    89 		parentID = rs.getLong("parent_id");
    90 		if (parentID > 0) {
    91 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
    92 			addHeader("In-Reply-To", parentMessageID);
    93 			addHeader("References", parentMessageID);
    94 		}
    95 
    96 		if (constructBody) {
    97 			Multipart multipart = new MimeMultipart("alternative");
    98 			setContent(multipart);
    99 
   100 			/** XHTML part */
   101 			MimeBodyPart htmlPart = new MimeBodyPart();
   102 			String xhtmlText = readXhtmlText(rs);
   103 			htmlPart.setContent(xhtmlText, XHTML_CONTENT_TYPE);
   104 
   105 			/** Plain text part */
   106 			MimeBodyPart textPart = new MimeBodyPart();
   107 			String plainText = readPlainText(rs, xhtmlText);
   108 			textPart.setText(plainText);
   109 			//addHeader("Lines", String.valueOf(plainText.split("\n").length));
   110 
   111 			/**
   112 			 * Thunderbirdu záleží, v jakém pořadí části jsou 
   113 			 * (když je prostý text druhý, html se nezobrazí),
   114 			 * KNode zobrazuje HTML správně, i když je na prvním místě.
   115 			 */
   116 			multipart.addBodyPart(textPart);
   117 			multipart.addBodyPart(htmlPart);
   118 		} else {
   119 			/** empty body, just headers */
   120 			setText("");
   121 		}
   122 	}
   123 
   124 	/**
   125 	 * Constructs MIME message from article posted by user.
   126 	 * @param article article that came through NNTP.
   127 	 * @throws MessagingException 
   128 	 */
   129 	public DrupalMessage(Article article) throws MessagingException {
   130 		super(Session.getDefaultInstance(System.getProperties()), serializeArticle(article));
   131 
   132 		String[] parentHeaders = getHeader("In-Reply-To");
   133 		if (parentHeaders.length == 1) {
   134 			String parentMessageID = parentHeaders[0];
   135 			parentID = parseArticleID(parentMessageID);
   136 			groupID = parseGroupID(parentMessageID);
   137 		} else {
   138 			throw new MessagingException("Message posted by user must have exactly one In-Reply-To header.");
   139 		}
   140 	}
   141 
   142 	private static InputStream serializeArticle(Article a) {
   143 		byte articleHeaders[] = a.getHeaderSource().getBytes();
   144 		byte delimiter[] = (NNTPConnection.NEWLINE + NNTPConnection.NEWLINE).getBytes();
   145 		byte body[] = a.getBody();
   146 
   147 		byte message[] = new byte[articleHeaders.length + delimiter.length + body.length];
   148 
   149 		System.arraycopy(articleHeaders, 0, message, 0, articleHeaders.length);
   150 		System.arraycopy(delimiter, 0, message, articleHeaders.length, delimiter.length);
   151 		System.arraycopy(body, 0, message, articleHeaders.length + delimiter.length, body.length);
   152 
   153 		return new ByteArrayInputStream(message);
   154 	}
   155 
   156 	private String readPlainText(ResultSet rs, String xhtmlText) {
   157 		try {
   158 			TransformerFactory tf = TransformerFactory.newInstance();
   159 			Transformer textTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeTextPart.xsl")));
   160 
   161 			StringReader input = new StringReader(xhtmlText);
   162 			StringWriter output = new StringWriter(xhtmlText.length());
   163 			textTransformer.transform(new StreamSource(input), new StreamResult(output));
   164 
   165 			return output.toString();
   166 		} catch (Exception e) {
   167 			/**
   168 			 * TODO: lepší ošetření chyby
   169 			 */
   170 			log.log(Level.WARNING, "Error while transforming article to plain text", e);
   171 			return makeSimpleXHTML("Při transformaci příspěvku bohužel došlo k chybě.");
   172 		}
   173 	}
   174 
   175 	private String readXhtmlText(ResultSet rs) {
   176 		/**
   177 		 * TODO: 
   178 		 *		- znovupoužívat XSL transformér
   179 		 *		- používat cache, ukládat si vygenerované články
   180 		 */
   181 		try {
   182 			String inputText = makeSimpleXHTML(rs.getString("text"));
   183 
   184 			TransformerFactory tf = TransformerFactory.newInstance();
   185 			Transformer paragraphTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart-make-paragraphs.xsl")));
   186 
   187 			String paragraphedText;
   188 			boolean tidyWasUsed = false;
   189 			try {
   190 				StringReader input = new StringReader(inputText);
   191 				StringWriter output = new StringWriter(2 * inputText.length());
   192 				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   193 				paragraphedText = output.toString();
   194 			} catch (Exception e) {
   195 				log.log(Level.FINER, "HTML input was shitty – Tidy had to be called.", e);
   196 				StringReader input = new StringReader(tidyXhtml(inputText));
   197 				StringWriter output = new StringWriter(2 * inputText.length());
   198 				paragraphTransformer.transform(new StreamSource(input), new StreamResult(output));
   199 				paragraphedText = output.toString();
   200 				tidyWasUsed = true;
   201 			}
   202 
   203 			Transformer xhtmlTransformer = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   204 			xhtmlTransformer.setParameter("isRoot", (rs.getInt("parent_id") == 0));
   205 			xhtmlTransformer.setParameter("title", rs.getString("subject"));
   206 			xhtmlTransformer.setParameter("urlBase", rs.getString("urlBase"));
   207 			xhtmlTransformer.setParameter("wwwRead", rs.getString("wwwRead"));
   208 			xhtmlTransformer.setParameter("wwwPost", rs.getString("wwwPost"));
   209 			xhtmlTransformer.setParameter("headComment", String.format("Drupal-NNTP bridge. Transformed: %1$tc. Tidy had to be used: %2$b", new Date(), tidyWasUsed));
   210 			StringReader input = new StringReader(paragraphedText);
   211 			StringWriter output = new StringWriter(2 * paragraphedText.length());
   212 			xhtmlTransformer.transform(new StreamSource(input), new StreamResult(output));
   213 
   214 			return output.toString();
   215 		} catch (Exception e) {
   216 			/**
   217 			 * TODO: lepší ošetření chyby
   218 			 */
   219 			log.log(Level.WARNING, "Error while transforming article to XHTML", e);
   220 			return makeSimpleXHTML("<p>Při transformaci příspěvku bohužel došlo k chybě.</p>");
   221 		}
   222 	}
   223 
   224 	private static String makeSimpleXHTML(String body) {
   225 		return "<html xmlns=\"http://www.w3.org/1999/xhtml\"><body>" + body + "</body></html>";
   226 	}
   227 
   228 	/**
   229 	 * TODO: refaktorovat, přesunout
   230 	 */
   231 	private static String tidyXhtml(String inputText) throws IOException {
   232 		/*
   233 		 * Viz https://sourceforge.net/tracker/index.php?func=detail&aid=3424437&group_id=27659&atid=390966
   234 		 *
   235 		 * TODO:
   236 		 *		- použít delší zástupný řetězec, ne jen jeden znak
   237 		 *		- umísťovat ho jen tam, kde už nějaký text je (ne mezi >\s*<)
   238 		 */
   239 		inputText = označKonceŘádků(inputText);
   240 
   241 		Runtime r = Runtime.getRuntime();
   242 		Process p = r.exec(new String[]{"tidy", // http://tidy.sourceforge.net
   243 					"-asxml", // well formed XHTML
   244 					"-numeric", // číselné entity
   245 					"-utf8", // kódování
   246 					"--show-warnings", "false", // žádná varování nás nezajímají
   247 					"--show-errors", "0", // ani chyby
   248 					"--doctype", "omit", // doctype nepotřebujeme (doplníme si případně vlastní v XSLT)
   249 					"--logical-emphasis", "true", // em a strong místo i a b
   250 					"--literal-attributes", "true", // zachovat mezery a konce řádků v atributech
   251 					"--force-output", "true" // neznámé značky zahodíme, vložíme jen jejich obsah
   252 				});
   253 
   254 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
   255 		vstupProcesu.print(inputText);
   256 		vstupProcesu.close();
   257 
   258 		String outputText = streamToString(p.getInputStream());
   259 
   260 		outputText = vraťKonceŘádků(outputText);
   261 
   262 		return outputText;
   263 	}
   264 
   265 	private static String označKonceŘádků(String text) {
   266 		text = text.replaceAll(">\\s+<", "> <");
   267 		text = text.replaceAll("\\n", ZNAKČKA_KONCE_ŘÁDKU + "\n");
   268 		return text;
   269 	}
   270 
   271 	private static String vraťKonceŘádků(String text) {
   272 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU + "\\n", "\n");
   273 		text = text.replaceAll(ZNAKČKA_KONCE_ŘÁDKU, "\n");
   274 		return text;
   275 	}
   276 
   277 	/**
   278 	 * TODO: refaktorovat, přesunout
   279 	 */
   280 	private static String streamToString(InputStream proud) throws IOException {
   281 		StringBuilder výsledek = new StringBuilder();
   282 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
   283 		while (true) {
   284 			String radek = buf.readLine();
   285 			if (radek == null) {
   286 				break;
   287 			} else {
   288 				výsledek.append(radek);
   289 				výsledek.append("\n");
   290 			}
   291 		}
   292 		return výsledek.toString();
   293 	}
   294 
   295 	public static String constructMessageId(long articleID, long groupID, String groupName, String domainName) {
   296 		StringBuilder sb = new StringBuilder();
   297 		sb.append("<");
   298 		sb.append(articleID);
   299 		sb.append("-");
   300 		sb.append(groupID);
   301 		sb.append("-");
   302 		sb.append(groupName);
   303 		sb.append("@");
   304 		sb.append(domainName);
   305 		sb.append(">");
   306 		return sb.toString();
   307 	}
   308 
   309 	/**
   310 	 * @return article ID of parent of this message | or null, if this is root article and not reply to another one
   311 	 */
   312 	public Long getParentID() {
   313 		return parentID;
   314 	}
   315 
   316 	/**
   317 	 * @return group ID of this message | or null, if this message is not reply to any other one – which is wrong because we have to know the group
   318 	 */
   319 	public Long getGroupID() {
   320 		return groupID;
   321 	}
   322 
   323 	/**
   324 	 * 
   325 	 * @param messageID &lt;{0}-{1}-{2}@domain.tld&gt; where {0} is nntp_id and {1} is group_id and {2} is group_name
   326 	 * @return array where [0] = nntp_id and [1] = group_id and [2] = group_name or returns null if messageID is invalid
   327 	 */
   328 	private static String[] parseMessageID(String messageID) {
   329 		if (messageID.matches("<[0-9]+\\-[0-9]+\\-[a-z0-9\\.]+@.+>")) {
   330 			return messageID.substring(1).split("@")[0].split("\\-");
   331 		} else {
   332 			return null;
   333 		}
   334 	}
   335 
   336 	public static Long parseArticleID(String messageID) {
   337 		String[] localPart = parseMessageID(messageID);
   338 		if (localPart == null) {
   339 			return null;
   340 		} else {
   341 			return Long.parseLong(localPart[0]);
   342 		}
   343 	}
   344 
   345 	public static Long parseGroupID(String messageID) {
   346 		String[] localPart = parseMessageID(messageID);
   347 		if (localPart == null) {
   348 			return null;
   349 		} else {
   350 			return Long.parseLong(localPart[1]);
   351 			// If needed:
   352 			// parseGroupName() will be same as this method, just with:
   353 			// return localPart[2];
   354 		}
   355 	}
   356 
   357 	@Override
   358 	public void setHeader(String name, String value) throws MessagingException {
   359 		super.setHeader(name, value);
   360 
   361 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   362 			messageID = value;
   363 		}
   364 	}
   365 
   366 	@Override
   367 	public final void addHeader(String name, String value) throws MessagingException {
   368 		super.addHeader(name, value);
   369 
   370 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   371 			messageID = value;
   372 		}
   373 	}
   374 
   375 	@Override
   376 	public void removeHeader(String name) throws MessagingException {
   377 		super.removeHeader(name);
   378 
   379 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   380 			messageID = null;
   381 		}
   382 	}
   383 
   384 	public void setMessageID(String messageID) {
   385 		this.messageID = messageID;
   386 	}
   387 
   388 	@Override
   389 	protected void updateMessageID() throws MessagingException {
   390 		if (messageID == null) {
   391 			super.updateMessageID();
   392 		} else {
   393 			setHeader(MESSAGE_ID_HEADER, messageID);
   394 		}
   395 	}
   396 
   397 	/**
   398 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
   399 	 * 
   400 	 * <pre>MIME-Version: 1.0
   401 	 *Content-Type: multipart/alternative;</pre>
   402 	 * 
   403 	 * @return serialized headers
   404 	 * @throws MessagingException if getAllHeaders() fails
   405 	 */
   406 	public String getHeaders() throws MessagingException {
   407 		StringBuilder sb = new StringBuilder();
   408 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
   409 			sb.append(eh.nextElement());
   410 			sb.append(CRLF);
   411 		}
   412 		return sb.toString();
   413 	}
   414 
   415 	public byte[] getBody() throws IOException, MessagingException {
   416 		saveChanges();
   417 
   418 		ArrayList<String> skipHeaders = new ArrayList<String>();
   419 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
   420 			Header h = (Header) eh.nextElement();
   421 			skipHeaders.add(h.getName());
   422 		}
   423 
   424 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
   425 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
   426 		return baos.toByteArray();
   427 	}
   428 }