src/org/sonews/storage/DrupalMessage.java
author František Kučera <franta-hg@frantovo.cz>
Thu, 13 Oct 2011 03:09:22 +0200
changeset 75 41d6c0cac8b3
parent 74 e1244384cc6f
child 82 21f413541357
permissions -rw-r--r--
Drupal: čištění HTML – Tidy.
     1 /*
     2  *   SONEWS News Server
     3  *   see AUTHORS for the list of contributors
     4  *
     5  *   This program is free software: you can redistribute it and/or modify
     6  *   it under the terms of the GNU General Public License as published by
     7  *   the Free Software Foundation, either version 3 of the License, or
     8  *   (at your option) any later version.
     9  *
    10  *   This program is distributed in the hope that it will be useful,
    11  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
    12  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13  *   GNU General Public License for more details.
    14  *
    15  *   You should have received a copy of the GNU General Public License
    16  *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
    17  */
    18 package org.sonews.storage;
    19 
    20 import java.io.BufferedReader;
    21 import java.io.ByteArrayOutputStream;
    22 import java.io.IOException;
    23 import java.io.InputStream;
    24 import java.io.InputStreamReader;
    25 import java.io.PrintStream;
    26 import java.io.StringReader;
    27 import java.io.StringWriter;
    28 import java.io.UnsupportedEncodingException;
    29 import java.sql.ResultSet;
    30 import java.sql.SQLException;
    31 import java.util.ArrayList;
    32 import java.util.Date;
    33 import java.util.Enumeration;
    34 import java.util.logging.Level;
    35 import java.util.logging.Logger;
    36 import javax.mail.Header;
    37 import javax.mail.MessagingException;
    38 import javax.mail.Multipart;
    39 import javax.mail.Session;
    40 import javax.mail.internet.InternetAddress;
    41 import javax.mail.internet.MimeBodyPart;
    42 import javax.mail.internet.MimeMessage;
    43 import javax.mail.internet.MimeMultipart;
    44 import javax.xml.transform.Transformer;
    45 import javax.xml.transform.TransformerFactory;
    46 import javax.xml.transform.stream.StreamResult;
    47 import javax.xml.transform.stream.StreamSource;
    48 import org.sonews.util.io.Resource;
    49 
    50 /**
    51  * This is MimeMessage which enables custom Message-ID header
    52  * (this header will not be overwritten by the default one like in MimeMessage).
    53  * 
    54  * Also add header and body separate serialization.
    55  * 
    56  * And can be deserialized from SQL ResultSet
    57  * 
    58  * @author František Kučera (frantovo.cz)
    59  */
    60 public class DrupalMessage extends MimeMessage {
    61 
    62 	private static final Logger log = Logger.getLogger(DrupalMessage.class.getName());
    63 	private static final String MESSAGE_ID_HEADER = "Message-ID";
    64 	private static final String CRLF = "\r\n";
    65 	public static final String CHARSET = "UTF-8";
    66 	private static final String XHTML_CONTENT_TYPE = "text/html; charset=" + CHARSET;
    67 	private String messageID;
    68 
    69 	/**
    70 	 * Constructs MIME message from SQL result.
    71 	 * @param rs ResultSet containing message data. No {@link ResultSet#next()} will be called, just values from current row will be read.
    72 	 * @param constructBody true if whole message should be constructed | false if we need only message headers (body will be dummy).
    73 	 */
    74 	public DrupalMessage(ResultSet rs, String myDomain, boolean constructBody) throws SQLException, UnsupportedEncodingException, MessagingException {
    75 		super(Session.getDefaultInstance(System.getProperties()));
    76 
    77 		addHeader("Message-id", constructMessageId(rs.getInt("id"), rs.getInt("group_id"), rs.getString("group_name"), myDomain));
    78 		addHeader("Newsgroups", rs.getString("group_name"));
    79 		setFrom(new InternetAddress(rs.getString("sender_email"), rs.getString("sender_name")));
    80 		setSubject(rs.getString("subject"));
    81 		setSentDate(new Date(rs.getLong("created")));
    82 
    83 		int parentID = rs.getInt("parent_id");
    84 		if (parentID > 0) {
    85 			String parentMessageID = constructMessageId(parentID, rs.getInt("group_id"), rs.getString("group_name"), myDomain);
    86 			addHeader("In-Reply-To", parentMessageID);
    87 			addHeader("References", parentMessageID);
    88 		}
    89 
    90 		if (constructBody) {
    91 			Multipart multipart = new MimeMultipart("alternative");
    92 			setContent(multipart);
    93 
    94 			/** Plain text part */
    95 			MimeBodyPart textPart = new MimeBodyPart();
    96 			multipart.addBodyPart(textPart);
    97 			textPart.setText(readPlainText(rs));
    98 
    99 			/** XHTML part */
   100 			MimeBodyPart htmlPart = new MimeBodyPart();
   101 			multipart.addBodyPart(htmlPart);
   102 			htmlPart.setContent(readXhtmlText(rs), XHTML_CONTENT_TYPE);
   103 		} else {
   104 			setText("");
   105 		}
   106 	}
   107 
   108 	private String readPlainText(ResultSet rs) {
   109 		/**
   110 		 * TODO: převést na prostý text
   111 		 */
   112 		return "TODO: obyčejný text";
   113 	}
   114 
   115 	private String readXhtmlText(ResultSet rs) {
   116 		/**
   117 		 * TODO: znovupoužívat XSL transformér
   118 		 */
   119 		try {
   120 			String originalText = rs.getString("text");
   121 
   122 			/**
   123 			 * TODO: používat cache, ukládat si vygenerované články
   124 			 * 
   125 			 * 
   126 			 * Místo markdownu jen ošetřit:
   127 			 *		- odstavce
   128 			 *		- nesmyslné entity v odkazech
   129 			 *		- neuzavřené značky: br, hr, img
   130 			 */
   131 			String tidyTexy = tidyXhtml("<html><body>" + originalText + "</body></html>");
   132 
   133 
   134 
   135 			StringReader input = new StringReader(tidyTexy);
   136 			StringWriter output = new StringWriter(2 * tidyTexy.length());
   137 			TransformerFactory tf = TransformerFactory.newInstance();
   138 			Transformer t = tf.newTransformer(new StreamSource(Resource.getAsStream("helpers/mimeXhtmlPart.xsl")));
   139 			t.setParameter("isRoot", (rs.getInt("parent_id") == 0));
   140 			t.setParameter("title", rs.getString("subject"));
   141 			t.setParameter("urlBase", rs.getString("urlBase"));
   142 			t.setParameter("wwwRead", rs.getString("wwwRead"));
   143 			t.setParameter("wwwPost", rs.getString("wwwPost"));
   144 			t.transform(new StreamSource(input), new StreamResult(output));
   145 
   146 			return output.toString();
   147 		} catch (Exception e) {
   148 			/**
   149 			 * TODO: lepší ošetření chyby
   150 			 */
   151 			log.log(Level.WARNING, "Error while transforming article to XHTML", e);
   152 			return "<html><body><p>Při transformaci příspěvku bohužel došlo k chybě.</p></body></html>";
   153 		}
   154 	}
   155 
   156 	/**
   157 	 * TODO: refaktorovat, přesunout
   158 	 */
   159 	private static String tidyXhtml(String inputText) throws IOException {
   160 		Runtime r = Runtime.getRuntime();
   161 		Process p = r.exec(new String[]{"tidy",
   162 					"-asxml",
   163 					"-numeric",
   164 					"-utf8",
   165 					"-quiet",
   166 					"--doctype", "omit",
   167 					"--logical-emphasis", "true",
   168 					"--show-errors", "0"});
   169 
   170 		PrintStream vstupProcesu = new PrintStream(p.getOutputStream());
   171 		vstupProcesu.print(inputText);
   172 		vstupProcesu.close();
   173 
   174 		String outputText = streamToString(p.getInputStream());
   175 
   176 		return outputText;
   177 	}
   178 
   179 	/**
   180 	 * TODO: refaktorovat, přesunout
   181 	 */
   182 	private static String streamToString(InputStream proud) throws IOException {
   183 		StringBuilder výsledek = new StringBuilder();
   184 		BufferedReader buf = new BufferedReader(new InputStreamReader(proud));
   185 		while (true) {
   186 			String radek = buf.readLine();
   187 			if (radek == null) {
   188 				break;
   189 			} else {
   190 				výsledek.append(radek);
   191 				výsledek.append("\n");
   192 			}
   193 		}
   194 		return výsledek.toString();
   195 	}
   196 
   197 	private static String constructMessageId(int articleID, int groupID, String groupName, String domainName) {
   198 		StringBuilder sb = new StringBuilder();
   199 		sb.append("<");
   200 		sb.append(articleID);
   201 		sb.append("-");
   202 		sb.append(groupID);
   203 		sb.append("-");
   204 		sb.append(groupName);
   205 		sb.append("@");
   206 		sb.append(domainName);
   207 		sb.append(">");
   208 		return sb.toString();
   209 	}
   210 
   211 	@Override
   212 	public void setHeader(String name, String value) throws MessagingException {
   213 		super.setHeader(name, value);
   214 
   215 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   216 			messageID = value;
   217 		}
   218 	}
   219 
   220 	@Override
   221 	public final void addHeader(String name, String value) throws MessagingException {
   222 		super.addHeader(name, value);
   223 
   224 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   225 			messageID = value;
   226 		}
   227 	}
   228 
   229 	@Override
   230 	public void removeHeader(String name) throws MessagingException {
   231 		super.removeHeader(name);
   232 
   233 		if (MESSAGE_ID_HEADER.equalsIgnoreCase(name)) {
   234 			messageID = null;
   235 		}
   236 	}
   237 
   238 	public void setMessageID(String messageID) {
   239 		this.messageID = messageID;
   240 	}
   241 
   242 	@Override
   243 	protected void updateMessageID() throws MessagingException {
   244 		if (messageID == null) {
   245 			super.updateMessageID();
   246 		} else {
   247 			setHeader(MESSAGE_ID_HEADER, messageID);
   248 		}
   249 	}
   250 
   251 	/**
   252 	 * Call {@link #saveChanges()} before this method, if you want all headers including such ones like:
   253 	 * 
   254 	 * <pre>MIME-Version: 1.0
   255 	 *Content-Type: multipart/alternative;</pre>
   256 	 * 
   257 	 * @return serialized headers
   258 	 * @throws MessagingException if getAllHeaders() fails
   259 	 */
   260 	public String getHeaders() throws MessagingException {
   261 		StringBuilder sb = new StringBuilder();
   262 		for (Enumeration eh = getAllHeaderLines(); eh.hasMoreElements();) {
   263 			sb.append(eh.nextElement());
   264 			sb.append(CRLF);
   265 		}
   266 		return sb.toString();
   267 	}
   268 
   269 	public byte[] getBody() throws IOException, MessagingException {
   270 		saveChanges();
   271 
   272 		ArrayList<String> skipHeaders = new ArrayList<String>();
   273 		for (Enumeration eh = getAllHeaders(); eh.hasMoreElements();) {
   274 			Header h = (Header) eh.nextElement();
   275 			skipHeaders.add(h.getName());
   276 		}
   277 
   278 		ByteArrayOutputStream baos = new ByteArrayOutputStream(1024);
   279 		writeTo(baos, skipHeaders.toArray(new String[skipHeaders.size()]));
   280 		return baos.toByteArray();
   281 	}
   282 }