Page MenuHomeIn-Portal Phabricator

mime_decode_helper.php
No OneTemporary

File Metadata

Created
Fri, Nov 21, 4:34 AM

mime_decode_helper.php

<?php
/**
* @version $Id: mime_decode_helper.php 14241 2011-03-16 20:24:35Z alex $
* @package In-Portal
* @copyright Copyright (C) 1997 - 2009 Intechnic. All rights reserved.
* @license GNU/GPL
* In-Portal is Open Source software.
* This means that this software may have been modified pursuant
* the GNU General Public License, and as distributed it includes
* or is derivative of works licensed under the GNU General Public License
* or other free or open source software licenses.
* See http://www.in-portal.org/license for copyright notices and details.
*/
// Refuse direct access: this file must be loaded by the framework, which defines FULL_PATH.
if (!defined('FULL_PATH')) {
    die('restricted access!');
}
/**
* The MIME decoding class
*
*/
class MimeDecodeHelper extends kHelper {

    /**
     * Raw header section of the message given to InitHelper
     *
     * @var string
     */
    var $_headerPart;

    /**
     * Raw body section of the message given to InitHelper
     *
     * @var string
     */
    var $_bodyPart;

    /**
     * Message of the last parsing error (empty string, when there was none)
     *
     * @var string
     */
    var $_lastErrorMessage = '';

    /**
     * Decode encoded-words ("=?charset?Q|B?text?=") found in header values
     *
     * @var bool
     */
    var $_decodeHeaders = false;

    /**
     * Put bodies of the parts into decoded result
     *
     * @var bool
     */
    var $_includeBodies = true;

    /**
     * Undo Content-Transfer-Encoding of bodies (only matters, when bodies are included in result)
     *
     * @var bool
     */
    var $_decodeBodies = false;

    /**
     * Reports parsing error as E_USER_WARNING
     *
     * @param string $str
     */
    function raiseError($str)
    {
        trigger_error('Error during email parsing: ' . $str, E_USER_WARNING);
    }

    /**
     * Remembers header and body sections of given email message for later "decode" call
     *
     * Does nothing, when no message is given.
     *
     * @param string $message
     */
    function InitHelper($message = null)
    {
        if (!isset($message)) {
            return ;
        }

        list ($header, $body) = $this->_splitBodyHeader($message);

        $this->_headerPart = $header;
        $this->_bodyPart = $body;
    }

    /**
     * Decodes email message, that was previously set using InitHelper method
     *
     * @param bool $decode_headers decode encoded-words in header values
     * @param bool $include_bodies include bodies of the parts in result
     * @param bool $decode_bodies undo Content-Transfer-Encoding of included bodies
     * @return stdClass|bool decoded message or "false" on error (error is also reported via raiseError)
     */
    function decode($decode_headers = false, $include_bodies = false, $decode_bodies = false)
    {
        $this->_decodeHeaders = $decode_headers;
        $this->_includeBodies = $include_bodies;
        $this->_decodeBodies = $decode_bodies;

        $ret = $this->decodePart($this->_headerPart, $this->_bodyPart);

        if ($ret === false) {
            $this->raiseError($this->_lastErrorMessage);
        }

        return $ret;
    }

    /**
     * Decodes one MIME entity (whole message or one part of multipart message)
     *
     * Result object has "headers" (lowercased name => value or array of values) and, depending
     * on headers found, "ctype_primary", "ctype_secondary", "ctype_parameters", "disposition",
     * "d_parameters", "body" and "parts" (decoded sub-parts).
     *
     * @param string $headers raw header section
     * @param string $body raw body section
     * @param string $default_ctype content type to assume, when there is no Content-Type header
     * @param bool $only_headers return only array of (decoded) headers
     * @return stdClass|Array|bool "false", when multipart entity has no boundary parameter
     */
    function decodePart($headers, $body, $default_ctype = 'text/plain', $only_headers = false)
    {
        $return = new stdClass;
        $return->headers = Array ();

        $headers = $this->_parseHeaders($headers, $this->_decodeHeaders);

        // headers, that keep only their last value, when they are repeated
        $single_headers = Array ('subject', 'from', 'to', 'cc', 'reply-to', 'date');

        // "headers only" mode always returns decoded values; _parseHeaders has already
        // decoded them, when header decoding is on, so don't decode a second time
        $decode_here = $only_headers && !$this->_decodeHeaders;

        foreach ($headers as $value) {
            $header_name = strtolower($value['name']);
            $header_value = $decode_here ? $this->_decodeHeader($value['value']) : $value['value'];

            if (!array_key_exists($header_name, $return->headers) || in_array($header_name, $single_headers)) {
                // first occurrence or header, that can't be repeated
                $return->headers[$header_name] = $header_value;
                continue;
            }

            if (!is_array($return->headers[$header_name])) {
                // 2nd value found, so turn what was collected before into array
                $return->headers[$header_name] = Array ( $return->headers[$header_name] );
            }

            $return->headers[$header_name][] = $header_value;
        }

        if ($only_headers) {
            return $return->headers;
        }

        // pick structural information from content headers
        foreach ($headers as $value) {
            $header_value = $value['value'];

            switch ( strtolower($value['name']) ) {
                case 'content-type':
                    $content_type = $this->_parseHeaderValue($header_value);

                    if (preg_match('/([0-9a-z+.-]+)\/([0-9a-z+.-]+)/i', $content_type['value'], $regs)) {
                        // "text/plain", "text/html", etc.
                        $return->ctype_primary = $regs[1];
                        $return->ctype_secondary = $regs[2];
                    }

                    if (array_key_exists('other', $content_type)) {
                        // "charset", etc.
                        foreach ($content_type['other'] as $p_name => $p_value) {
                            $return->ctype_parameters["$p_name"] = $p_value;
                        }
                    }
                    break;

                case 'content-disposition':
                    $content_disposition = $this->_parseHeaderValue($header_value);
                    $return->disposition = $content_disposition['value'];

                    if (array_key_exists('other', $content_disposition)) {
                        // "filename", etc.
                        foreach ($content_disposition['other'] as $p_name => $p_value) {
                            $return->d_parameters["$p_name"] = $p_value;
                        }
                    }
                    break;

                case 'content-transfer-encoding':
                    $content_transfer_encoding = $this->_parseHeaderValue($header_value);
                    break;
            }
        }

        $encoding = isset($content_transfer_encoding) ? $content_transfer_encoding['value'] : '7bit';

        if (!isset($content_type)) {
            // no Content-Type header: fall back to type given by caller
            $ctype = explode('/', $default_ctype);
            $return->ctype_primary = $ctype[0];
            $return->ctype_secondary = $ctype[1];

            if ($this->_includeBodies) {
                // transfer encoding applies no matter if content type was given or not
                $return->body = $this->_decodeBodies ? $this->_decodeBody($body, $encoding) : $body;
            }

            return $return;
        }

        switch ( strtolower($content_type['value']) ) {
            case 'multipart/parallel':
            case 'multipart/report': // RFC1892
            case 'multipart/signed': // PGP
            case 'multipart/digest':
            case 'multipart/alternative':
            case 'multipart/appledouble':
            case 'multipart/related':
            case 'multipart/mixed':
                if (!isset($content_type['other']['boundary'])) {
                    $this->_lastErrorMessage = 'No boundary found for ' . $content_type['value'] . ' part';

                    return false;
                }

                // parts of digest are messages by default, parts of other types are plain text
                $default_ctype = (strtolower($content_type['value']) === 'multipart/digest') ? 'message/rfc822' : 'text/plain';

                $return->parts = Array ();
                $parts = $this->_boundarySplit($body, $content_type['other']['boundary']);

                foreach ($parts as $part_source) {
                    list ($part_header, $part_body) = $this->_splitBodyHeader($part_source);
                    $part = $this->decodePart($part_header, $part_body, $default_ctype);

                    if ($part === false) {
                        // part is broken: report it, but don't put "false" among decoded parts
                        $this->raiseError($this->_lastErrorMessage);
                        continue;
                    }

                    $return->parts[] = $part;
                }
                break;

            case 'message/rfc822':
            case 'message/disposition-notification':
                // create another instance, not to interfere with main parser
                $mime_decode_helper =& $this->Application->makeClass('MimeDecodeHelper');
                /* @var $mime_decode_helper MimeDecodeHelper */

                $mime_decode_helper->InitHelper($body);
                $nested = $mime_decode_helper->decode(true, $this->_includeBodies, $this->_decodeBodies);

                if ($nested !== false) {
                    // failure was already reported by nested "decode" call
                    $return->parts[] = $nested;
                }

                unset($mime_decode_helper);
                break;

            default:
                // "text/plain", "text/html" and any other type: body is taken as is
                if ($this->_includeBodies) {
                    $return->body = $this->_decodeBodies ? $this->_decodeBody($body, $encoding) : $body;
                }
                break;
        }

        return $return;
    }

    /**
     * Divides message into header and body parts at first empty line
     *
     * Both CRLF and bare LF line endings are recognized. Input without an empty line
     * is considered to consist of headers only.
     *
     * @param string $input
     * @return Array header part, body part
     */
    function _splitBodyHeader($input)
    {
        if (preg_match("/^(.*?)\r?\n\r?\n(.*)/s", $input, $match)) {
            return Array ($match[1], $match[2]);
        }

        // no empty line found (or regular expression engine gave up): everything is header
        return Array ($input, '');
    }

    /**
     * Parses headers string into array and optionally decode them
     *
     * Lines, that are not in "Name: value" format, are skipped.
     *
     * @param string $input
     * @param bool $decode
     * @return Array list of Array ('name' => ..., 'value' => ...) in order of appearance
     */
    function _parseHeaders($input, $decode = false)
    {
        if (!$input) {
            return Array ();
        }

        // normalize line endings, then unfold continuation lines
        $input = preg_replace("/\r\n/", "\n", $input);
        $input = preg_replace("/\n(\t| )+/", ' ', $input);

        $ret = Array ();

        foreach (explode("\n", trim($input)) as $line) {
            $pos = strpos($line, ':');

            if (!$pos) {
                // no colon at all or empty header name
                continue;
            }

            $hdr_name = rtrim(substr($line, 0, $pos));
            $hdr_value = (string)substr($line, $pos + 1);

            if (substr($hdr_value, 0, 1) === ' ') {
                // drop single space, that follows the colon
                $hdr_value = (string)substr($hdr_value, 1);
            }

            $ret[] = Array (
                'name' => $hdr_name,
                'value' => $decode ? $this->_decodeHeader($hdr_value) : $hdr_value
            );
        }

        return $ret;
    }

    /**
     * Parses header value in following format (without quotes): "multipart/alternative; boundary=001636c9274051e332048498d8cc"
     *
     * Text before first ";" is returned under "value" key. Parameters are returned under
     * "other" key, each one under both its original and its lowercased name. Surrounding
     * double quotes are removed from parameter value, and ";" inside of them doesn't end it.
     *
     * @param string $input
     * @return Array
     */
    function _parseHeaderValue($input)
    {
        $ret = Array ();
        $pos = strpos($input, ';');

        if ($pos === false) {
            $ret['value'] = trim($input);

            return $ret;
        }

        // get text until first ";"
        $ret['value'] = trim(substr($input, 0, $pos));
        $input = trim((string)substr($input, $pos + 1));

        // name, "=" and then either quoted string or everything up to next ";", that has no preceding backslash
        $regexp = '/([^\s=;]+)\s*=\s*("(?:[^"\\\\]|\\\\.)*"|(?:\\\\.|[^;])*)/s';

        if (!preg_match_all($regexp, $input, $parameters, PREG_SET_ORDER)) {
            return $ret;
        }

        foreach ($parameters as $parameter) {
            $param_name = $parameter[1];
            $param_value = trim($parameter[2]);

            if (substr($param_value, 0, 1) === '"') {
                // closing quote could be missing in broken headers
                $closed = strlen($param_value) > 1 && substr($param_value, -1) === '"';
                $param_value = (string)($closed ? substr($param_value, 1, -1) : substr($param_value, 1));
            }

            $ret['other']["$param_name"] = $param_value;
            $ret['other'][ strtolower($param_name) ] = $param_value;
        }

        return $ret;
    }

    /**
     * Splits input body using given boundary
     *
     * Text before first boundary (preamble) and text after closing boundary (epilogue)
     * are not returned.
     *
     * @param string $input
     * @param string $boundary
     * @return Array always an array, empty when no parts were found
     */
    function _boundarySplit($input, $boundary)
    {
        $parts = Array ();
        $tmp = explode('--' . $boundary, $input);
        $last = count($tmp) - 1;

        for ($i = 1; $i < $last; $i++) {
            $parts[] = $tmp[$i];
        }

        // Last piece normally starts with "--" (rest of closing boundary). If it doesn't,
        // then closing boundary is missing (truncated message) and this piece is a part too.
        if ($last > 0 && strncmp($tmp[$last], '--', 2) != 0 && strlen(trim($tmp[$last])) > 0) {
            $parts[] = $tmp[$last];
        }

        return $parts;
    }

    /**
     * Decode message header value
     *
     * Each encoded-word is decoded and converted to site encoding on its own. Text around
     * encoded-words and already decoded text are left untouched.
     *
     * @param string $input
     * @return string
     */
    function _decodeHeader($input)
    {
        // Remove white space between encoded-words (http://www.ietf.org/rfc/rfc2047.txt)
        $regexp = '/(=\?[^?]+\?(Q|B)\?[^?]*\?=)(\s)+=\?/i';

        while (preg_match($regexp, $input)) {
            // one replacement pass can't handle overlapping matches, so repeat
            $input = preg_replace($regexp, '\1=?', $input);
        }

        // elements with odd index are encoded-words, elements with even index - text between them
        $chunks = preg_split('/(=\?[^?]+\?[QB]\?[^?]*\?=)/i', $input, -1, PREG_SPLIT_DELIM_CAPTURE);

        if ($chunks === false) {
            return $input;
        }

        $ret = '';

        foreach ($chunks as $index => $chunk) {
            if ($index % 2 == 0 || !preg_match('/^=\?([^?]+)\?(Q|B)\?([^?]*)\?=\z/i', $chunk, $regs)) {
                $ret .= $chunk;
                continue;
            }

            $charset = $regs[1];
            $text = $regs[3];

            if (strtoupper($regs[2]) == 'B') {
                $text = base64_decode($text);
            } else {
                // "Q" encoding: "_" stands for space, "=XX" for byte with given hex code
                $text = $this->_decodeHexOctets(str_replace('_', ' ', $text));
            }

            $ret .= $this->convertEncoding($charset, $text);
        }

        return $ret;
    }

    /**
     * Converts encoding to one, that site uses
     *
     * Text is returned unchanged, when mbstring extension is missing or conversion
     * wasn't possible (e.g. charset name from email is not known to mbstring).
     *
     * @param string $from_engoding
     * @param string $text
     * @return string
     * @author Alex
     */
    function convertEncoding($from_engoding, $text)
    {
        if (!function_exists('mb_convert_encoding')) {
            // if mbstring extension not installed
            return $text;
        }

        static $to_encoding = false;

        if ($to_encoding === false) {
            $language =& $this->Application->recallObject('lang.current');
            /* @var $language LanguagesItem */

            $to_encoding = $language->GetDBField('Charset');
        }

        try {
            $converted = mb_convert_encoding($text, $to_encoding, $from_engoding);
        }
        catch (ValueError $e) {
            // PHP 8+ throws on unknown encoding name
            $converted = false;
        }

        // older PHP versions return "false" (and emit warning) on unknown encoding name
        return $converted === false ? $text : $converted;
    }

    /**
     * Decodes message body
     *
     * @param string $input
     * @param string $encoding value of Content-Transfer-Encoding header
     * @return string
     */
    function _decodeBody($input, $encoding = '7bit')
    {
        switch ( strtolower(trim($encoding)) ) {
            case 'quoted-printable':
                return $this->_quotedPrintableDecode($input);

            case 'base64':
                return base64_decode($input);
        }

        // for 7bit, 8bit, anything else
        return $input;
    }

    /**
     * Decodes "quoted-printable" encoding
     *
     * @param string $string
     * @return string
     */
    function _quotedPrintableDecode($string)
    {
        // Remove soft line breaks ("=" at the end of line, optionally followed by white space)
        $string = preg_replace("/=[ \t]*\r?\n/", '', $string);

        // Replace encoded characters
        return $this->_decodeHexOctets($string);
    }

    /**
     * Replaces each "=XX" sequence (XX - 2 hex digits) with byte, that it represents
     *
     * All replacements are made in one pass, so bytes produced by one sequence are never
     * mistaken for a part of another one ("=3D41" becomes "=41", not "A").
     *
     * @param string $string
     * @return string
     */
    function _decodeHexOctets($string)
    {
        if (!preg_match_all('/=[a-f0-9]{2}/i', $string, $matches)) {
            return $string;
        }

        $replace_pairs = Array ();

        foreach (array_unique($matches[0]) as $sequence) {
            $replace_pairs[$sequence] = chr(hexdec(substr($sequence, 1)));
        }

        return strtr($string, $replace_pairs);
    }
}

Event Timeline