00001 #ifndef _MIMETIC_PARSER_ITPARSER_H_
00002 #define _MIMETIC_PARSER_ITPARSER_H_
#include <algorithm>
#include <cassert>
#include <cctype>
#include <cstddef>
#include <cstring>
#include <iostream>
#include <iterator>
#include <list>
#include <stack>
#include <string>
#include <mimetic/tree.h>
#include <mimetic/utils.h>
#include <mimetic/mimeentity.h>
00010
00011
00012
00013
00014 namespace mimetic
00015 {
00016
00017
/// Primary template of the iterator-driven MIME parser.
///
/// It is intentionally empty: the parsing code lives in the partial
/// specializations selected through \p ItCategory, which defaults to the
/// iterator category reported by std::iterator_traits for \p Iterator.
template<
    typename Iterator,
    typename ItCategory =
        typename std::iterator_traits<Iterator>::iterator_category
>
struct IteratorParser
{
};
00023
00024
00025
00026
00027 template<typename Iterator>
00028 struct IteratorParser<Iterator, std::input_iterator_tag>
00029 {
00030
00031 IteratorParser(MimeEntity& me)
00032 : m_me(me), m_iMask(imNone), m_lastBoundary(NoBoundary)
00033 {
00034 m_entityStack.push(&m_me);
00035 }
00036 virtual ~IteratorParser()
00037 {
00038 }
00039
00040
00041
00042 void iMask(size_t mask) { m_iMask = mask; }
00043
00044
00045
00046 size_t iMask() const { return m_iMask; }
00047
00048
00049
00050 void run(Iterator bit, Iterator eit)
00051 {
00052 m_bit = bit;
00053 m_eit = eit;
00054 doLoad();
00055 }
00056 protected:
00057 typedef std::list<std::string> BoundaryList;
00058 enum {
00059 CR = 0xD,
00060 LF = 0xA,
00061 NL = '\n'
00062 };
00063 enum {
00064 peIgnore,
00065 pePreamble,
00066 peBody,
00067 peEpilogue
00068 };
00069 enum BoundaryType {
00070 NoBoundary = 0,
00071 Boundary,
00072 ClosingBoundary,
00073 HigherLevelBoundary
00074
00075 };
00076 enum EntityType {
00077 etRfc822,
00078 etMsgRfc822,
00079 etMultipart
00080 };
00081
00082 MimeEntity& m_me;
00083 Iterator m_bit, m_eit;
00084 size_t m_iMask;
00085 BoundaryList m_boundaryList;
00086 BoundaryType m_lastBoundary;
00087 std::stack<MimeEntity*> m_entityStack;
00088
00089 protected:
00090 void appendPreambleBlock(const char* buf, int sz)
00091 {
00092 MimeEntity* pMe = m_entityStack.top();
00093 pMe->body().preamble().append(buf,sz);
00094 }
00095
00096 void appendEpilogueBlock(const char* buf, int sz)
00097 {
00098 MimeEntity* pMe = m_entityStack.top();
00099 pMe->body().epilogue().append(buf,sz);
00100 }
00101
00102 void appendBodyBlock(const char* buf, int sz)
00103 {
00104 MimeEntity* pMe = m_entityStack.top();
00105 pMe->body().append(buf, sz);
00106 }
00107
00108 std::string getBoundary()
00109 {
00110 const MimeEntity* pMe = m_entityStack.top();
00111 const ContentType& ct = pMe->header().contentType();
00112 return std::string("--") + ct.param("boundary");
00113 }
00114
00115 void popChild()
00116 {
00117 m_entityStack.pop();
00118 }
00119
00120 void pushNewChild()
00121 {
00122 MimeEntity* pMe = m_entityStack.top();
00123 MimeEntity* pChild = new MimeEntity;
00124 pMe->body().parts().push_back(pChild);
00125 m_entityStack.push(pChild);
00126 }
00127
00128 EntityType getType()
00129 {
00130 MimeEntity* pMe = m_entityStack.top();
00131 const Header& h = pMe->header();
00132
00133
00134 const ContentType& ct = h.contentType();
00135 if(ct.isMultipart())
00136 return etMultipart;
00137 else if (ct.type() == "message" && ct.subtype() == "rfc822")
00138 return etMsgRfc822;
00139 else
00140 return etRfc822;
00141 }
00142
00143 void addField(const std::string& name, const std::string& value)
00144 {
00145 MimeEntity* pMe = m_entityStack.top();
00146 Header& h = pMe->header();
00147 Header::iterator it = h.insert(h.end(), Field());
00148 it->name(name);
00149 it->value(value);
00150 }
00151
00152 BoundaryType isBoundary(const std::string& line)
00153 {
00154 if(line.length() == 0 || line[0] != '-')
00155 return m_lastBoundary = NoBoundary;
00156
00157 int level = 0;
00158 int lineLen = line.length();
00159 BoundaryList::const_iterator bit,eit;
00160 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00161 for(;bit != eit; ++bit, ++level)
00162 {
00163 const std::string& b = *bit;
00164 int bLen = b.length();
00165 if(line.compare(0, bLen, b) == 0)
00166 {
00167
00168 if(level > 0)
00169 return m_lastBoundary=HigherLevelBoundary;
00170
00171 if(lineLen > bLen && line.compare(bLen,2,"--") == 0)
00172 return m_lastBoundary = ClosingBoundary;
00173 else
00174 return m_lastBoundary = Boundary;
00175 }
00176 }
00177 return m_lastBoundary = NoBoundary;
00178 }
00179
00180 inline bool isnl(char c) const
00181 {
00182 return (c == CR || c == LF);
00183 }
00184
00185 inline bool isnl(char a, char b) const
00186 {
00187 if(a == CR || a == LF)
00188 if(b == (a == CR ? LF : CR))
00189 return true;
00190 return false;
00191 }
00192 void doLoad()
00193 {
00194 loadHeader();
00195 loadBody();
00196 }
00197 bool valid() const
00198 {
00199 return m_bit != m_eit;
00200 }
00201 void append(char*& buf, size_t& bufsz, char c, size_t& pos)
00202 {
00203 enum { alloc_block = 128};
00204 if(pos == bufsz)
00205 {
00206
00207 char* tmp = buf;
00208 int oldBufsz = bufsz;
00209 while(pos >= bufsz)
00210 bufsz = bufsz + alloc_block;
00211 buf = new char[bufsz+1];
00212 if(tmp != 0)
00213 {
00214 assert(oldBufsz > 0);
00215 memset(buf, 0, bufsz);
00216 memcpy(buf, tmp, oldBufsz);
00217 delete[] tmp;
00218 }
00219 }
00220 buf[pos++] = c;
00221 }
00222
00223
00224 void loadHeader()
00225 {
00226 enum {
00227 sInit,
00228 sIgnoreLine,
00229 sNewline,
00230 sWaitingName,
00231 sWaitingValue,
00232 sWaitingFoldedValue,
00233 sName,
00234 sValue,
00235 sIgnoreHeader
00236 };
00237 register int status;
00238 int pos;
00239 char *name, *value;
00240 size_t nBufSz, vBufSz, nPos, vPos;
00241 char prev, c = 0;
00242
00243 name = value = 0;
00244 pos = nBufSz = vBufSz = nPos = vPos = 0;
00245 status = (m_iMask & imHeader ? sIgnoreHeader : sInit);
00246
00247 while(m_bit != m_eit)
00248 {
00249 c = *m_bit;
00250 switch(status)
00251 {
00252 case sInit:
00253 if(isnl(c))
00254 status = sNewline;
00255 else
00256 status = sName;
00257 continue;
00258 case sIgnoreLine:
00259 if(!isnl(c))
00260 break;
00261 status = sNewline;
00262 continue;
00263 case sNewline:
00264 status = sWaitingName;
00265 if(pos > 0)
00266 {
00267 pos = 0;
00268 prev = c;
00269 if(++m_bit == m_eit) goto out;
00270 c = *m_bit;
00271 if(c == (prev == CR ? LF : CR))
00272 {
00273 --pos;
00274 break;
00275 } else
00276 continue;
00277 } else {
00278
00279 prev = c;
00280 if(++m_bit == m_eit) goto out;
00281 c = *m_bit;
00282 if(c == (prev == CR ? LF : CR))
00283 ++m_bit;
00284 goto out;
00285 }
00286 case sWaitingName:
00287 if(isblank(c))
00288 {
00289
00290 status = sWaitingFoldedValue;
00291 continue;
00292 }
00293
00294 if(nPos)
00295 {
00296 name[nPos] = 0;
00297
00298 if(vPos)
00299 {
00300 value[vPos] = 0;
00301 addField(name,value);
00302 } else
00303 addField(name,"");
00304 nPos = vPos = 0;
00305 }
00306 status = (isnl(c) ? sNewline : sName);
00307 continue;
00308 case sWaitingValue:
00309 if(isblank(c))
00310 break;
00311 status = sValue;
00312 continue;
00313 case sWaitingFoldedValue:
00314 if(isblank(c))
00315 break;
00316 append(value, vBufSz, ' ', vPos);
00317 status = sValue;
00318 continue;
00319 case sName:
00320 if(c > 32 && c < 127 && c != ':') {
00321 append(name, nBufSz, c, nPos);
00322 } else if(c == ':') {
00323 status = sWaitingValue;
00324 } else {
00325 nPos = 0;
00326 status = sIgnoreLine;
00327 continue;
00328 }
00329 break;
00330 case sValue:
00331 if(isnl(c))
00332 {
00333 status = sNewline;
00334 continue;
00335 }
00336 append(value, vBufSz, c, vPos);
00337 break;
00338 case sIgnoreHeader:
00339 if(isnl(c))
00340 {
00341 prev = c;
00342 if(++m_bit == m_eit) goto out;
00343 c = *m_bit;
00344 if(c == (prev == CR ? LF : CR))
00345 ++m_bit;
00346 if(pos == 0)
00347 goto out;
00348 pos = 0;
00349 continue;
00350 }
00351 break;
00352 }
00353 ++m_bit; ++pos;
00354 }
00355 out:
00356 if(name)
00357 delete[] name;
00358 if(value)
00359 delete[] value;
00360 return;
00361 }
00362 void loadBody()
00363 {
00364 switch(getType())
00365 {
00366 case etRfc822:
00367 if(m_iMask & imBody)
00368 jump_to_next_boundary();
00369 else
00370 copy_until_boundary(peBody);
00371 break;
00372 case etMultipart:
00373 loadMultipart();
00374 break;
00375 case etMsgRfc822:
00376 if(m_iMask & imChildParts)
00377 jump_to_next_boundary();
00378 else {
00379 pushNewChild();
00380 doLoad();
00381 popChild();
00382 }
00383 break;
00384 }
00385 }
00386 void loadMultipart()
00387 {
00388 std::string boundary = getBoundary();
00389 m_boundaryList.push_front(boundary);
00390 ParsingElem pe;
00391
00392 pe = (m_iMask & imPreamble ? peIgnore : pePreamble );
00393 copy_until_boundary(pe);
00394 while(m_bit != m_eit)
00395 {
00396 switch(m_lastBoundary)
00397 {
00398 case NoBoundary:
00399 return;
00400 case Boundary:
00401 if(m_iMask & imChildParts)
00402 jump_to_next_boundary();
00403 else {
00404 pushNewChild();
00405 doLoad();
00406 popChild();
00407 }
00408 break;
00409 case ClosingBoundary:
00410 m_boundaryList.erase(m_boundaryList.begin());
00411
00412 pe=(m_iMask & imEpilogue? peIgnore: peEpilogue);
00413 copy_until_boundary(pe);
00414 return;
00415 case HigherLevelBoundary:
00416 m_boundaryList.erase(m_boundaryList.begin());
00417 return;
00418 }
00419 }
00420 }
00421 inline void onBlock(const char* block, int sz, ParsingElem pe)
00422 {
00423 switch(pe)
00424 {
00425 case peIgnore:
00426 return;
00427 case pePreamble:
00428 appendPreambleBlock(block, sz);
00429 break;
00430 case peEpilogue:
00431 appendEpilogueBlock(block, sz);
00432 break;
00433 case peBody:
00434 appendBodyBlock(block, sz);
00435 break;
00436 }
00437 }
00438 void jump_to_next_boundary()
00439 {
00440 copy_until_boundary(peIgnore);
00441 }
00442
00443
00444
00445 virtual void copy_until_boundary(ParsingElem pe)
00446 {
00447 size_t pos, lines, eomsz;
00448 register char c;
00449 enum { nlsz = 1 };
00450 char nl[2] = { NL, 0 };
00451 const char *eom = 0;
00452
00453 enum { blksz = 4096 };
00454 char block[blksz];
00455 size_t blkpos = 0;
00456 size_t sl_off = 0;
00457
00458 pos = lines = 0;
00459 while(m_bit != m_eit)
00460 {
00461
00462 if(blkpos >= blksz - 2 - nlsz)
00463 {
00464 if(sl_off == 0)
00465 {
00466
00467
00468
00469 block[blkpos] = 0;
00470 onBlock(block, blkpos, pe);
00471 blkpos = sl_off = 0;
00472 } else {
00473
00474
00475 size_t llen = blkpos - sl_off;
00476 onBlock(block, sl_off, pe);
00477 memmove(block, block + sl_off, llen);
00478 sl_off = 0;
00479 blkpos = llen;
00480 }
00481 }
00482 c = *m_bit;
00483 if(isnl(c))
00484 {
00485 char nlbuf[3] = { 0, 0, 0 };
00486
00487 nlbuf[0] = c;
00488
00489
00490 if(++m_bit != m_eit)
00491 {
00492 char next = *m_bit;
00493 if(next == (c == CR ? LF : CR))
00494 {
00495 nlbuf[1] = next;
00496 ++m_bit;
00497 }
00498 }
00499
00500 if(pos)
00501 {
00502
00503 block[blkpos] = 0;
00504 if(block[sl_off] == '-' &&
00505 sl_off < blkpos &&
00506 block[sl_off+1] == '-')
00507 {
00508 std::string Line(block+sl_off, blkpos-sl_off);
00509 if(isBoundary(Line))
00510 {
00511
00512 int i = sl_off;
00513 char a = block[--i];
00514 char b = block[--i];
00515 if(isnl(a,b))
00516 sl_off -= 2;
00517 else if(isnl(a))
00518 sl_off--;
00519 onBlock(block, sl_off,
00520 pe);
00521 return;
00522 }
00523 }
00524
00525
00526 if(eom && pos >= eomsz)
00527 {
00528 char *line = block + sl_off;
00529 size_t i = 0;
00530 for(; i < eomsz; i++)
00531 if(eom[i] != line[i])
00532 break;
00533 if(i==eomsz)
00534 {
00535 onBlock(block, sl_off,
00536 pe);
00537 return;
00538 }
00539 }
00540 }
00541
00542 for(int i = 0; nlbuf[i] != 0; i++)
00543 block[blkpos++] = nlbuf[i];
00544 block[blkpos] = 0;
00545 sl_off = blkpos;
00546 pos = 0;
00547 } else {
00548 pos++;
00549 block[blkpos++] = c;
00550 ++m_bit;
00551 }
00552 }
00553
00554 block[blkpos] = 0;
00555 onBlock(block, blkpos, pe);
00556 }
00557 };
00558
00559
00560
00561
00562
00563 template<typename Iterator>
00564 struct IteratorParser<Iterator, std::forward_iterator_tag>:
00565 public IteratorParser<Iterator, std::input_iterator_tag>
00566 {
00567
00568
00569
00570
00571
00572 typedef IteratorParser<Iterator, std::input_iterator_tag> base_type;
00573 IteratorParser(MimeEntity& me)
00574 : base_type(me)
00575 {
00576 }
00577 };
00578
00579
00580
00581
00582 template<typename Iterator>
00583 struct IteratorParser<Iterator, std::bidirectional_iterator_tag>:
00584 public IteratorParser<Iterator, std::forward_iterator_tag>
00585 {
00586 typedef IteratorParser<Iterator, std::forward_iterator_tag> base_type;
00587 IteratorParser(MimeEntity& me)
00588 : base_type(me)
00589 {
00590 }
00591 };
00592
00593
00594
00595
00596 template<typename Iterator>
00597 struct IteratorParser<Iterator, std::random_access_iterator_tag>:
00598 public IteratorParser<Iterator, std::bidirectional_iterator_tag>
00599 {
00600 typedef IteratorParser<Iterator, std::bidirectional_iterator_tag> base_type;
00601 IteratorParser(MimeEntity& me)
00602 : base_type(me)
00603 {
00604 }
00605 private:
00606 using base_type::peIgnore;
00607 using base_type::pePreamble;
00608 using base_type::peBody;
00609 using base_type::peEpilogue;
00610
00611 using base_type::NoBoundary;
00612 using base_type::Boundary;
00613 using base_type::ClosingBoundary;
00614 using base_type::HigherLevelBoundary;
00615
00616 using base_type::m_boundaryList;
00617 using base_type::m_lastBoundary;
00618 using base_type::m_entityStack;
00619 using base_type::m_me;
00620 using base_type::m_iMask;
00621 using base_type::m_bit;
00622 using base_type::m_eit;
00623 using base_type::isnl;
00624
00625 typedef TreeNode<char> BoundaryTree;
00626 inline void onBlock(Iterator bit, int size, ParsingElem pe)
00627 {
00628 if(pe == peIgnore)
00629 return;
00630 Iterator eit = bit + size;
00631 MimeEntity* pMe = m_entityStack.top();
00632 switch(pe)
00633 {
00634 case pePreamble:
00635 pMe->body().preamble().append(bit, eit);
00636 break;
00637 case peEpilogue:
00638 pMe->body().epilogue().append(bit, eit);
00639 break;
00640 case peBody:
00641 pMe->body().append(bit, eit);
00642 break;
00643 }
00644 }
00645 void copy_until_boundary(ParsingElem pe)
00646 {
00647
00648 if(m_boundaryList.empty())
00649 {
00650 onBlock(m_bit, m_eit-m_bit, pe);
00651 m_bit = m_eit;
00652 return;
00653 }
00654
00655
00656
00657 typename base_type::BoundaryList::const_iterator
00658 bBit = m_boundaryList.begin(), bEit = m_boundaryList.end();
00659 m_lastBoundary = NoBoundary;
00660 int depth = 0;
00661 for( ;bBit != bEit; ++bBit, ++depth)
00662 {
00663 const std::string& boundary = *bBit;
00664 Iterator off;
00665 if( (off=utils::find_bm(m_bit,m_eit,boundary)) != m_eit)
00666 {
00667 Iterator base = m_bit;
00668 size_t block_sz = off - base;
00669 m_lastBoundary =
00670 (depth ? HigherLevelBoundary: Boundary);
00671 off += boundary.length();
00672 m_bit = off;
00673 if(off<m_eit-1 && *off =='-' && *(off+1) == '-')
00674 {
00675 m_lastBoundary = ClosingBoundary;
00676 m_bit = off + 2;
00677 }
00678 if(m_bit < m_eit && isnl(*m_bit))
00679 {
00680 char c = *m_bit++;
00681 char next = *m_bit;
00682 if(isnl(next) && next != c)
00683 ++m_bit;
00684 }
00685
00686
00687 if(block_sz)
00688 {
00689 Iterator p = base + block_sz;
00690 char a = *--p, b = *--p;
00691 if(isnl(a,b))
00692 block_sz -= 2;
00693 else if(isnl(a))
00694 block_sz--;
00695 }
00696 onBlock(base, block_sz, pe);
00697 return;
00698 } else {
00699 onBlock(m_bit, m_eit-m_bit, pe);
00700 m_bit = m_eit;
00701 }
00702 }
00703 }
00704 BoundaryTree m_boundaryTree;
00705 void buildBoundaryTree()
00706 {
00707 m_boundaryTree = BoundaryTree();
00708 typename base_type::BoundaryList::const_iterator
00709 bit = m_boundaryList.begin(), eit = m_boundaryList.end();
00710 BoundaryTree::NodeList *pChilds;
00711 BoundaryTree::NodeList::iterator it;
00712 int depth = 0;
00713 for( ; bit != eit; ++bit)
00714 {
00715 pChilds = &m_boundaryTree.childList();
00716 it = pChilds->begin();
00717 const char *w = bit->c_str();
00718 do
00719 {
00720 it = find_if(pChilds->begin(), pChilds->end(),
00721 FindNodePred<char>(*w));
00722 if( it == pChilds->end() )
00723 it = pChilds->insert(pChilds->end(),*w);
00724 pChilds = &it->childList();
00725 depth++;
00726 } while(*(++w));
00727 }
00728 }
00729
00730 };
00731
00732 }
00733
00734 #endif