diff options
author | piaip <piaip@63ad8ddf-47c3-0310-b6dd-a9e9d9715204> | 2014-08-08 02:01:12 +0800 |
---|---|---|
committer | piaip <piaip@63ad8ddf-47c3-0310-b6dd-a9e9d9715204> | 2014-08-08 02:01:12 +0800 |
commit | d230b2bf331fc06ef569ae19abd9848f65f25852 (patch) | |
tree | ddc079d457977794dcbfb9b45883eb38ff591dff | |
parent | 589c0a1b1657e0df72c349bab5d64852f963d66e (diff) | |
download | pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.gz pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.bz2 pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.lz pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.xz pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.zst pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.zip |
Use Python parser.
git-svn-id: http://opensvn.csie.org/pttbbs/trunk@6043 63ad8ddf-47c3-0310-b6dd-a9e9d9715204
-rwxr-xr-x | pttbbs/daemon/postd/postd.py | 42 | ||||
-rw-r--r-- | pttbbs/daemon/postd/rebuild.c | 182 | ||||
-rw-r--r-- | pttbbs/include/daemons.h | 2 |
3 files changed, 26 insertions, 200 deletions
diff --git a/pttbbs/daemon/postd/postd.py b/pttbbs/daemon/postd/postd.py index 911ceaa6..25f9b9d9 100755 --- a/pttbbs/daemon/postd/postd.py +++ b/pttbbs/daemon/postd/postd.py @@ -14,6 +14,7 @@ import gevent.server import leveldb from pyutil import pttstruct +from pyutil import pttpost # Ref: ../../include/daemons.h RequestFormatString = 'HH' @@ -25,7 +26,7 @@ AddRecordFormatString = 'III%ds' % (pttstruct.IDLEN + 1) AddRecord = collections.namedtuple('AddRecord', 'userref ctime ipv4 userid') CONTENT_LEN_FORMAT = 'I' REQ_ADD = 1 -REQ_ADD2 = 2 +REQ_IMPORT = 2 REQ_GET_CONTENT = 3 _SERVER_ADDR = '127.0.0.1' _SERVER_PORT = 5135 @@ -75,27 +76,22 @@ def LoadPost(query): logging.debug(" => %r", UnpackPost(data)) return UnpackPost(data) - -def SavePost(keypak, data, extra=None): - return SavePost2(None, keypak, data, extra) - -def SavePost2(content, keypak, data, extra=None): - if extra: - data.update(extra._asdict()) +def SavePost(legacy, keypak, data, extra=None): logging.debug("SavePost: %r => %r", keypak, data) key = '%s/%s' % (keypak.board, keypak.file) g_db.set(key, serialize(data)) logging.debug(' Saved: %s', key) - if content is None: - content_file = os.path.join(BBSHOME, 'boards', keypak.board[0], - keypak.board, keypak.file) - content_length = os.path.getsize(content_file) - else: - logging.debug('got transfered content') + content_file = os.path.join(BBSHOME, 'boards', keypak.board[0], + keypak.board, keypak.file) + if legacy: + (content, comments) = pttpost.ParsePost(content_file) content_length = len(content) + # TODO update comments + else: + content_length = os.path.getsize(content_file) + content = open(content_file).read() start = time.time() - g_db.set(key + ':content', - open(content_file).read() if content is None else content) + g_db.set(key + ':content', content) exec_time = time.time() - start logging.debug(' Content (%d) save time: %.3fs.', content_length, exec_time) if exec_time > 0.1: @@ -139,18 +135,14 @@ def handle_request(socket, _): header_blob = fd.read(pttstruct.FILEHEADER_SIZE) addblob = fd.read(struct.calcsize(AddRecordFormatString)) keyblob = fd.read(struct.calcsize(PostKeyFormatString)) - SavePost(UnpackPostKey(keyblob), DecodeFileHeader(header_blob), - UnpackAddRecord(addblob)) - elif req.operation == REQ_ADD2: + SavePost(False, UnpackPostKey(keyblob), + DecodeFileHeader(header_blob), UnpackAddRecord(addblob)) + elif req.operation == REQ_IMPORT: header_blob = fd.read(pttstruct.FILEHEADER_SIZE) addblob = fd.read(struct.calcsize(AddRecordFormatString)) keyblob = fd.read(struct.calcsize(PostKeyFormatString)) - content_len = struct.unpack( - CONTENT_LEN_FORMAT, - fd.read(struct.calcsize(CONTENT_LEN_FORMAT)))[0] - content = fd.read(content_len) - SavePost2(content, UnpackPostKey(keyblob), - DecodeFileHeader(header_blob), UnpackAddRecord(addblob)) + SavePost(True, UnpackPostKey(keyblob), + DecodeFileHeader(header_blob), UnpackAddRecord(addblob)) elif req.operation == REQ_GET_CONTENT: keyblob = fd.read(struct.calcsize(PostKeyFormatString)) content = GetPostContent(UnpackPostKey(keyblob)) diff --git a/pttbbs/daemon/postd/rebuild.c b/pttbbs/daemon/postd/rebuild.c index cf6c73fd..5ae11979 100644 --- a/pttbbs/daemon/postd/rebuild.c +++ b/pttbbs/daemon/postd/rebuild.c @@ -2,172 +2,15 @@ #include "bbs.h" #include "daemons.h" -// Comments format: CommentsPrefix ANSI_COLOR(33) [AUTHOR] -// ANSI_RESET ANSI_COLOR(33) ":" [CONTENT] -// ANSI_RESET [trailings] -const char *CommentsPrefix[] = { - ANSI_COLOR(1;37) "推 ", - ANSI_COLOR(1;31) "噓 ", - ANSI_COLOR(1;31) "→ ", // Shared by P1 and P2 <OLDRECOMMEND>. - NULL, -}; - -int CommentsExtract(const char *input, - char *output_owner, - char *output_content, - char *output_trailings) { - int i, kind = -1; - const char *prefix = NULL, *p = NULL, *p2, *p3; - const char *pat_Prefix2 = ANSI_COLOR(33), - *pat_PostAuthor = ANSI_RESET ANSI_COLOR(33) ":", - *pat_PostContent = ANSI_RESET; - for (i = 0; !p && CommentsPrefix[i]; i++) { - prefix = CommentsPrefix[i]; - if (!str_starts_with(input, prefix)) - continue; - p = input + strlen(prefix); - kind = i; - } - if (!p) { - // printf("error - !p\n"); - return -1; - } - if (!str_starts_with(p, pat_Prefix2)) { - printf("error - !starts_with(prefix2)\n"); - return -1; - } - p += strlen(pat_Prefix2); - p2 = strstr(p, pat_PostAuthor); - if (!p || p2 <= p) { - printf("error - !p || p2 <= p\n"); - return -1; - } - // author = p..p2 - if (output_owner) { - *output_owner = 0; - strncat(output_owner, p, p2 - p); - } - p = p2 + strlen(pat_PostAuthor); - p2 = strstr(p, pat_PostContent); - if (!p2) { - printf("error - !p2\n"); - return -1; - } - // content = p..[spaces]p2 - p3 = p2 - 1; - while (p3 > p && *p3 == ' ') - p3--; - if (output_content) { - *output_content = 0; - strncat(output_content, p, p3 - p + 1); - } - p = p2 + strlen(pat_PostContent); - if (output_trailings) { - strcpy(output_trailings, p); - } - return kind; -} - -int IsCrossPostLog(const char *buf) { - // format: "※ " ANSI_COLOR(1;32) "%s" ANSI_COLOR(0;32) ":轉錄至" %s - if (!str_starts_with(buf, "※ " ANSI_COLOR(1;32))) - return 0; - if (!strstr(buf, ANSI_COLOR(0;32) ":轉錄至")) - return 0; - printf("Found XPOST!!!\n"); - return 1; -} - -char *ProcessPost(const char *filename) { - FILE *fp = fopen(filename, "rt"); - char *content; - long offBegin, offEnd, off; - char buf[ANSILINELEN]; - char bufOwner[ANSILINELEN], - bufContent[ANSILINELEN], - bufTrailing[ANSILINELEN]; - int kind; - assert(fp); - - // first line, expecting STR_AUTHOR1 or STR_AUTHOR2. - if (fgets(buf, sizeof(buf), fp)) { - int skip_lines = 0; - if (strncmp(STR_AUTHOR1, buf, strlen(STR_AUTHOR1)) == 0) { - // local file: 3 line format. (author, subject, time) - skip_lines = 3; - } else if (strncmp(STR_AUTHOR2, buf, strlen(STR_AUTHOR2)) == 0) { - // remote file: 4 line format. (author, subject, time, source) - skip_lines = 4; - } else { - // unknown, sorry. - rewind(fp); - } - for (; skip_lines > 0; skip_lines--) { - fgets(buf, sizeof(buf), fp); - if (buf[0] == '\r' || buf[0] == '\n') - break; - } - } - - // Here we want to determine end of edited contents. - // If a user has edited his file, there will be "edit" signatures. - // Otherwise it ends with site sig. - // So, an easy way is to parse all "valid" comments until we've reach end of - // file and find the starting of continuous valid comments. The only - // exceptions are system logs - ex crosspost. - // format: "※ " ANSI_COLOR(1;32) "%s" ANSI_COLOR(0;32) ":轉錄至" %s - - offBegin = ftell(fp); - offEnd = offBegin; - off = offEnd; - - while (fgets(buf, sizeof(buf), fp)) { - off += strlen(buf); - if (IsCrossPostLog(buf) || - (buf[0] == ESC_CHR && - CommentsExtract(buf, NULL, NULL, NULL) >= 0)) { - // do something - } else { - offEnd = off; - } - } - - // Content: offBegin to offEnd. - fseek(fp, offBegin, SEEK_SET); - content = malloc(offEnd - offBegin + 1); - assert(content); - fread(content, 1, offEnd - offBegin, fp); - content[offEnd - offBegin] = 0; - - // Try to parse comments - fseek(fp, offEnd, SEEK_SET); - while (fgets(buf, sizeof(buf), fp)) { - if (buf[0] != ESC_CHR) // Includes IsCrossPostLog. - continue; - // See comments.c:FormatCommentString: - kind = CommentsExtract(buf, bufOwner, bufContent, bufTrailing); - assert(kind >= 0); - chomp(bufTrailing); - // TODO we should probably upload these comments. - printf("K[%d], A[%s], C[%s], T[%s]\n", - kind, bufOwner, bufContent, bufTrailing); - } - - fclose(fp); - return content; -} - -int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath) +int PostAddRecord(const char *board, const fileheader_t *fhdr) { int s; PostAddRequest req = {0}; char *userid; - char *contents; - uint32_t content_length; char xuid[IDLEN + 1]; req.cb = sizeof(req); - req.operation = POSTD_REQ_ADD2; + req.operation = POSTD_REQ_IMPORT; strlcpy(req.key.board, board, sizeof(req.key.board)); strlcpy(req.key.file, fhdr->filename, sizeof(req.key.file)); memcpy(&req.header, fhdr, sizeof(req.header)); @@ -208,9 +51,6 @@ int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath strlcpy(req.extra.userid, userid, sizeof(req.extra.userid)); printf(" (userref: %s.%d)", req.extra.userid, req.extra.userref); - // TODO try harder to parse content, to remove header and re-construct - // comments. - s = toconnectex(POSTD_ADDR, 10); if (s < 0) return 1; @@ -218,16 +58,6 @@ int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath close(s); return 1; } - - contents = ProcessPost(fpath); - content_length = strlen(contents); - if (towrite(s, &content_length, sizeof(content_length)) < 0 || - towrite(s, contents, content_length) < 0) { - free(contents); - close(s); - return 1; - } - free(contents); close(s); return 0; } @@ -265,7 +95,7 @@ void rebuild_board(int bid GCC_UNUSED, boardheader_t *bp) // TODO Add .DIR sequence number. printf(" - Adding %s", fhdr.filename); - if (PostAddRecord(bp->brdname, &fhdr, fpath) != 0) + if (PostAddRecord(bp->brdname, &fhdr) != 0) printf(" (error)"); printf("\n"); } @@ -276,8 +106,12 @@ void rebuild_board(int bid GCC_UNUSED, boardheader_t *bp) int main(int argc, char **argv) { int bid = 0; - chdir(BBSHOME); + if (argc < 2) { + printf("usage: %s boardname ...\n", argv[0]); + return 1; + } + chdir(BBSHOME); attach_SHM(); for (bid = 1; bid <= MAX_BOARD; bid++) { diff --git a/pttbbs/include/daemons.h b/pttbbs/include/daemons.h index b7f18b1a..32bb1c68 100644 --- a/pttbbs/include/daemons.h +++ b/pttbbs/include/daemons.h @@ -186,7 +186,7 @@ typedef struct { enum { POSTD_REQ_ADD = 1, - POSTD_REQ_ADD2, + POSTD_REQ_IMPORT, POSTD_REQ_GET_CONTENT, }; |