summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: piaip <piaip@63ad8ddf-47c3-0310-b6dd-a9e9d9715204> 2014-08-08 02:01:12 +0800
committer: piaip <piaip@63ad8ddf-47c3-0310-b6dd-a9e9d9715204> 2014-08-08 02:01:12 +0800
commit: d230b2bf331fc06ef569ae19abd9848f65f25852 (patch)
tree: ddc079d457977794dcbfb9b45883eb38ff591dff
parent: 589c0a1b1657e0df72c349bab5d64852f963d66e (diff)
download: pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.gz
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.bz2
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.lz
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.xz
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.tar.zst
pttbbs-d230b2bf331fc06ef569ae19abd9848f65f25852.zip
Use Python parser.
git-svn-id: http://opensvn.csie.org/pttbbs/trunk@6043 63ad8ddf-47c3-0310-b6dd-a9e9d9715204
-rwxr-xr-x pttbbs/daemon/postd/postd.py 42
-rw-r--r-- pttbbs/daemon/postd/rebuild.c 182
-rw-r--r-- pttbbs/include/daemons.h 2
3 files changed, 26 insertions, 200 deletions
diff --git a/pttbbs/daemon/postd/postd.py b/pttbbs/daemon/postd/postd.py
index 911ceaa6..25f9b9d9 100755
--- a/pttbbs/daemon/postd/postd.py
+++ b/pttbbs/daemon/postd/postd.py
@@ -14,6 +14,7 @@ import gevent.server
import leveldb
from pyutil import pttstruct
+from pyutil import pttpost
# Ref: ../../include/daemons.h
RequestFormatString = 'HH'
@@ -25,7 +26,7 @@ AddRecordFormatString = 'III%ds' % (pttstruct.IDLEN + 1)
AddRecord = collections.namedtuple('AddRecord', 'userref ctime ipv4 userid')
CONTENT_LEN_FORMAT = 'I'
REQ_ADD = 1
-REQ_ADD2 = 2
+REQ_IMPORT = 2
REQ_GET_CONTENT = 3
_SERVER_ADDR = '127.0.0.1'
_SERVER_PORT = 5135
@@ -75,27 +76,22 @@ def LoadPost(query):
logging.debug(" => %r", UnpackPost(data))
return UnpackPost(data)
-
-def SavePost(keypak, data, extra=None):
- return SavePost2(None, keypak, data, extra)
-
-def SavePost2(content, keypak, data, extra=None):
- if extra:
- data.update(extra._asdict())
+def SavePost(legacy, keypak, data, extra=None):
logging.debug("SavePost: %r => %r", keypak, data)
key = '%s/%s' % (keypak.board, keypak.file)
g_db.set(key, serialize(data))
logging.debug(' Saved: %s', key)
- if content is None:
- content_file = os.path.join(BBSHOME, 'boards', keypak.board[0],
- keypak.board, keypak.file)
- content_length = os.path.getsize(content_file)
- else:
- logging.debug('got transfered content')
+ content_file = os.path.join(BBSHOME, 'boards', keypak.board[0],
+ keypak.board, keypak.file)
+ if legacy:
+ (content, comments) = pttpost.ParsePost(content_file)
content_length = len(content)
+ # TODO update comments
+ else:
+ content_length = os.path.getsize(content_file)
+ content = open(content_file).read()
start = time.time()
- g_db.set(key + ':content',
- open(content_file).read() if content is None else content)
+ g_db.set(key + ':content', content)
exec_time = time.time() - start
logging.debug(' Content (%d) save time: %.3fs.', content_length, exec_time)
if exec_time > 0.1:
@@ -139,18 +135,14 @@ def handle_request(socket, _):
header_blob = fd.read(pttstruct.FILEHEADER_SIZE)
addblob = fd.read(struct.calcsize(AddRecordFormatString))
keyblob = fd.read(struct.calcsize(PostKeyFormatString))
- SavePost(UnpackPostKey(keyblob), DecodeFileHeader(header_blob),
- UnpackAddRecord(addblob))
- elif req.operation == REQ_ADD2:
+ SavePost(False, UnpackPostKey(keyblob),
+ DecodeFileHeader(header_blob), UnpackAddRecord(addblob))
+ elif req.operation == REQ_IMPORT:
header_blob = fd.read(pttstruct.FILEHEADER_SIZE)
addblob = fd.read(struct.calcsize(AddRecordFormatString))
keyblob = fd.read(struct.calcsize(PostKeyFormatString))
- content_len = struct.unpack(
- CONTENT_LEN_FORMAT,
- fd.read(struct.calcsize(CONTENT_LEN_FORMAT)))[0]
- content = fd.read(content_len)
- SavePost2(content, UnpackPostKey(keyblob),
- DecodeFileHeader(header_blob), UnpackAddRecord(addblob))
+ SavePost(True, UnpackPostKey(keyblob),
+ DecodeFileHeader(header_blob), UnpackAddRecord(addblob))
elif req.operation == REQ_GET_CONTENT:
keyblob = fd.read(struct.calcsize(PostKeyFormatString))
content = GetPostContent(UnpackPostKey(keyblob))
diff --git a/pttbbs/daemon/postd/rebuild.c b/pttbbs/daemon/postd/rebuild.c
index cf6c73fd..5ae11979 100644
--- a/pttbbs/daemon/postd/rebuild.c
+++ b/pttbbs/daemon/postd/rebuild.c
@@ -2,172 +2,15 @@
#include "bbs.h"
#include "daemons.h"
-// Comments format: CommentsPrefix ANSI_COLOR(33) [AUTHOR]
-// ANSI_RESET ANSI_COLOR(33) ":" [CONTENT]
-// ANSI_RESET [trailings]
-const char *CommentsPrefix[] = {
- ANSI_COLOR(1;37) "推 ",
- ANSI_COLOR(1;31) "噓 ",
- ANSI_COLOR(1;31) "→ ", // Shared by P1 and P2 <OLDRECOMMEND>.
- NULL,
-};
-
-int CommentsExtract(const char *input,
- char *output_owner,
- char *output_content,
- char *output_trailings) {
- int i, kind = -1;
- const char *prefix = NULL, *p = NULL, *p2, *p3;
- const char *pat_Prefix2 = ANSI_COLOR(33),
- *pat_PostAuthor = ANSI_RESET ANSI_COLOR(33) ":",
- *pat_PostContent = ANSI_RESET;
- for (i = 0; !p && CommentsPrefix[i]; i++) {
- prefix = CommentsPrefix[i];
- if (!str_starts_with(input, prefix))
- continue;
- p = input + strlen(prefix);
- kind = i;
- }
- if (!p) {
- // printf("error - !p\n");
- return -1;
- }
- if (!str_starts_with(p, pat_Prefix2)) {
- printf("error - !starts_with(prefix2)\n");
- return -1;
- }
- p += strlen(pat_Prefix2);
- p2 = strstr(p, pat_PostAuthor);
- if (!p || p2 <= p) {
- printf("error - !p || p2 <= p\n");
- return -1;
- }
- // author = p..p2
- if (output_owner) {
- *output_owner = 0;
- strncat(output_owner, p, p2 - p);
- }
- p = p2 + strlen(pat_PostAuthor);
- p2 = strstr(p, pat_PostContent);
- if (!p2) {
- printf("error - !p2\n");
- return -1;
- }
- // content = p..[spaces]p2
- p3 = p2 - 1;
- while (p3 > p && *p3 == ' ')
- p3--;
- if (output_content) {
- *output_content = 0;
- strncat(output_content, p, p3 - p + 1);
- }
- p = p2 + strlen(pat_PostContent);
- if (output_trailings) {
- strcpy(output_trailings, p);
- }
- return kind;
-}
-
-int IsCrossPostLog(const char *buf) {
- // format: "※ " ANSI_COLOR(1;32) "%s" ANSI_COLOR(0;32) ":轉錄至" %s
- if (!str_starts_with(buf, "※ " ANSI_COLOR(1;32)))
- return 0;
- if (!strstr(buf, ANSI_COLOR(0;32) ":轉錄至"))
- return 0;
- printf("Found XPOST!!!\n");
- return 1;
-}
-
-char *ProcessPost(const char *filename) {
- FILE *fp = fopen(filename, "rt");
- char *content;
- long offBegin, offEnd, off;
- char buf[ANSILINELEN];
- char bufOwner[ANSILINELEN],
- bufContent[ANSILINELEN],
- bufTrailing[ANSILINELEN];
- int kind;
- assert(fp);
-
- // first line, expecting STR_AUTHOR1 or STR_AUTHOR2.
- if (fgets(buf, sizeof(buf), fp)) {
- int skip_lines = 0;
- if (strncmp(STR_AUTHOR1, buf, strlen(STR_AUTHOR1)) == 0) {
- // local file: 3 line format. (author, subject, time)
- skip_lines = 3;
- } else if (strncmp(STR_AUTHOR2, buf, strlen(STR_AUTHOR2)) == 0) {
- // remote file: 4 line format. (author, subject, time, source)
- skip_lines = 4;
- } else {
- // unknown, sorry.
- rewind(fp);
- }
- for (; skip_lines > 0; skip_lines--) {
- fgets(buf, sizeof(buf), fp);
- if (buf[0] == '\r' || buf[0] == '\n')
- break;
- }
- }
-
- // Here we want to determine end of edited contents.
- // If a user has edited his file, there will be "edit" signatures.
- // Otherwise it ends with site sig.
- // So, an easy way is to parse all "valid" comments until we've reach end of
- // file and find the starting of continuous valid comments. The only
- // exceptions are system logs - ex crosspost.
- // format: "※ " ANSI_COLOR(1;32) "%s" ANSI_COLOR(0;32) ":轉錄至" %s
-
- offBegin = ftell(fp);
- offEnd = offBegin;
- off = offEnd;
-
- while (fgets(buf, sizeof(buf), fp)) {
- off += strlen(buf);
- if (IsCrossPostLog(buf) ||
- (buf[0] == ESC_CHR &&
- CommentsExtract(buf, NULL, NULL, NULL) >= 0)) {
- // do something
- } else {
- offEnd = off;
- }
- }
-
- // Content: offBegin to offEnd.
- fseek(fp, offBegin, SEEK_SET);
- content = malloc(offEnd - offBegin + 1);
- assert(content);
- fread(content, 1, offEnd - offBegin, fp);
- content[offEnd - offBegin] = 0;
-
- // Try to parse comments
- fseek(fp, offEnd, SEEK_SET);
- while (fgets(buf, sizeof(buf), fp)) {
- if (buf[0] != ESC_CHR) // Includes IsCrossPostLog.
- continue;
- // See comments.c:FormatCommentString:
- kind = CommentsExtract(buf, bufOwner, bufContent, bufTrailing);
- assert(kind >= 0);
- chomp(bufTrailing);
- // TODO we should probably upload these comments.
- printf("K[%d], A[%s], C[%s], T[%s]\n",
- kind, bufOwner, bufContent, bufTrailing);
- }
-
- fclose(fp);
- return content;
-}
-
-int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath)
+int PostAddRecord(const char *board, const fileheader_t *fhdr)
{
int s;
PostAddRequest req = {0};
char *userid;
- char *contents;
- uint32_t content_length;
char xuid[IDLEN + 1];
req.cb = sizeof(req);
- req.operation = POSTD_REQ_ADD2;
+ req.operation = POSTD_REQ_IMPORT;
strlcpy(req.key.board, board, sizeof(req.key.board));
strlcpy(req.key.file, fhdr->filename, sizeof(req.key.file));
memcpy(&req.header, fhdr, sizeof(req.header));
@@ -208,9 +51,6 @@ int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath
strlcpy(req.extra.userid, userid, sizeof(req.extra.userid));
printf(" (userref: %s.%d)", req.extra.userid, req.extra.userref);
- // TODO try harder to parse content, to remove header and re-construct
- // comments.
-
s = toconnectex(POSTD_ADDR, 10);
if (s < 0)
return 1;
@@ -218,16 +58,6 @@ int PostAddRecord(const char *board, const fileheader_t *fhdr, const char *fpath
close(s);
return 1;
}
-
- contents = ProcessPost(fpath);
- content_length = strlen(contents);
- if (towrite(s, &content_length, sizeof(content_length)) < 0 ||
- towrite(s, contents, content_length) < 0) {
- free(contents);
- close(s);
- return 1;
- }
- free(contents);
close(s);
return 0;
}
@@ -265,7 +95,7 @@ void rebuild_board(int bid GCC_UNUSED, boardheader_t *bp)
// TODO Add .DIR sequence number.
printf(" - Adding %s", fhdr.filename);
- if (PostAddRecord(bp->brdname, &fhdr, fpath) != 0)
+ if (PostAddRecord(bp->brdname, &fhdr) != 0)
printf(" (error)");
printf("\n");
}
@@ -276,8 +106,12 @@ void rebuild_board(int bid GCC_UNUSED, boardheader_t *bp)
int main(int argc, char **argv)
{
int bid = 0;
- chdir(BBSHOME);
+ if (argc < 2) {
+ printf("usage: %s boardname ...\n", argv[0]);
+ return 1;
+ }
+ chdir(BBSHOME);
attach_SHM();
for (bid = 1; bid <= MAX_BOARD; bid++) {
diff --git a/pttbbs/include/daemons.h b/pttbbs/include/daemons.h
index b7f18b1a..32bb1c68 100644
--- a/pttbbs/include/daemons.h
+++ b/pttbbs/include/daemons.h
@@ -186,7 +186,7 @@ typedef struct {
enum {
POSTD_REQ_ADD = 1,
- POSTD_REQ_ADD2,
+ POSTD_REQ_IMPORT,
POSTD_REQ_GET_CONTENT,
};