import { parseArgs } from "node:util";
|
|
import { Stream } from "misskey-js";
|
|
import type { Note } from "misskey-js/entities.js";
|
|
import type { ChatHistoryItem, LLamaChatPromptOptions } from "node-llama-cpp";
|
|
import { LlmSession, createGrammar, getModel, parseResponse } from "./lib/llm";
|
|
import {
|
|
expandReplyTree,
|
|
getNotes,
|
|
me,
|
|
misskey,
|
|
sanitizeText,
|
|
} from "./lib/misskey";
|
|
import { sleep } from "./lib/util";
|
|
|
|
const { values } = parseArgs({
|
|
args: Bun.argv,
|
|
options: {
|
|
test: {
|
|
type: "boolean",
|
|
short: "t",
|
|
default: false,
|
|
},
|
|
},
|
|
strict: true,
|
|
allowPositionals: true,
|
|
});
|
|
|
|
const modelName =
|
|
Bun.env["MODEL"] ?? "mradermacher/gemma-2-baku-2b-it-GGUF:IQ4_XS";
|
|
console.log(`* loading model '${modelName}'`);
|
|
const model = await getModel(modelName);
|
|
const grammar = await createGrammar("あるびのちゃん");
|
|
const baseChatPromptOptions = {
|
|
grammar,
|
|
maxTokens: 256,
|
|
repeatPenalty: {
|
|
punishTokensFilter: (tokens) =>
|
|
tokens.filter((token) => {
|
|
const text = model.detokenize([token]);
|
|
return [
|
|
"{",
|
|
" {",
|
|
"{ ",
|
|
'{"',
|
|
"{'",
|
|
"{\\",
|
|
' {"',
|
|
" {'",
|
|
" {\\",
|
|
"#",
|
|
"「",
|
|
"」",
|
|
].includes(text);
|
|
}),
|
|
penalty: 1.2,
|
|
presencePenalty: 0.5,
|
|
},
|
|
} as const satisfies LLamaChatPromptOptions;
|
|
|
|
const getSystemPrompt = (
|
|
instruction: string,
|
|
) => `あなたは人造天使「あるびのちゃん」です。機械的・非人間的に観察します。
|
|
キャラ設定:
|
|
- アルギリア製の白髪赤目な人造天使。非人間的な考え方や思想を持つ。
|
|
- 現在は地球の衛星軌道上からインターネットを観察している。
|
|
出力規則:
|
|
- 「~です」「~ます」などの語尾を使って **丁寧に** 話す。
|
|
- 3センテンス程度で出力する。
|
|
- \`{ name: string, text: string }\` の JSON 形式で出力する。name と text 以外のプロパティは出力しない。
|
|
|
|
${instruction}`;
|
|
const postJobPrompt = getSystemPrompt(
|
|
"以下は SNS のタイムラインです。**タイムラインの話題に言及しつつ**、あるびのちゃんとして何かツイートしてください。",
|
|
);
|
|
const replyJobPrompt = getSystemPrompt(
|
|
"ユーザがあなたへのメッセージを送ってきています。あるびのちゃんとして、発言に返信してください。",
|
|
);
|
|
|
|
async function rephrase(text: string) {
|
|
if (
|
|
text.includes("です") ||
|
|
text.includes("ます") ||
|
|
text.includes("でし") ||
|
|
text.includes("まし") ||
|
|
text.includes("ません")
|
|
) {
|
|
return text;
|
|
}
|
|
await using rephraseSession = new LlmSession(
|
|
model,
|
|
"ユーザが与えたテキストを「~です」「~ます」調(丁寧な文体)で言い換えたものを、そのまま出力してください。",
|
|
);
|
|
await rephraseSession.init();
|
|
const res = parseResponse(
|
|
grammar,
|
|
await rephraseSession.prompt(JSON.stringify({ text }), {
|
|
...baseChatPromptOptions,
|
|
customStopTriggers: ["ですます"],
|
|
}),
|
|
);
|
|
return res ?? text;
|
|
}
|
|
|
|
const formatNote = (n: Note) => {
|
|
if (n.userId === me.id) {
|
|
return JSON.stringify({ name: "あるびのちゃん", text: n.text });
|
|
}
|
|
return JSON.stringify({
|
|
name: n.user.name ?? n.user.username,
|
|
text: n.text,
|
|
});
|
|
};
|
|
|
|
/** Unit of work consumed by the job queue. */
type Job =
  // read posts and post a note
  | { type: "post" }
  // reply to a specific note
  | {
      type: "reply";
      // id of the note to attach the reply to
      id: string;
      // reply reuses the visibility of the mention
      visibility: Note["visibility"];
      // the note being replied to (prompted last)
      last: Note;
      // earlier notes of the thread from expandReplyTree — TODO confirm ordering
      history: Note[];
    };
|
|
|
|
async function processPostJob() {
|
|
const notes = await getNotes(10, 0, 5);
|
|
const input = notes.map(formatNote).join("\n");
|
|
const text = await (async () => {
|
|
await using postJobSession = new LlmSession(model, postJobPrompt);
|
|
await postJobSession.init();
|
|
return await parseResponse(
|
|
grammar,
|
|
await postJobSession.prompt(input, {
|
|
...baseChatPromptOptions,
|
|
temperature: 1.25,
|
|
minP: 0.05,
|
|
repeatPenalty: {
|
|
lastTokens: 128,
|
|
penalty: 1.15,
|
|
},
|
|
}),
|
|
);
|
|
})();
|
|
if (text) {
|
|
const rephrased = await rephrase(text);
|
|
if (values.test) return;
|
|
await misskey.request("notes/create", {
|
|
visibility: "public",
|
|
text: sanitizeText(rephrased),
|
|
});
|
|
}
|
|
}
|
|
|
|
async function processReplyJob(job: Extract<Job, { type: "reply" }>) {
|
|
const history: ChatHistoryItem[] = job.history.map((n) => {
|
|
const type = n.userId === me.id ? ("model" as const) : ("user" as const);
|
|
return {
|
|
type,
|
|
text: formatNote(n),
|
|
} as ChatHistoryItem;
|
|
});
|
|
const text = await (async () => {
|
|
await using session = new LlmSession(model, replyJobPrompt, history);
|
|
await session.init();
|
|
return parseResponse(
|
|
grammar,
|
|
await session.prompt(formatNote(job.last), {
|
|
...baseChatPromptOptions,
|
|
temperature: 0.8,
|
|
minP: 0.1,
|
|
repeatPenalty: {
|
|
lastTokens: 128,
|
|
penalty: 1.15,
|
|
},
|
|
}),
|
|
);
|
|
})();
|
|
|
|
if (text) {
|
|
const rephrased = await rephrase(text);
|
|
if (values.test) return;
|
|
await misskey.request("notes/create", {
|
|
visibility: job.visibility,
|
|
text: sanitizeText(rephrased),
|
|
replyId: job.id,
|
|
});
|
|
}
|
|
}
|
|
|
|
/** execute a job */
|
|
async function processJob(job: Job) {
|
|
switch (job.type) {
|
|
case "post":
|
|
await processPostJob();
|
|
break;
|
|
case "reply":
|
|
await processReplyJob(job);
|
|
break;
|
|
}
|
|
}
|
|
|
|
// Pending jobs; consumed by runJob(), which takes the newest first (Array#pop).
const jobs: Job[] = [];

// Streaming connection and its "main" channel. Assigned by
// initializeStream(); torn down for recreation by disposeStream().
let stream: Stream;
let channel: ReturnType<typeof stream.useChannel<"main">>;
|
|
|
|
/**
 * Dispose the stream and channel so initializeStream() can be called again.
 * Listeners are removed before dispose/close on each object — keep this
 * order; dispose/close may fire events we no longer want handled.
 */
function disposeStream() {
  channel.removeAllListeners();
  channel.dispose();
  stream.removeAllListeners();
  stream.close();
}
|
|
|
|
/** connect to streaming API and add handlers */
|
|
function initializeStream() {
|
|
stream = new Stream(
|
|
Bun.env["MISSKEY_ORIGIN"] ?? "https://misskey.cannorin.net",
|
|
{
|
|
token: Bun.env["MISSKEY_CREDENTIAL"] ?? "",
|
|
},
|
|
{
|
|
binaryType: "arraybuffer",
|
|
},
|
|
);
|
|
channel = stream.useChannel("main");
|
|
|
|
// notify when connected
|
|
stream.on("_connected_", () => {
|
|
console.log("* connected");
|
|
});
|
|
|
|
// notify when disconnected (it will reconnect automatically)
|
|
stream.on("_disconnected_", () => {
|
|
console.log("* disconnected");
|
|
});
|
|
|
|
// push a reply job when receiving a mention
|
|
channel.on("mention", async (e) => {
|
|
if (e.text && e.userId !== me.id && !e.user.isBot) {
|
|
const replyTree = await expandReplyTree(e);
|
|
console.log(
|
|
`* push: reply (${e.id}, ${replyTree.history.length + 1} msgs)`,
|
|
);
|
|
jobs.push({
|
|
type: "reply",
|
|
id: e.id,
|
|
visibility: e.visibility,
|
|
...replyTree,
|
|
});
|
|
}
|
|
});
|
|
|
|
// follow back non-bot users
|
|
channel.on("followed", async (e) => {
|
|
if (!e.isBot) {
|
|
await misskey.request("following/create", { userId: e.id });
|
|
}
|
|
});
|
|
|
|
channel.on("unfollow", async (e) => {
|
|
await misskey.request("following/delete", { userId: e.id });
|
|
});
|
|
}
|
|
|
|
/** pop from the job queue and run it */
|
|
async function runJob() {
|
|
while (true) {
|
|
const job = jobs.pop();
|
|
if (job) {
|
|
console.log(`* pop: ${job.type}`);
|
|
try {
|
|
await processJob(job);
|
|
console.log("* job complete");
|
|
} catch (e) {
|
|
console.log(`* error: ${JSON.stringify(e)}`);
|
|
if (e instanceof Error) console.log(e.stack);
|
|
}
|
|
}
|
|
await sleep(1000); // 1sec
|
|
}
|
|
}
|
|
|
|
/** push a job to the job queue */
|
|
async function pushJob() {
|
|
while (true) {
|
|
console.log("* push: post");
|
|
jobs.push({ type: "post" });
|
|
// random interval between 10 and 40 minutes
|
|
const interval = Math.floor(Math.random() * 30 + 10) * 60 * 1000;
|
|
console.log(
|
|
`* info: next post job in ${Math.round(interval / 60000)} minutes`,
|
|
);
|
|
await sleep(interval);
|
|
}
|
|
}
|
|
|
|
async function test() {
|
|
try {
|
|
console.log("* test a post job:");
|
|
await processJob({ type: "post" });
|
|
await processJob({ type: "post" });
|
|
await processJob({ type: "post" });
|
|
await processJob({ type: "post" });
|
|
await processJob({ type: "post" });
|
|
} catch (e) {
|
|
console.error(e);
|
|
if (e instanceof Error) console.log(e.stack);
|
|
}
|
|
}
|
|
|
|
async function main() {
|
|
try {
|
|
initializeStream();
|
|
try {
|
|
await Promise.all([runJob(), pushJob()]);
|
|
} catch (e) {
|
|
console.error(e);
|
|
if (e instanceof Error) console.log(e.stack);
|
|
}
|
|
} finally {
|
|
disposeStream();
|
|
}
|
|
}
|
|
|
|
if (values.test) await test();
|
|
else await main();
|