fix: hashtag extraction and make remote links absolute

This commit is contained in:
max_richter 2023-08-02 15:05:35 +02:00
parent 2d56710223
commit cebbb8af2b
9 changed files with 253 additions and 99 deletions

View File

@ -10,3 +10,4 @@ export { default as IconReportSearch } from "https://deno.land/x/tabler_icons_ts
export { default as IconRefresh } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/refresh.tsx";
export { default as IconCirclePlus } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/circle-plus.tsx";
export { default as IconCircleMinus } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/circle-minus.tsx";
export { default as IconLoader2 } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/loader-2.tsx";

View File

@ -16,16 +16,17 @@ import * as $10 from "./routes/api/movies/enhance/[name].ts";
import * as $11 from "./routes/api/movies/index.ts";
import * as $12 from "./routes/api/recipes/[name].ts";
import * as $13 from "./routes/api/recipes/index.ts";
import * as $14 from "./routes/api/tmdb/[id].ts";
import * as $15 from "./routes/api/tmdb/credits/[id].ts";
import * as $16 from "./routes/api/tmdb/query.ts";
import * as $17 from "./routes/articles/[name].tsx";
import * as $18 from "./routes/articles/index.tsx";
import * as $19 from "./routes/index.tsx";
import * as $20 from "./routes/movies/[name].tsx";
import * as $21 from "./routes/movies/index.tsx";
import * as $22 from "./routes/recipes/[name].tsx";
import * as $23 from "./routes/recipes/index.tsx";
import * as $14 from "./routes/api/test.ts";
import * as $15 from "./routes/api/tmdb/[id].ts";
import * as $16 from "./routes/api/tmdb/credits/[id].ts";
import * as $17 from "./routes/api/tmdb/query.ts";
import * as $18 from "./routes/articles/[name].tsx";
import * as $19 from "./routes/articles/index.tsx";
import * as $20 from "./routes/index.tsx";
import * as $21 from "./routes/movies/[name].tsx";
import * as $22 from "./routes/movies/index.tsx";
import * as $23 from "./routes/recipes/[name].tsx";
import * as $24 from "./routes/recipes/index.tsx";
import * as $$0 from "./islands/Counter.tsx";
import * as $$1 from "./islands/IngredientsList.tsx";
import * as $$2 from "./islands/KMenu.tsx";
@ -48,16 +49,17 @@ const manifest = {
"./routes/api/movies/index.ts": $11,
"./routes/api/recipes/[name].ts": $12,
"./routes/api/recipes/index.ts": $13,
"./routes/api/tmdb/[id].ts": $14,
"./routes/api/tmdb/credits/[id].ts": $15,
"./routes/api/tmdb/query.ts": $16,
"./routes/articles/[name].tsx": $17,
"./routes/articles/index.tsx": $18,
"./routes/index.tsx": $19,
"./routes/movies/[name].tsx": $20,
"./routes/movies/index.tsx": $21,
"./routes/recipes/[name].tsx": $22,
"./routes/recipes/index.tsx": $23,
"./routes/api/test.ts": $14,
"./routes/api/tmdb/[id].ts": $15,
"./routes/api/tmdb/credits/[id].ts": $16,
"./routes/api/tmdb/query.ts": $17,
"./routes/articles/[name].tsx": $18,
"./routes/articles/index.tsx": $19,
"./routes/index.tsx": $20,
"./routes/movies/[name].tsx": $21,
"./routes/movies/index.tsx": $22,
"./routes/recipes/[name].tsx": $23,
"./routes/recipes/index.tsx": $24,
},
islands: {
"./islands/Counter.tsx": $$0,

View File

@ -34,6 +34,7 @@ export const KMenu = (
const activeState = useSignal<"normal" | "loading" | "error" | "input">(
"normal",
);
const loadingText = useSignal("");
const activeIndex = useSignal(-1);
const input = useRef<HTMLInputElement>(null);
@ -74,6 +75,7 @@ export const KMenu = (
menuEntry.cb({
activeMenu: activeMenuType,
loadingText,
menus,
activeState,
commandInput,
@ -133,8 +135,14 @@ export const KMenu = (
class={`relative w-1/2 max-h-64 max-w-[400px] rounded-2xl shadow-2xl nnoisy-gradient overflow-hidden after:opacity-10 bg-gray-700`}
>
<div
class="grid h-12 text-gray-400 border-b border-gray-500 "
style={{ gridTemplateColumns: "4em 1fr" }}
class={`grid h-12 text-gray-400 ${
activeState.value !== "loading" && "border-b"
} border-gray-500 `}
style={{
gridTemplateColumns: activeState.value !== "loading"
? "4em 1fr"
: "1fr",
}}
>
{(activeState.value === "normal" || activeState.value === "input") &&
(
@ -156,8 +164,9 @@ export const KMenu = (
</>
)}
{activeState.value === "loading" && (
<div class="p-4">
Loading...
<div class="py-3 px-4 flex items-center gap-2">
<icons.IconLoader2 class="animate-spin w-4 h-4" />
{loadingText.value || "Loading..."}
</div>
)}
</div>

View File

@ -1,7 +1,7 @@
import { Menu } from "@islands/KMenu/types.ts";
import { Movie } from "@lib/resource/movies.ts";
import { TMDBMovie } from "@lib/types.ts";
import { isValidUrl } from "@lib/helpers.ts";
import { fetchStream, isValidUrl } from "@lib/helpers.ts";
export const menus: Record<string, Menu> = {
main: {
@ -28,20 +28,24 @@ export const menus: Record<string, Menu> = {
state.activeMenu.value = "input_link";
state.activeState.value = "input";
const unsub = state.commandInput.subscribe(async (value) => {
const unsub = state.commandInput.subscribe((value) => {
if (isValidUrl(value)) {
unsub();
state.activeState.value = "loading";
const response = await fetch("/api/articles/create?url=" + value);
const newArticle = await response.json();
if (newArticle?.id) {
window.location.href = "/articles/" + newArticle.id;
}
state.visible.value = false;
fetchStream("/api/articles/create?url=" + value, (chunk) => {
console.log({ chunk: chunk.split("\n") });
if (chunk.startsWith("id:")) {
state.loadingText.value = "Finished";
setTimeout(() => {
window.location.href = "/articles/" +
chunk.replace("id:", "").trim();
}, 500);
} else {
state.loadingText.value = chunk;
}
});
}
});
},

View File

@ -6,6 +6,7 @@ type IconKey = keyof typeof icons;
export type MenuState = {
activeMenu: Signal<string>;
activeState: Signal<"input" | "error" | "normal" | "loading">;
loadingText:Signal<string>;
commandInput: Signal<string>;
visible: Signal<boolean>;
menus: Record<string, Menu>;

View File

@ -30,3 +30,49 @@ export const fixRenderedMarkdown = (content: string) => {
}
});
};
/**
 * Fetches `url` and forwards the streamed response body to `cb` as text.
 *
 * The body is decoded as UTF-8 and split on the `$` delimiter; every
 * non-empty segment is passed to `cb` in arrival order. The returned promise
 * resolves once the stream has ended (or immediately if the response has no
 * body).
 *
 * Note: segments are emitted per network read, so a message that the
 * transport happens to split across two reads is delivered as two calls.
 *
 * @param url - Address to request with `fetch`.
 * @param cb - Invoked once per non-empty `$`-separated segment.
 */
export async function fetchStream(url: string, cb: (chunk: string) => void) {
  const response = await fetch(url);
  const reader = response?.body?.getReader();
  if (!reader) return;

  // A single decoder in streaming mode keeps the trailing bytes of an
  // incomplete multi-byte character until the next read, instead of
  // replacing both halves with U+FFFD.
  const decoder = new TextDecoder();

  const emit = (text: string) => {
    for (const message of text.split("$")) {
      if (message.length > 0) cb(message);
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    emit(decoder.decode(value, { stream: true }));
  }

  // Flush whatever the decoder is still holding back at end of stream.
  emit(decoder.decode());
}
/**
 * Creates a plain-text streaming `Response` together with the functions that
 * feed and finish it.
 *
 * Every message passed to `enqueue` is written as `"$" + chunk`, UTF-8
 * encoded; the `$` prefix is the delimiter a reader can split on.
 *
 * Both `enqueue` and `cancel` are safe to call after the stream has ended,
 * whether it was closed via `cancel()` or cancelled by the consumer: they
 * become no-ops instead of throwing from the stream controller.
 *
 * @returns `response` to hand back to the client, `enqueue` to push a
 *   message, and `cancel` to close the stream.
 */
export const createStreamResponse = () => {
  const encoder = new TextEncoder();
  let controller: ReadableStreamController<ArrayBufferView> | undefined;
  let closed = false;

  const body = new ReadableStream({
    // `start` runs synchronously inside the constructor, so `controller` is
    // set before `enqueue`/`cancel` can be called.
    start(cont) {
      controller = cont;
    },
    // The consumer went away (e.g. the client disconnected): remember it so
    // later writes do not throw on the already-closed controller.
    cancel() {
      closed = true;
    },
  });

  const response = new Response(body, {
    headers: {
      "content-type": "text/plain",
      "x-content-type-options": "nosniff",
    },
  });

  function cancel() {
    if (closed) return;
    closed = true;
    controller?.close();
  }

  function enqueue(chunk: string) {
    if (closed) return;
    controller?.enqueue(encoder.encode("$" + chunk));
  }

  return {
    response,
    cancel,
    enqueue,
  };
};

View File

@ -20,9 +20,12 @@ export function extractHashTags(inputString: string) {
const hashtags = [];
for (
const [hashtag] of inputString.matchAll(/(?<!\()\B(\#[a-zA-Z\-]+\b)(?!;)/g)
const [hashtag] of inputString.matchAll(/(?:^|\s)#\S*(?<!\))/g)
) {
hashtags.push(hashtag.replace(/\#/g, ""));
const cleaned = hashtag.replace(/\#/g, "").trim();
if (cleaned.length > 2) {
hashtags.push(cleaned);
}
}
return hashtags;

View File

@ -2,25 +2,130 @@ import { Handlers } from "$fresh/server.ts";
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
import { BadRequestError } from "@lib/errors.ts";
import { isValidUrl, json } from "@lib/helpers.ts";
import { createStreamResponse, isValidUrl, json } from "@lib/helpers.ts";
import * as openai from "@lib/openai.ts";
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
import { createArticle } from "@lib/resource/articles.ts";
const service = new tds({
headingStyle: "atx",
codeBlockStyle: "fenced",
hr: "---",
bulletListMarker: "-",
});
const parser = new DOMParser();
//service.use(gfm);
/**
 * Downloads the page at `fetchUrl`, converts its readable content to
 * markdown, derives tags/title/author and stores the result as a new article.
 *
 * Progress messages are pushed to `streamResponse` along the way; the last
 * message is `"id: <article id>"`.
 *
 * Image and link targets in the generated markdown are resolved against
 * `fetchUrl`, so relative (`img/a.png`, `../x`), root-relative (`/x`),
 * protocol-relative (`//cdn.example/x`) and fragment-only (`#section`)
 * references all become absolute URLs.
 *
 * @param fetchUrl - Address of the page to import.
 * @param streamResponse - Stream handle used to report progress.
 * @returns The article object that was passed to `createArticle`.
 * @throws Error when Readability cannot extract any content from the page.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  // Minimal shape of the DOM nodes the turndown rules below read from.
  type AttributeNode = { getAttribute(name: string): string | null };

  console.log("[api/article] create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");

  const request = await fetch(fetchUrl);
  const html = await request.text();

  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");

  const title = document?.querySelector("title")?.innerText;

  const metaAuthor =
    document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document?.querySelector('meta[name="author"]')?.getAttribute("content");

  const readable = new Readability(document);

  const result = readable.parse();
  if (!result) {
    // Fail with a clear message instead of a TypeError on `result.textContent`.
    throw new Error(
      "[api/article] could not extract readable content from " + fetchUrl,
    );
  }

  console.log("[api/article] parsed ", {
    url: fetchUrl,
    content: result.textContent,
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  const service = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  // Resolve a reference found in the page against the page's own address.
  // Unparseable references are kept as written rather than aborting the import.
  const toAbsolute = (ref: string) => {
    try {
      return new URL(ref, fetchUrl).href;
    } catch {
      return ref;
    }
  };

  service.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: AttributeNode) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      // Inline data URIs would bloat the markdown, so they are dropped.
      if (!src || src.startsWith("data:image")) return "";

      return `![${alt}](${toAbsolute(src)})`;
    },
  });

  service.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: AttributeNode) {
      const href = node.getAttribute("href");
      if (!href) return content;

      return `[${content}](${toAbsolute(href)})`;
    },
  });

  const markdown = service.turndown(cleanDocument);

  streamResponse.enqueue("parsed article, creating tags with openai");

  const [tags, shortTitle, author] = await Promise.all([
    openai.createTags(markdown),
    title && openai.shortenTitle(title),
    metaAuthor || openai.extractAuthorName(markdown),
  ]);

  const id = shortTitle || title || "";

  const newArticle = {
    id,
    name: title || "",
    content: markdown,
    tags: tags || [],
    meta: {
      author: (author || "").replace("@", "twitter:"),
      link: fetchUrl,
      status: "not-finished",
      date: new Date(),
    },
  } as const;

  streamResponse.enqueue("finished processing");

  await createArticle(newArticle);

  streamResponse.enqueue("id: " + newArticle.id);

  return newArticle;
}
export const handler: Handlers = {
async GET(req) {
GET(req) {
const url = new URL(req.url);
const fetchUrl = url.searchParams.get("url");
@ -28,63 +133,16 @@ export const handler: Handlers = {
throw new BadRequestError();
}
console.log("[api/article] create article from url", { url: fetchUrl });
const streamResponse = createStreamResponse();
const request = await fetch(fetchUrl);
const html = await request.text();
const document = parser.parseFromString(html, "text/html");
const title = document?.querySelector("title")?.innerText;
const metaAuthor =
document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
"content",
) ||
document?.querySelector('meta[name="author"]')?.getAttribute("content");
console.log({ metaAuthor });
const readable = new Readability(document);
const result = readable.parse();
console.log("[api/article] parsed ", {
url: fetchUrl,
content: result.textContent,
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
console.log({ article });
}).catch((err) => {
console.log(err);
}).finally(() => {
streamResponse.cancel();
});
const cleanDocument = parser.parseFromString(
result.content,
"text/html",
);
const [tags, summary, shortTitle, author] = await Promise.all([
openai.createTags(result.textContent),
openai.summarize(result.textContent),
title && openai.shortenTitle(title),
metaAuthor || openai.extractAuthorName(result.textContent),
]);
const markdown = service.turndown(cleanDocument);
const id = shortTitle || title || "";
const newArticle = {
id,
name: title || "",
content: markdown,
tags: tags || [],
meta: {
author: author || "",
link: fetchUrl,
status: "not-finished",
date: new Date(),
},
} as const;
await createArticle(newArticle);
return json(newArticle);
return streamResponse.response;
},
};

30
routes/api/test.ts Normal file
View File

@ -0,0 +1,30 @@
import { Handlers } from "$fresh/server.ts";
/**
 * Test endpoint: responds with a never-ending plain-text stream that emits
 * one ISO timestamp line per second until the consumer cancels the stream.
 */
function GET() {
  const headers = {
    "content-type": "text/plain",
    "x-content-type-options": "nosniff",
  };

  let intervalId: number | undefined = undefined;

  const stream = new ReadableStream({
    start(ctrl) {
      const pushTimestamp = () => {
        const line = `It is ${new Date().toISOString()}\n`;
        ctrl.enqueue(new TextEncoder().encode(line));
      };
      intervalId = setInterval(pushTimestamp, 1000);
    },
    // Stop ticking once nobody is reading any more.
    cancel() {
      if (intervalId === undefined) return;
      clearInterval(intervalId);
    },
  });

  return new Response(stream, { headers });
}
/** Route handlers for this test endpoint; only GET is served. */
export const handler: Handlers = { GET: GET };