fix: hashtag extraction and make remote links absolute
This commit is contained in:
parent
2d56710223
commit
cebbb8af2b
@ -10,3 +10,4 @@ export { default as IconReportSearch } from "https://deno.land/x/tabler_icons_ts
|
||||
export { default as IconRefresh } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/refresh.tsx";
|
||||
export { default as IconCirclePlus } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/circle-plus.tsx";
|
||||
export { default as IconCircleMinus } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/circle-minus.tsx";
|
||||
export { default as IconLoader2 } from "https://deno.land/x/tabler_icons_tsx@0.0.3/tsx/loader-2.tsx";
|
||||
|
42
fresh.gen.ts
42
fresh.gen.ts
@ -16,16 +16,17 @@ import * as $10 from "./routes/api/movies/enhance/[name].ts";
|
||||
import * as $11 from "./routes/api/movies/index.ts";
|
||||
import * as $12 from "./routes/api/recipes/[name].ts";
|
||||
import * as $13 from "./routes/api/recipes/index.ts";
|
||||
import * as $14 from "./routes/api/tmdb/[id].ts";
|
||||
import * as $15 from "./routes/api/tmdb/credits/[id].ts";
|
||||
import * as $16 from "./routes/api/tmdb/query.ts";
|
||||
import * as $17 from "./routes/articles/[name].tsx";
|
||||
import * as $18 from "./routes/articles/index.tsx";
|
||||
import * as $19 from "./routes/index.tsx";
|
||||
import * as $20 from "./routes/movies/[name].tsx";
|
||||
import * as $21 from "./routes/movies/index.tsx";
|
||||
import * as $22 from "./routes/recipes/[name].tsx";
|
||||
import * as $23 from "./routes/recipes/index.tsx";
|
||||
import * as $14 from "./routes/api/test.ts";
|
||||
import * as $15 from "./routes/api/tmdb/[id].ts";
|
||||
import * as $16 from "./routes/api/tmdb/credits/[id].ts";
|
||||
import * as $17 from "./routes/api/tmdb/query.ts";
|
||||
import * as $18 from "./routes/articles/[name].tsx";
|
||||
import * as $19 from "./routes/articles/index.tsx";
|
||||
import * as $20 from "./routes/index.tsx";
|
||||
import * as $21 from "./routes/movies/[name].tsx";
|
||||
import * as $22 from "./routes/movies/index.tsx";
|
||||
import * as $23 from "./routes/recipes/[name].tsx";
|
||||
import * as $24 from "./routes/recipes/index.tsx";
|
||||
import * as $$0 from "./islands/Counter.tsx";
|
||||
import * as $$1 from "./islands/IngredientsList.tsx";
|
||||
import * as $$2 from "./islands/KMenu.tsx";
|
||||
@ -48,16 +49,17 @@ const manifest = {
|
||||
"./routes/api/movies/index.ts": $11,
|
||||
"./routes/api/recipes/[name].ts": $12,
|
||||
"./routes/api/recipes/index.ts": $13,
|
||||
"./routes/api/tmdb/[id].ts": $14,
|
||||
"./routes/api/tmdb/credits/[id].ts": $15,
|
||||
"./routes/api/tmdb/query.ts": $16,
|
||||
"./routes/articles/[name].tsx": $17,
|
||||
"./routes/articles/index.tsx": $18,
|
||||
"./routes/index.tsx": $19,
|
||||
"./routes/movies/[name].tsx": $20,
|
||||
"./routes/movies/index.tsx": $21,
|
||||
"./routes/recipes/[name].tsx": $22,
|
||||
"./routes/recipes/index.tsx": $23,
|
||||
"./routes/api/test.ts": $14,
|
||||
"./routes/api/tmdb/[id].ts": $15,
|
||||
"./routes/api/tmdb/credits/[id].ts": $16,
|
||||
"./routes/api/tmdb/query.ts": $17,
|
||||
"./routes/articles/[name].tsx": $18,
|
||||
"./routes/articles/index.tsx": $19,
|
||||
"./routes/index.tsx": $20,
|
||||
"./routes/movies/[name].tsx": $21,
|
||||
"./routes/movies/index.tsx": $22,
|
||||
"./routes/recipes/[name].tsx": $23,
|
||||
"./routes/recipes/index.tsx": $24,
|
||||
},
|
||||
islands: {
|
||||
"./islands/Counter.tsx": $$0,
|
||||
|
@ -34,6 +34,7 @@ export const KMenu = (
|
||||
const activeState = useSignal<"normal" | "loading" | "error" | "input">(
|
||||
"normal",
|
||||
);
|
||||
const loadingText = useSignal("");
|
||||
const activeIndex = useSignal(-1);
|
||||
|
||||
const input = useRef<HTMLInputElement>(null);
|
||||
@ -74,6 +75,7 @@ export const KMenu = (
|
||||
|
||||
menuEntry.cb({
|
||||
activeMenu: activeMenuType,
|
||||
loadingText,
|
||||
menus,
|
||||
activeState,
|
||||
commandInput,
|
||||
@ -133,8 +135,14 @@ export const KMenu = (
|
||||
class={`relative w-1/2 max-h-64 max-w-[400px] rounded-2xl shadow-2xl nnoisy-gradient overflow-hidden after:opacity-10 bg-gray-700`}
|
||||
>
|
||||
<div
|
||||
class="grid h-12 text-gray-400 border-b border-gray-500 "
|
||||
style={{ gridTemplateColumns: "4em 1fr" }}
|
||||
class={`grid h-12 text-gray-400 ${
|
||||
activeState.value !== "loading" && "border-b"
|
||||
} border-gray-500 `}
|
||||
style={{
|
||||
gridTemplateColumns: activeState.value !== "loading"
|
||||
? "4em 1fr"
|
||||
: "1fr",
|
||||
}}
|
||||
>
|
||||
{(activeState.value === "normal" || activeState.value === "input") &&
|
||||
(
|
||||
@ -156,8 +164,9 @@ export const KMenu = (
|
||||
</>
|
||||
)}
|
||||
{activeState.value === "loading" && (
|
||||
<div class="p-4">
|
||||
Loading...
|
||||
<div class="py-3 px-4 flex items-center gap-2">
|
||||
<icons.IconLoader2 class="animate-spin w-4 h-4" />
|
||||
{loadingText.value || "Loading..."}
|
||||
</div>
|
||||
)}
|
||||
</div>
|
||||
|
@ -1,7 +1,7 @@
|
||||
import { Menu } from "@islands/KMenu/types.ts";
|
||||
import { Movie } from "@lib/resource/movies.ts";
|
||||
import { TMDBMovie } from "@lib/types.ts";
|
||||
import { isValidUrl } from "@lib/helpers.ts";
|
||||
import { fetchStream, isValidUrl } from "@lib/helpers.ts";
|
||||
|
||||
export const menus: Record<string, Menu> = {
|
||||
main: {
|
||||
@ -28,20 +28,24 @@ export const menus: Record<string, Menu> = {
|
||||
state.activeMenu.value = "input_link";
|
||||
state.activeState.value = "input";
|
||||
|
||||
const unsub = state.commandInput.subscribe(async (value) => {
|
||||
const unsub = state.commandInput.subscribe((value) => {
|
||||
if (isValidUrl(value)) {
|
||||
unsub();
|
||||
|
||||
state.activeState.value = "loading";
|
||||
|
||||
const response = await fetch("/api/articles/create?url=" + value);
|
||||
const newArticle = await response.json();
|
||||
|
||||
if (newArticle?.id) {
|
||||
window.location.href = "/articles/" + newArticle.id;
|
||||
}
|
||||
|
||||
state.visible.value = false;
|
||||
fetchStream("/api/articles/create?url=" + value, (chunk) => {
|
||||
console.log({ chunk: chunk.split("\n") });
|
||||
if (chunk.startsWith("id:")) {
|
||||
state.loadingText.value = "Finished";
|
||||
setTimeout(() => {
|
||||
window.location.href = "/articles/" +
|
||||
chunk.replace("id:", "").trim();
|
||||
}, 500);
|
||||
} else {
|
||||
state.loadingText.value = chunk;
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
},
|
||||
|
@ -6,6 +6,7 @@ type IconKey = keyof typeof icons;
|
||||
export type MenuState = {
|
||||
activeMenu: Signal<string>;
|
||||
activeState: Signal<"input" | "error" | "normal" | "loading">;
|
||||
loadingText:Signal<string>;
|
||||
commandInput: Signal<string>;
|
||||
visible: Signal<boolean>;
|
||||
menus: Record<string, Menu>;
|
||||
|
@ -30,3 +30,49 @@ export const fixRenderedMarkdown = (content: string) => {
|
||||
}
|
||||
});
|
||||
};
|
||||
|
||||
/**
 * Fetches `url` and forwards the streamed response body to `cb` as text.
 *
 * The body is expected to be a sequence of messages that are each prefixed
 * with "$" (the framing written by `createStreamResponse().enqueue`). The
 * text of every network read is split on "$" and each non-empty piece is
 * handed to `cb`, in order, as soon as it arrives.
 *
 * Note: a message that happens to be delivered across two network reads is
 * still reported as two pieces; only character decoding is carried across
 * read boundaries.
 *
 * @param url Endpoint to request with a plain GET.
 * @param cb  Invoked once per non-empty "$"-separated piece of text.
 * @returns A promise that resolves once the body has been fully consumed, or
 *          right after the request when the response has no body.
 */
export async function fetchStream(url: string, cb: (chunk: string) => void) {
  const response = await fetch(url);
  const reader = response?.body?.getReader();
  if (!reader) return;

  // A single decoder in streaming mode keeps the trailing bytes of an
  // incomplete multi-byte UTF-8 sequence until the next read, instead of
  // turning both halves into U+FFFD replacement characters.
  const decoder = new TextDecoder();

  const emit = (text: string) => {
    for (const piece of text.split("$")) {
      if (piece.length > 0) cb(piece);
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) {
      // Flush whatever the decoder is still holding back.
      emit(decoder.decode());
      return;
    }
    emit(decoder.decode(value, { stream: true }));
  }
}
|
||||
|
||||
/**
 * Creates a plain-text streaming `Response` together with the functions that
 * feed and finish it.
 *
 * Every message passed to `enqueue` is written to the body prefixed with "$",
 * which is the separator `fetchStream` splits on. Messages therefore must not
 * contain "$" themselves.
 *
 * @returns An object with:
 *  - `response`: the `Response` to return from a route handler,
 *  - `enqueue(chunk)`: appends one "$"-prefixed message to the body; a no-op
 *    once the stream has been closed or the client has gone away,
 *  - `cancel()`: closes the body; safe to call more than once.
 */
export const createStreamResponse = () => {
  let controller: ReadableStreamDefaultController<Uint8Array> | undefined;
  // Set as soon as either side ends the stream. Calling close()/enqueue() on
  // a closed controller throws a TypeError, so both helpers check this first.
  let closed = false;
  const encoder = new TextEncoder();

  const body = new ReadableStream<Uint8Array>({
    start(cont) {
      controller = cont;
    },
    cancel() {
      // The consumer (e.g. a disconnected client) cancelled the stream.
      closed = true;
    },
  });

  const response = new Response(body, {
    headers: {
      "content-type": "text/plain",
      "x-content-type-options": "nosniff",
    },
  });

  function cancel() {
    if (closed) return;
    closed = true;
    controller?.close();
  }

  function enqueue(chunk: string) {
    if (closed) return;
    controller?.enqueue(encoder.encode("$" + chunk));
  }

  return {
    response,
    cancel,
    enqueue,
  };
};
|
||||
|
@ -20,9 +20,12 @@ export function extractHashTags(inputString: string) {
|
||||
const hashtags = [];
|
||||
|
||||
for (
|
||||
const [hashtag] of inputString.matchAll(/(?<!\()\B(\#[a-zA-Z\-]+\b)(?!;)/g)
|
||||
const [hashtag] of inputString.matchAll(/(?:^|\s)#\S*(?<!\))/g)
|
||||
) {
|
||||
hashtags.push(hashtag.replace(/\#/g, ""));
|
||||
const cleaned = hashtag.replace(/\#/g, "").trim();
|
||||
if (cleaned.length > 2) {
|
||||
hashtags.push(cleaned);
|
||||
}
|
||||
}
|
||||
|
||||
return hashtags;
|
||||
|
@ -2,25 +2,130 @@ import { Handlers } from "$fresh/server.ts";
|
||||
import { Readability } from "https://cdn.skypack.dev/@mozilla/readability";
|
||||
import { DOMParser } from "https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts";
|
||||
import { BadRequestError } from "@lib/errors.ts";
|
||||
import { isValidUrl, json } from "@lib/helpers.ts";
|
||||
import { createStreamResponse, isValidUrl, json } from "@lib/helpers.ts";
|
||||
import * as openai from "@lib/openai.ts";
|
||||
|
||||
import tds from "https://cdn.skypack.dev/turndown@7.1.1";
|
||||
//import { gfm } from "https://cdn.skypack.dev/@guyplusplus/turndown-plugin-gfm@1.0.7";
|
||||
import { createArticle } from "@lib/resource/articles.ts";
|
||||
|
||||
// Turndown (HTML -> Markdown) output conventions used by this module:
// "#"-style headings, fenced code blocks, "---" rules and "-" list bullets.
const turndownOptions = {
  headingStyle: "atx",
  codeBlockStyle: "fenced",
  hr: "---",
  bulletListMarker: "-",
} as const;

// Module-level converter and HTML parser instances.
const service = new tds(turndownOptions);
const parser = new DOMParser();
|
||||
|
||||
//service.use(gfm);
|
||||
|
||||
/**
 * Downloads the page at `fetchUrl`, converts its readable content to
 * Markdown, asks OpenAI for tags / a short title / an author name, and stores
 * the result through `createArticle`.
 *
 * Progress is reported to the client by pushing short status messages through
 * `streamResponse.enqueue`. The last message has the form "id: <article id>".
 * This function never closes the stream; that is left to the caller.
 *
 * All links and image sources in the generated Markdown are resolved against
 * `fetchUrl`, so root-relative ("/a"), path-relative ("a/b"),
 * protocol-relative ("//host/a") and fragment-only ("#x") references all
 * become absolute URLs.
 *
 * @param fetchUrl       Absolute URL of the page to import.
 * @param streamResponse Stream handle used for progress messages.
 * @returns The article object that was passed to `createArticle`.
 * @throws Error when the download fails, the HTML cannot be parsed, or no
 *         readable content can be extracted. Errors from `fetch`, the OpenAI
 *         helpers and `createArticle` propagate unchanged.
 */
async function processCreateArticle(
  { fetchUrl, streamResponse }: {
    fetchUrl: string;
    streamResponse: ReturnType<typeof createStreamResponse>;
  },
) {
  console.log("[api/article] create article from url", { url: fetchUrl });

  streamResponse.enqueue("downloading article");

  const pageResponse = await fetch(fetchUrl);
  if (!pageResponse.ok) {
    // Do not turn an error page into an article.
    throw new Error(
      `failed to download ${fetchUrl}: HTTP ${pageResponse.status}`,
    );
  }
  const html = await pageResponse.text();

  streamResponse.enqueue("download success");

  const document = parser.parseFromString(html, "text/html");
  if (!document) {
    throw new Error(`could not parse HTML of ${fetchUrl}`);
  }

  const title = document.querySelector("title")?.innerText;

  const metaAuthor =
    document.querySelector('meta[name="twitter:creator"]')?.getAttribute(
      "content",
    ) ||
    document.querySelector('meta[name="author"]')?.getAttribute("content");

  const result = new Readability(document).parse();
  if (!result) {
    // Readability yields null when it cannot find an article body.
    throw new Error(`could not extract readable content from ${fetchUrl}`);
  }

  console.log("[api/article] parsed ", {
    url: fetchUrl,
    content: result.textContent,
  });

  const cleanDocument = parser.parseFromString(
    result.content,
    "text/html",
  );

  // A dedicated converter per request, because the rules below close over
  // the URL of the page being imported.
  const turndownService = new tds({
    headingStyle: "atx",
    codeBlockStyle: "fenced",
    hr: "---",
    bulletListMarker: "-",
  });

  // Resolves any reference found in the page against the page URL; falls back
  // to the raw value if it cannot be parsed as a URL at all.
  const resolveUrl = (reference: string): string => {
    try {
      return new URL(reference, fetchUrl).href;
    } catch {
      return reference;
    }
  };

  turndownService.addRule("fix image links", {
    filter: ["img"],
    replacement: function (_: string, node: Element) {
      const src = node.getAttribute("src");
      const alt = node.getAttribute("alt") || "";
      // Inline data images would bloat the Markdown; drop them.
      if (!src || src.startsWith("data:image")) return "";

      return `![${alt}](${resolveUrl(src)})`;
    },
  });

  turndownService.addRule("fix normal links", {
    filter: ["a"],
    replacement: function (content: string, node: Element) {
      const href = node.getAttribute("href");
      if (!href) return content;

      return `[${content}](${resolveUrl(href)})`;
    },
  });

  const markdown = turndownService.turndown(cleanDocument);

  streamResponse.enqueue("parsed article, creating tags with openai");

  const [tags, shortTitle, author] = await Promise.all([
    openai.createTags(markdown),
    title && openai.shortenTitle(title),
    metaAuthor || openai.extractAuthorName(markdown),
  ]);

  const id = shortTitle || title || "";

  const newArticle = {
    id,
    name: title || "",
    content: markdown,
    tags: tags || [],
    meta: {
      // Twitter handles ("@name") are stored as "twitter:name".
      author: (author || "").replace("@", "twitter:"),
      link: fetchUrl,
      status: "not-finished",
      date: new Date(),
    },
  } as const;

  streamResponse.enqueue("finished processing");

  await createArticle(newArticle);

  // The client watches for a message starting with "id:" to navigate.
  streamResponse.enqueue("id: " + newArticle.id);

  return newArticle;
}
|
||||
|
||||
export const handler: Handlers = {
|
||||
async GET(req) {
|
||||
GET(req) {
|
||||
const url = new URL(req.url);
|
||||
const fetchUrl = url.searchParams.get("url");
|
||||
|
||||
@ -28,63 +133,16 @@ export const handler: Handlers = {
|
||||
throw new BadRequestError();
|
||||
}
|
||||
|
||||
console.log("[api/article] create article from url", { url: fetchUrl });
|
||||
const streamResponse = createStreamResponse();
|
||||
|
||||
const request = await fetch(fetchUrl);
|
||||
const html = await request.text();
|
||||
|
||||
const document = parser.parseFromString(html, "text/html");
|
||||
|
||||
const title = document?.querySelector("title")?.innerText;
|
||||
|
||||
const metaAuthor =
|
||||
document?.querySelector('meta[name="twitter:creator"]')?.getAttribute(
|
||||
"content",
|
||||
) ||
|
||||
document?.querySelector('meta[name="author"]')?.getAttribute("content");
|
||||
|
||||
console.log({ metaAuthor });
|
||||
|
||||
const readable = new Readability(document);
|
||||
|
||||
const result = readable.parse();
|
||||
|
||||
console.log("[api/article] parsed ", {
|
||||
url: fetchUrl,
|
||||
content: result.textContent,
|
||||
processCreateArticle({ fetchUrl, streamResponse }).then((article) => {
|
||||
console.log({ article });
|
||||
}).catch((err) => {
|
||||
console.log(err);
|
||||
}).finally(() => {
|
||||
streamResponse.cancel();
|
||||
});
|
||||
|
||||
const cleanDocument = parser.parseFromString(
|
||||
result.content,
|
||||
"text/html",
|
||||
);
|
||||
|
||||
const [tags, summary, shortTitle, author] = await Promise.all([
|
||||
openai.createTags(result.textContent),
|
||||
openai.summarize(result.textContent),
|
||||
title && openai.shortenTitle(title),
|
||||
metaAuthor || openai.extractAuthorName(result.textContent),
|
||||
]);
|
||||
|
||||
const markdown = service.turndown(cleanDocument);
|
||||
|
||||
const id = shortTitle || title || "";
|
||||
|
||||
const newArticle = {
|
||||
id,
|
||||
name: title || "",
|
||||
content: markdown,
|
||||
tags: tags || [],
|
||||
meta: {
|
||||
author: author || "",
|
||||
link: fetchUrl,
|
||||
status: "not-finished",
|
||||
date: new Date(),
|
||||
},
|
||||
} as const;
|
||||
|
||||
await createArticle(newArticle);
|
||||
|
||||
return json(newArticle);
|
||||
return streamResponse.response;
|
||||
},
|
||||
};
|
||||
|
30
routes/api/test.ts
Normal file
30
routes/api/test.ts
Normal file
@ -0,0 +1,30 @@
|
||||
import { Handlers } from "$fresh/server.ts";
|
||||
|
||||
/**
 * Test endpoint: answers with a never-ending plain-text stream that emits one
 * timestamp line per second until the consumer cancels the body.
 */
function GET() {
  let intervalId: number | undefined = undefined;

  // Starts the once-per-second producer when the stream is created.
  const startTicking = (controller: ReadableStreamDefaultController) => {
    intervalId = setInterval(() => {
      const line = `It is ${new Date().toISOString()}\n`;
      const bytes = new TextEncoder().encode(line);
      controller.enqueue(bytes);
    }, 1000);
  };

  // Stops the producer once the consumer cancels the stream.
  const stopTicking = () => {
    if (intervalId === undefined) return;
    clearInterval(intervalId);
  };

  const stream = new ReadableStream({
    start: startTicking,
    cancel: stopTicking,
  });

  const headers = {
    "content-type": "text/plain",
    "x-content-type-options": "nosniff",
  };

  return new Response(stream, { headers });
}
|
||||
|
||||
// Route handler table for this endpoint; only GET is served.
export const handler: Handlers = { GET: GET };
|
Loading…
x
Reference in New Issue
Block a user