Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A Better Version of It. #36

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
110 changes: 60 additions & 50 deletions tensor-api/index.ts
Original file line number Diff line number Diff line change
@@ -1,33 +1,33 @@
import * as tf from "@tensorflow/tfjs-node"
import fs from "fs"
import csv from "csv-parser"
import { Transform } from "stream"
import OpenAI from "openai"
import * as tf from "@tensorflow/tfjs-node";
import fs from "fs";
import csv from "csv-parser";
import { Transform } from "stream";
import OpenAI from "openai";

const openai = new OpenAI({
apiKey: process.env.OPENAI_KEY,
})
});

interface Row {
embedding: string
rating: string
embedding: string;
rating: string;
}

function createLineRangeStream(startLine: number, endLine: number) {
let currentLine = 0
let currentLine = 0;
return new Transform({
transform(chunk, _, callback) {
if (currentLine >= startLine && currentLine < endLine) {
this.push(chunk)
this.push(chunk);
}
currentLine++
currentLine++;
if (currentLine >= endLine) {
this.push(null)
this.push(null);
}
callback()
callback();
},
objectMode: true,
})
});
}

async function parseCSV(
Expand All @@ -36,94 +36,104 @@ async function parseCSV(
endLine: number
): Promise<Row[]> {
return new Promise((resolve, reject) => {
const rows: Row[] = []
const rows: Row[] = [];

fs.createReadStream(filePath)
.pipe(csv({ separator: "|" }))
.pipe(createLineRangeStream(startLine, endLine))
.on("data", (row) => {
rows.push(row)
rows.push(row);
})
.on("error", (error) => {
reject(error)
reject(error);
})
.on("end", () => {
resolve(rows)
})
})
resolve(rows);
});
});
}

class AI {
model: tf.Sequential;

constructor() {
this.model = this.compile();
}

compile() {
const model = tf.sequential()
const model = tf.sequential();

// input layer
model.add(
tf.layers.dense({
units: 3,
inputShape: [1536],
})
)
);

// output layer
model.add(
tf.layers.dense({
units: 1,
activation: "sigmoid",
})
)
);

model.compile({
loss: "binaryCrossentropy",
optimizer: "sgd",
metrics: ["accuracy"],
})
});

return model
return model;
}

async run() {
const model = this.compile()

const data = await parseCSV("prepared_dataset.csv", 0, 45000)
async train() {
const data = await parseCSV("prepared_dataset.csv", 0, 45000);

const converted = data.map((row) => ({
embedding: JSON.parse(row.embedding),
rating: Number(row.rating),
}))

const xsConverted = converted.map(({ embedding }) => embedding)
}));

const ysConverted = converted.map(({ rating }) => [rating])
const xsConverted = converted.map(({ embedding }) => embedding);

console.log(xsConverted, ysConverted)
const ysConverted = converted.map(({ rating }) => [rating]);

const xs = tf.tensor2d(xsConverted)
const xs = tf.tensor2d(xsConverted);

const ys = tf.tensor2d(ysConverted)
const ys = tf.tensor2d(ysConverted);

await model.fit(xs, ys, {
await this.model.fit(xs, ys, {
epochs: 250,
})

const testText = "hello world" // no flagging expected
});
}

async predict(text: string) {
const stuff = await openai.embeddings.create({
input: testText,
input: text,
model: "text-embedding-3-small",
})
});

const vector = stuff.data[0].embedding
const vector = stuff.data[0].embedding;

const example = tf.tensor2d([vector])
const prediction = model.predict(example)
const example = tf.tensor2d([vector]);
const prediction = this.model.predict(example);

// @ts-ignore
prediction.print()
return prediction.dataSync()[0];
}

await model.save("file://./profanity-model")
async save() {
await this.model.save("file://./profanity-model");
}
}

/**
 * Entry point: train the classifier, run a sanity-check prediction on a
 * benign string, then persist the model to disk.
 */
async function main() {
  const ai = new AI();
  await ai.train();
  const prediction = await ai.predict("hello world");
  console.log("Prediction:", prediction);
  await ai.save();
}

// Surface failures instead of leaving a floating, unhandled promise.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
7 changes: 6 additions & 1 deletion tensor-api/predict.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ app.post("/", async (c) => {
const body = await c.req.json()
const { message } = body

if (!message) {
return c.json({ error: "Missing message in request body" }, 400)
}

const openaiRes = await openai.embeddings.create({
input: message,
model: "text-embedding-3-small",
Expand All @@ -43,7 +47,8 @@ app.post("/", async (c) => {
note: "1 is very toxic/profane, 0 is not profane at all",
})
} catch (err) {
return c.json({ error: JSON.stringify(err) })
console.error(err)
return c.json({ error: "An error occurred during prediction" }, 500)
}
})

Expand Down
38 changes: 22 additions & 16 deletions tensor-api/prepare.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import { OpenAI } from "openai"

const openai = new OpenAI({
apiKey: process.env.OPENAI_KEY,
})
});

const writeStream = fs.createWriteStream("prepared_dataset.csv", { flags: "a" })

Expand Down Expand Up @@ -69,26 +69,32 @@ const prepare = async () => {

const data = await parseCSV("raw_dataset.csv", start, end)

data.forEach(async (row) => {
const hate = Number(row.severe_toxic)
const obscene = Number(row.obscene)
const insult = Number(row.insult)
const identity = Number(row.identity_hate)
const threat = Number(row.threat)
for (const row of data) {
try {
const hate = Number(row.severe_toxic)
const obscene = Number(row.obscene)
const insult = Number(row.insult)
const identity = Number(row.identity_hate)
const threat = Number(row.threat)

const isFlagged = hate || obscene || insult || identity || threat
const isFlagged = hate || obscene || insult || identity || threat

const stuff = await openai.embeddings.create({
input: row.comment_text,
model: "text-embedding-3-small",
})
const stuff = await openai.embeddings.create({
input: row.comment_text,
model: "text-embedding-3-small",
})

const vector = stuff.data[0].embedding
const vector = stuff.data[0].embedding

writeStream.write(`[${vector}]|${isFlagged ? 1 : 0}` + "\n")
writeStream.write(`[${vector}]|${isFlagged ? 1 : 0}` + "\n")

await new Promise((resolve) => setTimeout(resolve, 500))
})
await new Promise((resolve) => setTimeout(resolve, 500))
} catch (error) {
console.error(
`Error processing row ${row.id}: ${error.message}`
)
}
}
}
}

Expand Down