// services/dealClassification.service.js
|
|
const OpenAI = require("openai")
|
|
// Shared OpenAI client, created once when this module is loaded.
// The API key is read from the OPENAI_API_KEY environment variable.
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
|
|
|
|
// System message sent with every classification request (see classifyDeal).
// It describes the tagging rules, the review/safety flags and the JSON shape
// the model must return. The text is sent verbatim, so every character inside
// the template literal (including blank lines) is part of the prompt.
const SYSTEM_PROMPT = `
Classify the deal into exactly ONE category_id and optionally suggest up to 5 tags.

Tags are NOT keyword repeats. Tags must represent INTENT/AUDIENCE/USE-CASE.

- Prefer audience or use-case tags such as: okul, ofis, is, gaming, kamp, mutfak, temizlik, araba, bahce, bebek, evcil-hayvan, fitness.
- Do NOT output literal product words.
- You MAY infer relevant intent/audience tags even if not explicitly written, as long as the inference is strong and widely accepted.
- Avoid weak guesses: if the intent/audience is not clear, set needs_review=true and tags can be [].

Forbidden:
- store/company/seller names
- promotion/marketing words
- generic category words

Max 5 tags total, lowercase.
Review / safety:
- Set needs_review=true if you are not confident about the chosen category OR if the deal text looks problematic.
- If unclear/unrelated, use best_category_id=0 and needs_review=true.
- Set has_issue=true if the text contains profanity, harassment, hate, explicit sexual content, doxxing/personal data, scams/phishing, or clear spam.
- If has_issue=true, briefly explain in issue_reason (short, generic, no quotes).

Output JSON only:
{
"best_category_id": number,
"needs_review": boolean,
"tags": string[],
"has_issue": boolean,
"issue_type": "NONE" | "PROFANITY" | "PHONE_NUMBER" | "PERSONAL_DATA" | "SPAM" | "OTHER",
"issue_reason": string | null
}
`
|
|
|
|
// Category table sent to the model as the first line of the user message.
// Format: "TAXONOMY:" followed by ";"-separated "<id> <slug>" entries.
// Kept on ONE physical line on purpose: a line break inside the template
// literal would end up inside the prompt and split an entry in two.
const TAXONOMY_LINE =
  `TAXONOMY:0 undefined;1 electronics;2 beauty;3 food;4 auto;5 home-garden;6 computers;7 pc-components;8 pc-ram;9 pc-ssd;10 pc-cpu;11 pc-gpu;12 pc-peripherals;13 pc-keyboard;14 pc-mouse;15 pc-monitor;16 beauty-makeup;17 beauty-lipstick;18 beauty-foundation;19 beauty-mascara;20 beauty-skincare;21 beauty-moisturizer;22 food-snacks;23 food-cigkofte;24 food-beverages;25 food-coffee;26 auto-oils;27 auto-engine-oil;28 auto-parts;29 auto-brake-pads;30 home-garden-garden;31 garden-irrigation;32 phone;33 phone-smartphone;34 phone-case;35 phone-screen-protector;36 phone-charging;37 phone-powerbank;38 wearables;39 wearables-smartwatch;40 wearables-band;41 audio;42 audio-headphones;43 audio-tws;44 audio-bt-speaker;45 audio-soundbar;46 audio-microphone;47 audio-turntable;48 tv-video;49 tv;50 projector;51 tv-media-player;52 tv-accessories;53 tv-receiver;54 console;55 console-playstation;56 console-xbox;57 console-nintendo;58 console-games;59 console-accessories;60 camera;61 camera-photo;62 camera-action;63 camera-lens;64 camera-tripod;65 smart-home;66 smart-security-camera;67 smart-plug;68 smart-bulb;69 smart-sensor;70 pc-networking;71 pc-router;72 pc-modem;73 pc-switch;74 pc-wifi-extender;75 pc-printing;76 pc-printer;77 pc-ink-toner;78 pc-scanner;79 pc-laptop;80 pc-desktop;81 pc-tablet;82 pc-storage;83 pc-external-drive;84 pc-usb-drive;85 pc-nas;86 pc-webcam;87 pc-speaker;88 pc-mic;89 pc-mousepad;90 pc-dock-hub;91 pc-laptop-bag;92 pc-controller;93 pc-motherboard;94 pc-psu;95 pc-case;96 pc-cooling;97 pc-fan;98 pc-liquid-cooling;99 beauty-fragrance;100 beauty-fragrance-women;101 beauty-fragrance-men;102 beauty-haircare;103 beauty-shampoo;104 beauty-conditioner;105 beauty-hair-styling;106 beauty-personal-care;107 beauty-deodorant;108 beauty-shaving;109 beauty-hair-removal;110 beauty-skincare-serum;111 beauty-sunscreen;112 beauty-cleanser;113 beauty-mask;114 beauty-toner;115 food-staples;116 food-pasta;117 food-legumes;118 food-oil-vinegar;119 food-breakfast;120 food-cheese;121 food-olive;122 food-jam-honey;123 food-soda;124 food-water;125 food-energy;126 food-tea;127 food-frozen;128 food-meat;129 food-dessert;130 auto-accessories;131 auto-in-car-electronics;132 auto-care;133 auto-cleaning;134 auto-tires;135 auto-battery;136 auto-lighting;137 auto-audio;138 home-furniture;139 home-dining-table;140 home-chair;141 home-sofa;142 home-bed;143 home-textile;144 home-bedding;145 home-blanket;146 home-curtain;147 home-kitchen;148 home-cookware;149 home-small-appliances;150 home-coffee-machine;151 home-blender;152 home-airfryer;153 home-vacuum;154 home-lighting;155 home-decor;156 home-rug;157 home-wall-decor;158 home-cleaning;159 home-detergent;160 home-paper-products;161 home-tools;162 home-drill;163 home-saw;164 home-hardware;165 pet;166 pet-cat-food;167 pet-dog-food;168 pet-cat-litter;169 office;170 office-paper-notebook;171 office-a4-paper;172 office-pen;173 office-school-bag;174 baby;175 baby-diaper;176 baby-wipes;177 baby-food;178 baby-toys;179 sports;180 sports-camping;181 sports-fitness;182 sports-bicycle;183 fashion;184 fashion-shoes;185 fashion-men;186 fashion-women;187 fashion-bags;188 books-media;189 books;190 digital-games`

// Allowed values for best_category_id in the response schema (currently 0..190).
// Derived from TAXONOMY_LINE so the enum can never drift from the table the
// model is shown: each entry starts with its numeric id, which parseInt reads
// up to the first space.
const CATEGORY_ENUM = TAXONOMY_LINE.slice("TAXONOMY:".length)
  .split(";")
  .map((entry) => Number.parseInt(entry, 10))
|
|
|
|
/**
 * Converts a value to a string for interpolation into the prompt.
 *
 * @param {*} x - Any value.
 * @returns {string} "" when x is null or undefined, otherwise String(x).
 */
function s(x) {
  // `== null` is intentional: it matches both null and undefined, so missing
  // fields become an empty string instead of the text "null"/"undefined".
  return x == null ? "" : String(x)
}
|
|
|
|
/**
 * Cleans up a list of tags: trims, lowercases, removes empties and duplicates,
 * and keeps at most five tags in their original order.
 *
 * Duplicates are removed BEFORE the limit is applied, so repeated tags never
 * crowd out later unique ones. Non-string entries are dropped rather than
 * coerced, so values such as null never turn into a literal "null" tag.
 *
 * @param {*} tags - Expected to be an array of strings; anything else yields [].
 * @returns {string[]} Up to five unique, trimmed, lowercase tags.
 */
function normalizeTags(tags) {
  const maxTags = 5
  if (!Array.isArray(tags)) return []

  // A Set keeps insertion order, so the first occurrence of each tag wins.
  const unique = new Set()
  for (const tag of tags) {
    if (typeof tag !== "string") continue
    const cleaned = tag.trim().toLowerCase()
    if (cleaned !== "") unique.add(cleaned)
    if (unique.size === maxTags) break
  }
  return [...unique]
}
|
|
|
|
/**
 * Extracts the model's text output from a Responses API result and parses it
 * as JSON.
 *
 * Uses `resp.output_text` when present. Otherwise it scans every item in
 * `resp.output` for the first content part carrying non-empty text, because
 * the output array can hold items without text content (for example reasoning
 * items) ahead of the actual message.
 *
 * @param {object} resp - Response object returned by client.responses.create.
 * @returns {*} The parsed JSON value.
 * @throws {Error} "OpenAI response text missing" when no text can be found.
 * @throws {SyntaxError} When the text is not valid JSON (raised by JSON.parse).
 */
function parseOutputJson(resp) {
  let text = resp?.output_text

  if (!text) {
    const items = Array.isArray(resp?.output) ? resp.output : []
    for (const item of items) {
      const parts = Array.isArray(item?.content) ? item.content : []
      const textPart = parts.find(
        (part) => typeof part?.text === "string" && part.text !== ""
      )
      if (textPart) {
        text = textPart.text
        break
      }
    }
  }

  if (!text) throw new Error("OpenAI response text missing")
  return JSON.parse(text)
}
|
|
|
|
// Structured-output format for the classification request: a strict JSON
// schema in which every field is required and no extra fields are allowed.
const DEAL_CLASSIFICATION_FORMAT = {
  type: "json_schema",
  name: "deal_classification_v1",
  strict: true,
  schema: {
    type: "object",
    additionalProperties: false,
    required: [
      "best_category_id",
      "needs_review",
      "tags",
      "has_issue",
      "issue_type",
      "issue_reason",
    ],
    properties: {
      best_category_id: { type: "integer", enum: CATEGORY_ENUM },
      needs_review: { type: "boolean" },
      tags: { type: "array", items: { type: "string" }, maxItems: 5 },
      has_issue: { type: "boolean" },
      issue_type: {
        type: "string",
        enum: ["NONE", "PROFANITY", "PHONE_NUMBER", "PERSONAL_DATA", "SPAM", "OTHER"],
      },
      issue_reason: { type: ["string", "null"] },
    },
  },
}

/**
 * Classifies a deal by sending its fields, together with the category
 * taxonomy, to the OpenAI Responses API and normalizing the JSON answer.
 *
 * @param {object} deal
 * @param {*} deal.title - Deal title; null/undefined is sent as an empty string.
 * @param {*} deal.description - Deal description; same null handling.
 * @param {*} deal.url - Deal URL; same null handling.
 * @param {*} deal.seller - Seller name; same null handling.
 * @returns {Promise<{
 *   best_category_id: number,
 *   needs_review: boolean,
 *   has_issue: boolean,
 *   issue_type: string,
 *   issue_reason: (string|null),
 *   tags: string[]
 * }>} The classification, with defaults applied for missing fields.
 * @throws Propagates any rejection from client.responses.create and any error
 *   thrown by parseOutputJson (missing text or invalid JSON).
 */
async function classifyDeal({ title, description, url, seller }) {
  // The user message is the taxonomy line followed by one "key: value" line
  // per deal field.
  const userText = [
    TAXONOMY_LINE,
    `title: ${s(title)}`,
    `description: ${s(description)}`,
    `url: ${s(url)}`,
    `seller: ${s(seller)}`,
  ].join("\n")

  const resp = await client.responses.create({
    model: "gpt-5-nano",
    input: [
      { role: "system", content: SYSTEM_PROMPT },
      { role: "user", content: userText },
    ],
    text: { format: DEAL_CLASSIFICATION_FORMAT },
  })

  const parsed = parseOutputJson(resp)

  // Defensive defaults in case a field is absent from the parsed answer:
  // category 0, issue type "NONE", no reason, and a cleaned tag list.
  return {
    best_category_id: parsed.best_category_id ?? 0,
    needs_review: Boolean(parsed.needs_review),
    has_issue: Boolean(parsed.has_issue),
    issue_type: parsed.issue_type ?? "NONE",
    issue_reason: parsed.issue_reason ?? null,
    tags: normalizeTags(parsed.tags),
  }
}
|
|
|
|
// Public API of this module: classifyDeal is the only export.
module.exports = { classifyDeal }
|