HotTRDealsBackend/services/dealClassification.service.js
2026-01-25 17:50:56 +00:00

123 lines
6.7 KiB
JavaScript

// services/dealClassification.service.js
// OpenAI SDK client used by classifyDeal. It is constructed once when this
// module is loaded, with the API key read from the OPENAI_API_KEY environment
// variable.
const OpenAI = require("openai")
const client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY })
// System prompt sent as the "system" message of every classification request.
// It asks the model for exactly one category id, up to five intent/audience
// tags, and review/moderation flags, returned as JSON only. The field list at
// the bottom of the prompt mirrors the JSON schema that classifyDeal passes to
// the API, so the two must be kept in sync when either changes.
const SYSTEM_PROMPT = `
Classify the deal into exactly ONE category_id and optionally suggest up to 5 tags.
Tags are NOT keyword repeats. Tags must represent INTENT/AUDIENCE/USE-CASE.
- Prefer audience or use-case tags such as: okul, ofis, is, gaming, kamp, mutfak, temizlik, araba, bahce, bebek, evcil-hayvan, fitness.
- Do NOT output literal product words.
- You MAY infer relevant intent/audience tags even if not explicitly written, as long as the inference is strong and widely accepted.
- Avoid weak guesses: if the intent/audience is not clear, set needs_review=true and tags can be [].
Forbidden:
- store/company/seller names
- promotion/marketing words
- generic category words
Max 5 tags total, lowercase.
Review / safety:
- Set needs_review=true if you are not confident about the chosen category OR if the deal text looks problematic.
- If unclear/unrelated, use best_category_id=0 and needs_review=true.
- Set has_issue=true if the text contains profanity, harassment, hate, explicit sexual content, doxxing/personal data, scams/phishing, or clear spam.
- If has_issue=true, briefly explain in issue_reason (short, generic, no quotes).
Output JSON only:
{
"best_category_id": number,
"needs_review": boolean,
"tags": string[],
"has_issue": boolean,
"issue_type": "NONE" | "PROFANITY" | "PHONE_NUMBER" | "PERSONAL_DATA" | "SPAM" | "OTHER",
"issue_reason": string | null
}
`
// Category taxonomy sent to the model as the first line of the user message.
// Format: "TAXONOMY:" followed by ";"-separated "<id> <slug>" entries.
// It must stay on a single line: a line break inside an entry would split that
// entry in the prompt text.
const TAXONOMY_LINE =
  `TAXONOMY:0 undefined;1 electronics;2 beauty;3 food;4 auto;5 home-garden;6 computers;7 pc-components;8 pc-ram;9 pc-ssd;10 pc-cpu;11 pc-gpu;12 pc-peripherals;13 pc-keyboard;14 pc-mouse;15 pc-monitor;16 beauty-makeup;17 beauty-lipstick;18 beauty-foundation;19 beauty-mascara;20 beauty-skincare;21 beauty-moisturizer;22 food-snacks;23 food-cigkofte;24 food-beverages;25 food-coffee;26 auto-oils;27 auto-engine-oil;28 auto-parts;29 auto-brake-pads;30 home-garden-garden;31 garden-irrigation;32 phone;33 phone-smartphone;34 phone-case;35 phone-screen-protector;36 phone-charging;37 phone-powerbank;38 wearables;39 wearables-smartwatch;40 wearables-band;41 audio;42 audio-headphones;43 audio-tws;44 audio-bt-speaker;45 audio-soundbar;46 audio-microphone;47 audio-turntable;48 tv-video;49 tv;50 projector;51 tv-media-player;52 tv-accessories;53 tv-receiver;54 console;55 console-playstation;56 console-xbox;57 console-nintendo;58 console-games;59 console-accessories;60 camera;61 camera-photo;62 camera-action;63 camera-lens;64 camera-tripod;65 smart-home;66 smart-security-camera;67 smart-plug;68 smart-bulb;69 smart-sensor;70 pc-networking;71 pc-router;72 pc-modem;73 pc-switch;74 pc-wifi-extender;75 pc-printing;76 pc-printer;77 pc-ink-toner;78 pc-scanner;79 pc-laptop;80 pc-desktop;81 pc-tablet;82 pc-storage;83 pc-external-drive;84 pc-usb-drive;85 pc-nas;86 pc-webcam;87 pc-speaker;88 pc-mic;89 pc-mousepad;90 pc-dock-hub;91 pc-laptop-bag;92 pc-controller;93 pc-motherboard;94 pc-psu;95 pc-case;96 pc-cooling;97 pc-fan;98 pc-liquid-cooling;99 beauty-fragrance;100 beauty-fragrance-women;101 beauty-fragrance-men;102 beauty-haircare;103 beauty-shampoo;104 beauty-conditioner;105 beauty-hair-styling;106 beauty-personal-care;107 beauty-deodorant;108 beauty-shaving;109 beauty-hair-removal;110 beauty-skincare-serum;111 beauty-sunscreen;112 beauty-cleanser;113 beauty-mask;114 beauty-toner;115 food-staples;116 food-pasta;117 food-legumes;118 food-oil-vinegar;119 food-breakfast;120 food-cheese;121 food-olive;122 food-jam-honey;123 food-soda;124 food-water;125 food-energy;126 food-tea;127 food-frozen;128 food-meat;129 food-dessert;130 auto-accessories;131 auto-in-car-electronics;132 auto-care;133 auto-cleaning;134 auto-tires;135 auto-battery;136 auto-lighting;137 auto-audio;138 home-furniture;139 home-dining-table;140 home-chair;141 home-sofa;142 home-bed;143 home-textile;144 home-bedding;145 home-blanket;146 home-curtain;147 home-kitchen;148 home-cookware;149 home-small-appliances;150 home-coffee-machine;151 home-blender;152 home-airfryer;153 home-vacuum;154 home-lighting;155 home-decor;156 home-rug;157 home-wall-decor;158 home-cleaning;159 home-detergent;160 home-paper-products;161 home-tools;162 home-drill;163 home-saw;164 home-hardware;165 pet;166 pet-cat-food;167 pet-dog-food;168 pet-cat-litter;169 office;170 office-paper-notebook;171 office-a4-paper;172 office-pen;173 office-school-bag;174 baby;175 baby-diaper;176 baby-wipes;177 baby-food;178 baby-toys;179 sports;180 sports-camping;181 sports-fitness;182 sports-bicycle;183 fashion;184 fashion-shoes;185 fashion-men;186 fashion-women;187 fashion-bags;188 books-media;189 books;190 digital-games`

// Allowed values for best_category_id in the response schema (currently 0..190).
// Derived from TAXONOMY_LINE so the enum cannot drift from the taxonomy that is
// shown to the model: each entry starts with its numeric id.
const CATEGORY_ENUM = TAXONOMY_LINE.slice("TAXONOMY:".length)
  .split(";")
  .map((entry) => Number.parseInt(entry, 10))
/**
 * Converts a value to a string for prompt interpolation.
 *
 * @param {*} x - Any value.
 * @returns {string} "" for null/undefined, otherwise String(x).
 */
function s(x) {
  if (x === null || x === undefined) {
    return ""
  }
  return String(x)
}
/**
 * Normalizes the tag list returned by the model.
 *
 * Tags are trimmed, lowercased and de-duplicated (first occurrence wins), then
 * capped at five. De-duplication happens BEFORE the cap so that repeated tags
 * do not use up slots that valid later tags could fill. Non-string entries
 * (null, numbers, objects, ...) are dropped instead of being stringified into
 * junk tags such as "null" or "[object object]".
 *
 * @param {*} tags - Expected to be an array of strings; anything else yields [].
 * @returns {string[]} At most five unique, non-empty, lowercase tags.
 */
function normalizeTags(tags) {
  if (!Array.isArray(tags)) return []

  const unique = new Set()
  for (const tag of tags) {
    if (typeof tag !== "string") continue
    const cleaned = tag.trim().toLowerCase()
    if (cleaned) unique.add(cleaned)
  }
  return [...unique].slice(0, 5)
}
/**
 * Finds the first non-empty text part in a Responses API `output` array.
 *
 * The array may contain items that carry no message text (for example
 * reasoning items) ahead of the message, so every item is scanned rather than
 * only the first one. Parts that declare a `type` other than "output_text" are
 * skipped; parts without a `type` are accepted for backward compatibility.
 *
 * @param {*} output - The `output` field of a response (may be missing).
 * @returns {string|undefined} The text, or undefined when none is found.
 */
function findOutputText(output) {
  if (!Array.isArray(output)) return undefined

  for (const item of output) {
    if (!Array.isArray(item?.content)) continue
    for (const part of item.content) {
      const isTextPart = part?.type === undefined || part.type === "output_text"
      if (isTextPart && typeof part?.text === "string" && part.text !== "") {
        return part.text
      }
    }
  }
  return undefined
}

/**
 * Extracts the model's text from a Responses API result and parses it as JSON.
 *
 * Prefers the `output_text` convenience field and falls back to scanning the
 * raw `output` items when it is absent or empty.
 *
 * @param {object} resp - Result of client.responses.create().
 * @returns {*} The parsed JSON value.
 * @throws {Error} "OpenAI response text missing" when no text can be found
 *   (including when resp itself is null/undefined).
 * @throws {SyntaxError} When the text is not valid JSON.
 */
function parseOutputJson(resp) {
  const direct = resp?.output_text
  const text =
    typeof direct === "string" && direct !== "" ? direct : findOutputText(resp?.output)
  if (!text) throw new Error("OpenAI response text missing")
  return JSON.parse(text)
}
/**
 * Classifies a deal with the OpenAI Responses API.
 *
 * Sends the taxonomy plus the deal's title, description, url and seller as the
 * user message, constrains the reply with a strict JSON schema, and returns
 * the parsed result with defaults applied and tags normalized.
 *
 * Errors from the API call or from parsing the reply are not caught here and
 * propagate to the caller.
 *
 * @param {object} deal
 * @param {*} deal.title - Deal title (null/undefined becomes "").
 * @param {*} deal.description - Deal description (null/undefined becomes "").
 * @param {*} deal.url - Deal URL (null/undefined becomes "").
 * @param {*} deal.seller - Seller name (null/undefined becomes "").
 * @returns {Promise<{best_category_id: number, needs_review: boolean,
 *   has_issue: boolean, issue_type: string, issue_reason: (string|null),
 *   tags: string[]}>}
 */
async function classifyDeal({ title, description, url, seller }) {
  // Taxonomy first, then one "<label>: <value>" line per deal field.
  const dealFields = { title, description, url, seller }
  const fieldLines = Object.entries(dealFields).map(
    ([label, value]) => `${label}: ${s(value)}`
  )
  const userText = [TAXONOMY_LINE, ...fieldLines].join("\n")

  // Shape the model must return; every field is required and no extras are allowed.
  const responseSchema = {
    type: "object",
    additionalProperties: false,
    required: [
      "best_category_id",
      "needs_review",
      "tags",
      "has_issue",
      "issue_type",
      "issue_reason",
    ],
    properties: {
      best_category_id: { type: "integer", enum: CATEGORY_ENUM },
      needs_review: { type: "boolean" },
      tags: { type: "array", items: { type: "string" }, maxItems: 5 },
      has_issue: { type: "boolean" },
      issue_type: {
        type: "string",
        enum: ["NONE", "PROFANITY", "PHONE_NUMBER", "PERSONAL_DATA", "SPAM", "OTHER"],
      },
      issue_reason: { type: ["string", "null"] },
    },
  }

  const messages = [
    { role: "system", content: SYSTEM_PROMPT },
    { role: "user", content: userText },
  ]

  const resp = await client.responses.create({
    model: "gpt-5-nano",
    input: messages,
    text: {
      format: {
        type: "json_schema",
        name: "deal_classification_v1",
        strict: true,
        schema: responseSchema,
      },
    },
  })

  const verdict = parseOutputJson(resp)

  // Fall back to safe defaults for anything missing from the reply.
  const categoryId = verdict.best_category_id ?? 0
  const needsReview = Boolean(verdict.needs_review)
  const hasIssue = Boolean(verdict.has_issue)
  const issueType = verdict.issue_type ?? "NONE"
  const issueReason = verdict.issue_reason ?? null

  return {
    best_category_id: categoryId,
    needs_review: needsReview,
    has_issue: hasIssue,
    issue_type: issueType,
    issue_reason: issueReason,
    tags: normalizeTags(verdict.tags),
  }
}
// Public API of this module: only classifyDeal is exported.
module.exports = { classifyDeal }