Skip to main content

CTR Model Train & Predict

import "github.com/auxten/go-ctr/recommend"

Constants

// Package-level constants. Their consumers are not shown on this page, so the
// notes below are assumptions to be confirmed against the implementation.
const (
// NOTE(review): presumably the number of concurrent sample assemblers — confirm.
SampleAssembler = 16
// NOTE(review): presumably the context key under which a Stage is stored — confirm.
StageKey = "stage"
// NOTE(review): presumably the item embedding vector dimension — confirm.
ItemEmbDim = 16
// NOTE(review): presumably the item2vec context window size — confirm.
ItemEmbWindow = 5
// NOTE(review): presumably the length of the user behavior sequence — confirm.
UserBehaviorLen = 10
)

Variables

var (

// TODO: maybe add a switch to control whether the training cache is reused
// when predicting.
UserFeatureCache *ccache.Cache
ItemFeatureCache *ccache.Cache
UserBehaviorCache *ccache.Cache

// DefaultUserFeature and DefaultItemFeature are fallbacks: when non-nil, they
// are used to fill in for a user or item that is missing from the database.
DefaultUserFeature []float32
DefaultItemFeature []float32

// NOTE(review): DebugUserId and DebugItemId presumably select a user/item for
// debug output; their use is not shown here — confirm.
DebugUserId int
DebugItemId int
)

func BatchPredict

func BatchPredict(ctx context.Context, recSys Predictor, sampleKeys []Sample) (y tensor.Tensor, err error)

func GetItemEmbeddingModelFromUb

func GetItemEmbeddingModelFromUb(ctx context.Context, iSeq ItemEmbedding) (mod model.Model, err error)

func GetSampleVector

func GetSampleVector(ctx context.Context, userFeatureCache *ccache.Cache, itemFeatureCache *ccache.Cache, featureProvider BasicFeatureProvider, sampleKey *Sample) (vec []float32, userFeatureWidth int, itemFeatureWidth int, err error)

func StartHttpApi

func StartHttpApi(predict Predictor, path string, addr string, efs *embed.FS) (err error)

StartHttpApi starts the HTTP API for recommendation. Query it with:

curl --header "Content-Type: application/json" \
--request POST \
--data '{"userId":107,"itemIdList":[1,2,39]}' \
http://localhost:8080/api/v1/recommend

type BasicFeatureProvider

// BasicFeatureProvider combines UserFeaturer and ItemFeaturer: an
// implementation must provide both GetUserFeature and GetItemFeature.
type BasicFeatureProvider interface {
UserFeaturer
ItemFeaturer
}

type DashboardOverviewResult

// DashboardOverviewResult is the value returned by
// FeatureOverview.GetDashboardOverview. All fields are ints and are encoded
// under the snake_case JSON keys given in their tags.
type DashboardOverviewResult struct {
Users int `json:"users"`
Items int `json:"items"`
TotalPositive int `json:"total_positive"`
ValidPositive int `json:"valid_positive"`
ValidNegative int `json:"valid_negative"`
}

type FeatureOverview

// FeatureOverview exposes paged overviews of user and item features plus a
// dashboard summary.
type FeatureOverview interface {
// GetUsersFeatureOverview returns a UserItemOverviewResult; offset and size
// are used for paging the query.
// NOTE(review): the meaning of opts is not shown here — confirm against
// implementations.
GetUsersFeatureOverview(ctx context.Context, offset, size int, opts map[string][]string) (UserItemOverviewResult, error)

// GetItemsFeatureOverview returns an ItemOverviewResult; offset and size
// are used for paging the query.
// NOTE(review): the meaning of opts is not shown here — confirm against
// implementations.
GetItemsFeatureOverview(ctx context.Context, offset, size int, opts map[string][]string) (ItemOverviewResult, error)

// GetDashboardOverview returns the dashboard overview; see DashboardOverviewResult.
GetDashboardOverview(ctx context.Context) (DashboardOverviewResult, error)
}

type Fitter

// Fitter fits a model on a TrainSample and returns a PredictAbstract, or an
// error. Train accepts a Fitter as its model argument.
type Fitter interface {
Fit(sample *TrainSample) (PredictAbstract, error)
}

type ItemEmbedding

ItemEmbedding is an interface used to generate item embeddings with an item2vec model from nothing more than a behavior-based item sequence, for example the sequence of items a user liked, bought, or viewed.

// ItemEmbedding is the input accepted by GetItemEmbeddingModelFromUb.
type ItemEmbedding interface {
// ItemSeqGenerator returns a receive-only channel of strings, or an error.
// NOTE(review): the format of each string is not shown here — confirm.
ItemSeqGenerator(context.Context) (<-chan string, error)
}

type ItemFeaturer

// ItemFeaturer provides an item's feature vector as a Tensor.
type ItemFeaturer interface {
// GetItemFeature returns a Tensor for the given int, or an error.
// NOTE(review): the int argument is presumably the item ID — confirm.
GetItemFeature(context.Context, int) (Tensor, error)
}

type ItemOverView

// ItemOverView is one entry of ItemOverviewResult.Items.
type ItemOverView struct {
ItemId int `json:"item_id"`
// ItemFeatures has no json tag, so encoding/json encodes it under the Go
// field name "ItemFeatures" rather than a snake_case key like item_id.
ItemFeatures map[string]interface{}
}

type ItemOverviewResult

// ItemOverviewResult is the value returned by
// FeatureOverview.GetItemsFeatureOverview.
type ItemOverviewResult struct {
Items []ItemOverView `json:"items"`
}

type ItemScore

// ItemScore pairs an item ID with a float32 score. Rank returns a slice of
// ItemScore, and RecApiResponse carries one in ItemScoreList.
type ItemScore struct {
ItemId int `json:"itemId"`
Score float32 `json:"score"`
}

func Rank

func Rank(ctx context.Context, recSys Predictor, userId int, itemIds []int) (itemScores []ItemScore, err error)

type PreRanker

// PreRanker declares a single PreRank method taking a context and returning an
// error.
// NOTE(review): presumably an optional hook invoked before ranking; the caller
// is not shown here — confirm.
type PreRanker interface {
PreRank(context.Context) error
}

type PreTrainer

// PreTrainer declares a single PreTrain method taking a context and returning
// an error.
// NOTE(review): presumably an optional hook invoked before training; the
// caller is not shown here — confirm.
type PreTrainer interface {
PreTrain(context.Context) error
}

type PredictAbstract

// PredictAbstract is the prediction half of a model: it maps an input tensor X
// to an output tensor. It is what Fitter.Fit returns.
type PredictAbstract interface {
Predict(X tensor.Tensor) tensor.Tensor
}

type Predictor

// Predictor combines BasicFeatureProvider (user and item features) with
// PredictAbstract (Predict). It is the type returned by Train and accepted by
// BatchPredict, Rank and StartHttpApi.
type Predictor interface {
BasicFeatureProvider
PredictAbstract
}

func Train

func Train(ctx context.Context, recSys RecSys, mlp Fitter) (model Predictor, err error)

type RecApiRequest

// RecApiRequest has the JSON shape {"userId": ..., "itemIdList": [...]},
// matching the request body in the StartHttpApi curl example.
type RecApiRequest struct {
UserId int `json:"userId"`
ItemIdList []int `json:"itemIdList"`
}

type RecApiResponse

// RecApiResponse carries a list of ItemScore under the JSON key
// "itemScoreList".
// NOTE(review): presumably the response body of the recommend HTTP API —
// confirm against the handler.
type RecApiResponse struct {
ItemScoreList []ItemScore `json:"itemScoreList"`
}

type RecSys

// RecSys combines BasicFeatureProvider (user and item features) with Trainer
// (SampleGenerator). It is the type accepted by Train and GetSample.
type RecSys interface {
BasicFeatureProvider
Trainer
}

type Sample

// Sample is one (user, item, label) record with a timestamp. It is the element
// type produced by Trainer.SampleGenerator and consumed by BatchPredict and
// GetSampleVector.
type Sample struct {
UserId int `json:"userId"`
ItemId int `json:"itemId"`
Label float32 `json:"label"`
// NOTE(review): the unit of Timestamp (seconds vs. milliseconds) is not
// shown here — confirm.
Timestamp int64 `json:"timestamp"`
}

type SampleInfo

// SampleInfo holds four half-open [start, end) index ranges, one per feature
// group. It is embedded in TrainSample as the Info field.
// NOTE(review): the ranges presumably index columns within one row of
// TrainSample.X — confirm.
type SampleInfo struct {
UserProfileRange [2]int // [start, end)
UserBehaviorRange [2]int // [start, end)
ItemFeatureRange [2]int // [start, end)
CtxFeatureRange [2]int // [start, end)
}

type Stage

// Stage distinguishes the training stage from the prediction stage.
type Stage int
// Stage values are assigned by iota: TrainStage is 0 and PredictStage is 1.
const (
TrainStage Stage = iota
PredictStage
)

type Tensor

// Tensor is a flat slice of float32. It is the feature type returned by
// UserFeaturer.GetUserFeature and ItemFeaturer.GetItemFeature, and is distinct
// from tensor.Tensor used by PredictAbstract.
type Tensor []float32

type TrainSample

// TrainSample is the training set passed to Fitter.Fit and returned by
// GetSample. X and Y are flat float32 slices.
// NOTE(review): X is presumably a row-major Rows x XCols matrix and Y holds one
// label per row — confirm.
type TrainSample struct {
X []float32
Y []float32
Rows int
XCols int

// Info records the [start, end) ranges of each feature group; see SampleInfo.
Info SampleInfo
}

func GetSample

func GetSample(recSys RecSys, ctx context.Context) (sample *TrainSample, err error)

type Trainer

// Trainer supplies training samples.
type Trainer interface {
// SampleGenerator returns a receive-only channel of Sample, or an error.
SampleGenerator(context.Context) (<-chan Sample, error)
}

type UserBehavior

UserBehavior interface is used to get the user behavior feature. Typically, it is the user's clicked/bought/liked item ID list ordered by time, descending. During training, you should limit the sequence to avoid time travel (i.e. using behavior that happened after the sample's own time).

maxPk or maxTs could be used here:
- maxPk is the max primary key of the user behavior table.
- maxTs is the max timestamp of the user behavior table.
- maxLen is the max length of the user behavior sequence; if the total length is
greater than maxLen, the sequence will be truncated from the tail,
keeping the latest maxLen items.

As a special case, -1 means no limit. During prediction, you should use the latest user behavior sequence.

// UserBehavior provides a user's behavior sequence as a list of item IDs.
type UserBehavior interface {
// GetUserBehavior returns the item ID sequence for userId, bounded by
// maxLen, maxPk and maxTs, or an error.
GetUserBehavior(ctx context.Context, userId int,
maxLen int64, maxPk int64, maxTs int64) (itemSeq []int, err error)
}

type UserFeaturer

// UserFeaturer provides a user's feature vector as a Tensor.
type UserFeaturer interface {
// GetUserFeature returns a Tensor for the given int, or an error.
// NOTE(review): the int argument is presumably the user ID — confirm.
GetUserFeature(context.Context, int) (Tensor, error)
}

type UserItemOverview

// UserItemOverview is one entry of UserItemOverviewResult.Users.
type UserItemOverview struct {
UserId int `json:"user_id"`
// UserFeatures has no json tag, so encoding/json encodes it under the Go
// field name "UserFeatures" rather than a snake_case key like user_id.
UserFeatures map[string]interface{}
}

type UserItemOverviewResult

// UserItemOverviewResult is the value returned by
// FeatureOverview.GetUsersFeatureOverview.
type UserItemOverviewResult struct {
Users []UserItemOverview `json:"users"`
}