%ProbS
clear all;

%% Load and preprocess the data
% The file paths are machine-specific; adjust them to the local dataset copy.
data = load('E:\network_papers\u1.base');
test = load('E:\network_papers\u1.test');

% load returns a numeric matrix for ASCII files and a struct for MAT-files.
% Accept both: unwrap the 'train' / 'test' fields only when a struct came back.
if isstruct(data)
    data = data.train;
end
if isstruct(test)
    test = test.test;
end

R = preprocess(data);   % binary training matrix
T = preprocess(test);   % binary test matrix
[M,N] = size(R);
[m,n] = size(T);

% du / di were passed to resource_allocate without ever being defined.
% They are taken here as the row sums and column sums of R.
% NOTE(review): presumably user degree and item degree -- confirm against
% the resource_allocate implementation.
du = sum(R,2);
di = sum(R,1);
w = resource_allocate(R,du,di);

% For every user, remember the columns of R that are still zero.
index_i_n = struct('id', cell(1,M));
for u = 1:M
    index_i_n(u).id = find( R(u,:) == 0 );
end

%% For each user u, predict a score for all of its uncollected items
PR = zeros(M,N);
for u = 1:M
    index_y = find( R(u,:) ~= 0 );      % items the user has collected
    vec = R(u,index_y);
    candidates = index_i_n(u).id;       % items to be scored
    % One matrix-vector product scores every candidate item at once:
    % score(i) = w(i,index_y) * vec'.
    PR(u,candidates) = ( w(candidates,index_y) * vec' )';
end

value = evaluate('precision',R,PR,T,index_i_n);
hit = hitrate(PR,T,20);
save predi_matrix PR;
% ------------------------------------------------------------------------------------------------
%Preprocess
function R = preprocess (A, threshold)
% PREPROCESS  Turn a rating list into a binary row-by-column matrix.
%
%   R = preprocess(A) reads each row of A as [rowIndex, colIndex, rating]
%   and returns a matrix R of size max(A(:,1))-by-max(A(:,2)) in which
%   R(i,j) is 1 when the rating stored for (i,j) is >= 3 and 0 when it is
%   below 3 or when no rating was listed for that cell.
%
%   R = preprocess(A, threshold) uses THRESHOLD instead of 3. THRESHOLD
%   should be positive; cells without a rating hold 0 before thresholding.
%
%   If the same (i,j) pair occurs several times in A, the last occurrence
%   wins. An empty A yields an empty R.

if nargin < 2
    threshold = 3;   % default used by the callers in this project
end

if isempty(A)
    R = [];
    return;
end

rowCount = max( A(:,1) );
colCount = max( A(:,2) );
R = zeros(rowCount, colCount);

% Scatter the ratings into the matrix; later rows of A overwrite earlier ones.
for i = 1:size(A,1)
    R( A(i,1), A(i,2) ) = A(i,3);
end

% Binarise: compute the "liked" mask before any cell is overwritten.
liked = R >= threshold;
R( R < threshold ) = 0;
R( liked ) = 1;
% -------------------------------------------------------------------------------------------------------------
%evaluate
% evaluate function for multiplied rate for recommendation system
function value = evaluate(opt,R,PR,T,index_i_n)
% EVALUATE  Score a predicted rating matrix against a test matrix.
%
%   value = evaluate(opt,R,PR,T,index_i_n)
%
%   opt       - evaluation criterion: 'RMSE', 'pcc', 'hitrate',
%               'rankscore', 'precision', 'recall', 'personalization'
%               or 'novelty'. Any other value raises an error.
%   R         - training matrix (M-by-N). Only its size is used, plus its
%               column sums for 'novelty'.
%   PR        - predicted scores computed from the training set; one row
%               per user, sorted per row to build recommendation lists.
%   T         - test matrix (m-by-n). The loops run over its m rows.
%   index_i_n - struct array; index_i_n(u).id holds the indices of the
%               items user u has not rated.
%
%   value is a scalar for every criterion except 'hitrate', where it is a
%   1-by-n vector (hit rate per recommendation-list length) that is also
%   plotted. Users without any test item contribute 0 to the averages of
%   'hitrate', 'rankscore' and 'recall'.
%
%   'precision' uses a list length of 10; 'recall', 'personalization' and
%   'novelty' use 20, so PR needs at least that many columns.

[m,n] = size(T);
[M,N] = size(R);

%% Select the evaluation method
switch (opt)

    %% Root-mean-square error
    case {'RMSE'}
        RMSE = zeros(1,m);
        for u = 1:m
            index_tmp = index_i_n(u).id;
            index_tmp( index_tmp > n ) = [];   % T has no column for these items
            len = length(index_tmp);
            vec = PR(u,index_tmp) - T(u,index_tmp);
            RMSE(u) = sqrt( sum( vec .* vec ) / len );
            if ~(mod(u,10))
                fprintf('%d\n',u);             % progress indicator
            end
        end
        value = sum(RMSE) / length(RMSE);
        fprintf('The RMSE is:\n%d',value);

    %% Pearson product-moment correlation between predicted and real scores.
    % pcc lies in [-1, 1]; the closer to 1 or -1 the stronger the linear
    % correlation, 0 means no correlation.
    case {'pcc'}
        pcc = zeros(1,m);
        for u = 1:m
            index_tmp = index_i_n(u).id;
            index_tmp( index_tmp > n ) = [];
            len = length(index_tmp);
            predicted = PR(u,index_tmp);
            actual = T(u,index_tmp);           % renamed: 'real' shadows a builtin
            vec1 = predicted - sum(predicted) / len;
            vec2 = actual - sum(actual) / len;
            sum1 = vec1 * vec1';
            sum2 = vec2 * vec2';
            % Leave pcc(u) at 0 when either side has zero variance.
            if ( sum1 ~= 0 ) && ( sum2 ~= 0 )
                pcc(u) = vec1 * vec2' / sqrt( sum1 * sum2 );
            end
            if ~(mod(u,10))
                fprintf('%d\n',u);
            end
        end
        value = sum(pcc) / m;
        fprintf('The PCC is:\n%d',value);

    %% Hitting rate; only meaningful for binary data such as like / dislike
    case {'hitrate'}
        [~,index_sr] = sort(PR,2,'descend');
        depth = min( n, size(index_sr,2) );
        rato = zeros(m,n);
        for u = 1:m
            sumu = sum(T(u,:));
            if (sumu ~= 0)
                ranked = index_sr(u,1:depth);
                % hits(r) is the test value of the r-th ranked item; items
                % without a column in T count as 0.
                hits = zeros(1,n);
                inTest = ranked <= n;
                hits(inTest) = T(u,ranked(inTest));
                % Running total = hits within the top-r list, for every r.
                rato(u,:) = cumsum(hits) / sumu;
            end
            if ~(mod(u,10))
                fprintf('%d\n',u);
            end
        end
        % Sum over users explicitly (dimension 1) so a single-row T still
        % yields one value per list length.
        value = sum(rato,1) / m;
        x = 1:length(value);
        plot(x,value,'--r');
        hold on;
        xlabel('length of recommendation list');
        ylabel('hitting rate');

    %% Average ranking score
    case {'rankscore'}
        [~,index_sr] = sort(PR,2,'descend');
        % Preallocated so the average is always taken over all m users.
        rato = zeros(1,m);
        for u = 1:m
            len1 = length( index_i_n(u).id );
            index_i_t = find( T(u,:) == 1 );
            len2 = length( index_i_t );
            if len2 ~= 0
                % Invert the sort permutation: position(j) is the rank of item j.
                position = zeros( 1, size(index_sr,2) );
                position( index_sr(u,:) ) = 1:size(index_sr,2);
                index_tmp = position( index_i_t );
                rato(u) = sum( index_tmp / len1 ) / len2;
            end
        end
        value = sum(rato) / length(rato);
        fprintf('The average rank score is:\n%d\n',value);

    %% Precision and precision enhancement
    case {'precision'}
        L = 10;
        [~,index_sr] = sort(PR,2,'descend');
        list = index_sr(:,1:L);
        p = zeros(1,m);
        for u = 1:m
            index_i_t = find( T(u,:) == 1 );
            p(u) = numel( intersect( index_i_t, list(u,:) ) ) / L;
        end
        value = sum(p) / m;
        ep = value * M * N / sum( sum(T) );
        fprintf('The precision is:\n%d\n',value);
        fprintf('The precision enhancement is:\n%d\n',ep);

    %% Recall and recall enhancement
    case {'recall'}
        L = 20;
        [~,index_sr] = sort(PR,2,'descend');
        list = index_sr(:,1:L);
        % Preallocated so the average is always taken over all m users.
        recall = zeros(1,m);
        for u = 1:m
            index_i_t = find( T(u,:) == 1 );
            total = sum( T(u,:) );
            if total ~= 0
                recall(u) = sum( ismember( index_i_t, list(u,:) ) ) / total;
            end
        end
        value = sum(recall) / length(recall);
        % A random list of L out of N items recovers a test item with
        % probability L/N, so the enhancement is measured against N (the
        % number of items), not M (the number of users).
        er = value * N / L;
        fprintf('The recall is:\n%d\n',value);
        fprintf('The recall enhancement is:\n%d\n',er);

    %% Personalization: mean pairwise distance between recommendation lists
    case {'personalization'}
        L = 20;
        [~,index_sr] = sort(PR,2,'descend');
        list = index_sr(:,1:L);
        h = zeros(m,m);
        for u = 1:m
            % The diagonal stays 0 (a list fully overlaps itself), so only
            % the upper triangle is computed and mirrored.
            for k = u+1:m
                tmp = intersect( list(u,:), list(k,:) );
                h(u,k) = 1 - length( tmp ) / L;
                h(k,u) = h(u,k);
            end
        end
        value = sum( sum(h) ) / ( m^2 - m );
        fprintf('The personalization is:\n%d\n',value);

    %% Novelty: mean of log2(M / item degree) over each recommendation list
    case {'novelty'}
        degree_i = sum( R,1 );
        L = 20;
        [~,index_sr] = sort(PR,2,'descend');
        list = index_sr(:,1:L);
        I = zeros(1,m);
        for u = 1:m
            % Sum of logs instead of log of a product: same quantity
            % without building a very large intermediate product.
            I(u) = sum( log2( M ./ degree_i( 1, list(u,:) ) ) ) / L;
        end
        value = sum(I) / m;
        fprintf('The novelty is:\n%d\n',value);

    otherwise
        error('evaluate:unknownOption', ...
              'Unknown evaluation option "%s".', opt);
end
% -------------------------------------------------------------------------------------------------
%CF
%% Data preprocessing
clear all;

% Alternative dataset (Jester), kept for reference:
%data = load('E:\network_papers\datasets\Jester\jeste_train');
%test = load('E:\network_papers\datasets\Jester\jester_test');
data = load('E:\network_papers\u1.base');
test = load('E:\network_papers\u1.test');
R = preprocess(data);
T = preprocess(test);

% Disabled alternative preprocessing for inputs that carry .train / .test
% fields; it also drops users whose training row is empty.
%{
R=data.train;
R(R<3)=0;
R(R>=3)=1;
T=test.test;
T(T<3)=0;
T(T>=3)=1;
du = sum(R,2);
di = sum(R,1);
ex=find(du==0);
R(ex,:)=[];
T(ex,:)=[];
du(ex,:)=[];
%}

[M,N] = size(R);
[m,n] = size(T);

% For every user, remember the columns of R that are still zero.
index_i_n = struct('id', cell(1,M));
for u = 1:M
    index_i_n(u).id = find( R(u,:) == 0 );
end

%% Compute the similarity between every user and the other users
sim = get_Sim_u(R);

%% Predict scores
PR = zeros(M,N);
for u = 1:M
    index_n = index_i_n(u).id;   % same set as find( R(u,:) == 0 ), computed above
    for k = 1:length( index_n )
        PR( u, index_n(k) ) = predict_Rate( u, index_n(k), sim, R );
    end
end

value = evaluate('precision',R,PR,T,index_i_n);
hit = hitrate(PR,T,20);