% residual_information(VEs,NoExamples,I) takes as input
% a list VEs of pairs (Vi,Ei), where Vi is a value of the
% attribute and Ei is the list of examples with value Vi for that
% attribute, together with the total number of examples in VEs
% (NoExamples), and returns the residual information I.

% Entry point: seed the running sum with 0 and hand over to the
% 4-argument worker, which adds one contribution per (Value,Examples) pair.
residual_information(ValueExamples,Total,Residual):-
    Start = 0,
    residual_information(ValueExamples,Total,Start,Residual).

% residual_information(VEs,Total,Acc0,Residual) walks the list of
% (Value,Examples) pairs, threading an accumulator Acc0 that ends up
% unified with Residual once the list is exhausted.
residual_information([],_,Residual,Residual).
residual_information([(_,[])|Rest],Total,Acc0,Residual):-
    % A value with no examples adds nothing; commit to this clause so the
    % general clause below is never tried for an empty example list.
    !,
    residual_information(Rest,Total,Acc0,Residual).
residual_information([(_,Examples)|Rest],Total,Acc0,Residual):-
    length(Examples,Count),
    % classes/2 is defined outside this section; its result is consumed by
    % probability_list/3, which expects a list of (_,N) pairs.
    classes(Examples,ClassCounts),
    probability_list(ClassCounts,Count,Probs),
    information(Probs,Info),
    % Weight this subset's information by its share of all examples.
    Weight is Count/Total,
    Acc1 is Acc0+Weight*Info,
    residual_information(Rest,Total,Acc1,Residual).

% probability_list(ClassCounts,Total,Probs) maps each (_,Count) pair in
% ClassCounts to Count/Total, preserving order.
probability_list([],_,[]):-
    !.
probability_list([ClassCount|Rest],Total,[Prob|Probs]):-
    ClassCount = (_,Count),
    Prob is Count/Total,
    probability_list(Rest,Total,Probs).

% information(ProbList,Information): Information is the sum, over every
% probability in ProbList, of the term computed by information2/2.
% The sum is accumulated by information/3, starting from 0.
information(Probs,Total):-
    Zero = 0,
    information(Probs,Zero,Total).

% information(Probs,Acc0,Total) adds the information2/2 term of each
% probability to the accumulator Acc0; Total is the accumulator's value
% when the list is empty.
information([],Total,Total).
information([P|Ps],Acc0,Total):-
    information2(P,Term),
    Acc1 is Acc0+Term,
    information(Ps,Acc1,Total).

% information2(P,I): I is the information term -P*log2(P) for a single
% probability P, with the usual convention that the term is 0 when P is 0.
%
% The zero case is tested numerically (=:=) rather than by head
% unification, so that a float 0.0 is caught as well as integer 0.
% Otherwise 0.0 would reach log(2,0.0) and raise an evaluation error;
% this matters on systems where / always yields a float.
information2(P,I):-
    P =:= 0,
    !,
    I = 0.
information2(P,I):-
    I is -P*log(2,P). % sicstus log(2,P).
