English > Development

Script for egafd.com

<< < (11/27) > >>

Ivek23:
Could you wait two or three days? My computer has half broken down and I cannot get to certain files. Thank you for understanding.

pra15:
No problem, good luck!

Ivek23:
I'm back.


--- Quote ---I certainly made mistakes when modifying my code!
Could you post your complete ParsePeople code?
--- End quote ---
Here is the complete code of the ParsePeople procedure:

--- Code: ---//--------------------Procedure parse people-----------------
// ParsePeople: extracts person data from a person page and hands it to the
// host script API.
//
// Parameters:
//   URL  - address of the page; stored unchanged in the pfURL field.
//   HTML - source of the page; scanned with Pos/PosFrom for fixed markers.
//
// Results are delivered only through AddFieldValue, AddPersonMovie,
// AddImageURL and LogMessage; the procedure returns nothing.
//
// NOTE(review): BASE_URL and PhotoURL are not declared in this procedure and
// are presumably script-level globals - confirm.
// NOTE(review): Title is never assigned a value taken from the page here, so
// "OrigT := Title" yields an empty string - confirm this is intended.
procedure ParsePeople(URL : String; HTML : String);

var curpos, endpos, PosStart, PosEnd, debug_Pos1, difpos : Integer;
    actPosstart, actposstart2, actPosStart3, actPosStart4, actposend, UrlposStart, UrlposEnd : Integer;
    Pseudo, Born, Lien, URL1, Name, OrigT, Title, Year, Av, Role, Notes, Note, tmpYear : String;


begin

  // Make the start values explicit; both are read before any page data
  // may have been assigned to them.
  Born := '';
  Title := '';

  //URL
  AddFieldValue(pfURL, URL);


  //BORN:
  // Walk the list cells that follow the "Notes" header until the next '</tr>'.
  curpos := Pos('<th>Notes</th>', HTML);
  endpos := curpos;

  while (curpos > 0) and (curpos < PosFrom('</tr>', HTML, endpos)) do begin
    endpos := curpos;
    PosStart := PosFrom('<td><ul class="list"><li>', HTML, endpos);
    // A missing marker gives 0; stop instead of copying from bogus offsets.
    if PosStart = 0 then
      Break;
    PosEnd := PosFrom('</li></ul></td>', HTML, PosStart);
    if PosEnd = 0 then
      Break;
    // 25 is the length of the opening marker '<td><ul class="list"><li>'.
    Born := Trim(Copy(HTML, (PosStart + 25), (PosEnd - PosStart - 25)));
    //LogMessage('BORN :' + Born);
    curpos := PosFrom('<td><ul class="list"><li>', HTML, PosEnd);
    if Born <> '' then
      //AddFieldValue(pfBirthplace, Born);
      LogMessage('BORN :' + Born);
  end;


  //AKA:
  // Collect every class="acta" span between the "Pseudonyms" and "Films" headers.
  curpos := Pos('<th>Pseudonyms</th>', HTML);
  endpos := curpos;

  while (curpos > 0) and (curpos < PosFrom('<th>Films</th>', HTML, endpos)) do begin
    endpos := curpos;
    PosStart := PosFrom('class="acta">', HTML, endpos);
    // A missing marker gives 0; stop instead of copying from bogus offsets.
    if PosStart = 0 then
      Break;
    PosEnd := PosFrom('</span>', HTML, PosStart);
    if PosEnd = 0 then
      Break;
    // 13 is the length of 'class="acta">'.
    Pseudo := Trim(Copy(HTML, (PosStart + 13), (PosEnd - PosStart - 13)));
    LogMessage('AKA :' + Pseudo);
    curpos := PosFrom('class="acta">', HTML, PosEnd);
    if Pseudo <> '' then
      AddFieldValue(pfAltnames, Pseudo);
  end;


  //BIO:
  // Build one text line per film link found after the "Films" header and
  // register each film through AddPersonMovie.
  curpos := Pos('<th>Films</th>', HTML);
  LogMessage('Films readout');
  if curpos > 0 then begin

    Lien := '';

    endpos := curpos;
    while (curpos > 0) and (curpos < PosFrom('</ul>', HTML, endpos)) do begin

      endpos := curpos; // Set last position to actual position

      // Start every film with empty values so that nothing leaks over
      // from the previous list entry when a part is missing.
      Notes := '';
      Year := '';
      Note := '';
      Role := '';

      // get url (9 is the length of '<a href="')
      UrlPosStart := PosFrom('<a href="', HTML, endpos);  // search for url start
      UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart);  // search for url end
      URL1 := BASE_URL + Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9));
      LogMessage(URL1);


(*       // get url (for example)
      UrlPosStart := PosFrom('<a href="', HTML, EndPos);  // search for url start
      UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart);  // search for url end
      URL1 := BASE_URL + Trim(Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) ));
      LogMessage(URL1);
*)


      // Get Name: the link text between '">' and '</a>'
      actPosStart := PosFrom('<a href="', HTML, endpos);   // search for url start
      actPosStart2 := PosFrom('">', HTML, actPosStart);
      actPosEnd := PosFrom('</a>', HTML, actPosStart2);    // search for url end
      Name := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2)));
      LogMessage(Name);

      // Cut the name at the first '(' if there is one.
      debug_pos1 := Pos('(', Name);
      if debug_pos1 > 0 then
        Name := Copy(Name, 0, debug_pos1 - 1);
      LogMessage(Name);


      //If Original:
      // Look at the element that follows the link; when its text starts with
      // 'alt', take the text from offset 22 on as the original title.
      actPosStart := actPosEnd + 5;
      actPosStart := PosFrom('">', HTML, actPosStart) + 2;
      actPosEnd := PosFrom('</', HTML, actPosStart) - 1;
      if Copy(HTML, actPosStart, 3) = 'alt' then
        OrigT := Copy(HTML, (actPosStart + 22), (actPosEnd - actPosStart - 21))
      else
        OrigT := Title;


(* // Get Title  (for movies)
      actPosStart := PosFrom('<a href="', HTML, EndPos);   // search for url start;
      actPosStart2 := PosFrom('">', HTML, actPosStart);
      actPosEnd:=PosFrom('</a>', HTML, actPosStart2);    // search for url end
      Title := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
      LogMessage(Title);

      debug_pos1:=Pos('(',Title);
      if debug_pos1 >0 then
        Title := Copy(Title,0,debug_pos1-1);
      LogMessage(Title);
*)


      //Notes :
      // Av is the single character found 5 positions after the start of '</a>';
      // a '<' there means another tag follows whose text is read as the notes.
      actPosStart := PosFrom('<a href="', HTML, endpos);
      actPosStart2 := PosFrom('</a>', HTML, actPosStart);
      Av := Trim(Copy(HTML, (actPosStart2 + 5), 1));
      LogMessage('AV : ' + Av);
      if Av = '<' then begin
        actPosStart2 := (actPosStart2 + 5);           //Step to go after "</a>"
        actPosStart3 := PosFrom('>', HTML, actPosStart2);
        actPosEnd := PosFrom('<', HTML, actPosStart3);
        Notes := Trim(Copy(HTML, (actPosStart3 + 1), (actPosEnd - actPosStart3 - 1)));
        LogMessage('Notes :' + Notes);
      end;

      LogMessage(Notes);


      ///Get Year & Note :
      // The first two characters of the notes decide how they are split.
      tmpYear := Copy(Notes, 0, 2);
      LogMessage('tmpYear :' + tmpYear);

      case tmpYear of
        'c.' : begin
          Year := Copy(Notes, 4, 4);
          Note := '';
        end;

        '19', '20' : begin
          // A 5th character of 's' or '?' belongs to the year; test that one
          // character (the former test compared five characters with one and
          // could never match).
          if (Copy(Notes, 5, 1) = 's') or (Copy(Notes, 5, 1) = '?') then begin
            Year := Copy(Notes, 0, 5);
            Note := Copy(Notes, 7, Length(Notes) - 6);
          end
          else begin
            Year := Copy(Notes, 0, 4);
            Note := Copy(Notes, 6, Length(Notes) - 5);
          end;
        end;

      else begin
        Year := '';
        Note := Notes;
      end;
      end;

      LogMessage('Year :' + Year);
      LogMessage('Note :' + Note);


(*       // Get Year
      actPosStart := PosFrom('<a href="', HTML, EndPos);
      actPosStart2 := PosFrom('</a>', HTML, actPosStart);
      Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
      logmessage('AV : ' + Av);
      If Av = '<' then begin
        actposStart2 := (actposstart2 + 5);           //Step to go after "</a>"
        actposstart3 := PosFrom('>', HTML, actposStart2);
        actposend := PosFrom('<', HTML, actposstart3);
        Year := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
        logmessage(Year);
      end;

      debug_pos1:=Pos('(',Year);
      if debug_pos1 >0 then
        Year:= Copy(Year,0,debug_pos1-1);
      LogMessage(Year);
*)


(* // Get Role (Now defined)
      actPosStart4 := PosFrom('<i>', HTML, (actposend-1)) + 4;
      actPosEnd:=PosFrom('</i>', HTML, actPosStart4) - 1;
      Role := Trim(Copy(HTML, actposStart4, (actPosEnd - actPosStart4)));
      LogMessage('Role: ' + Role);

      debug_pos1:=Pos('(',Role);
      if debug_pos1 >0 then
        Role:= Copy(Role,0,debug_pos1-1);
      LogMessage(Role);
*)


      // Get Role (Now defined)
      // Only accept an '<i>' that starts less than 200 characters after the
      // current end position; a missing '<i>' makes difpos non-positive.
      difpos := (PosFrom('<i>', HTML, (actPosEnd - 1)) + 4) - actPosEnd;
      LogMessage('DIFFERENCE : ' + IntToStr(difpos));

      if (difpos > 0) and (difpos < 200) then begin

        actPosStart := PosFrom('<i>', HTML, (actPosEnd - 1)) + 4;
        {actPosStart := PosFrom('> <i>', HTML, actPosStart) + 6;}
        actPosEnd := PosFrom('</i></li>', HTML, actPosStart) - 1;
        Role := Trim(Copy(HTML, actPosStart, (actPosEnd - actPosStart)));
        LogMessage('Role: ' + Role);

        // Cut the role at the first '(' if there is one.
        debug_pos1 := Pos('(', Role);
        if debug_pos1 > 0 then
          Role := Copy(Role, 0, debug_pos1 - 1);
        LogMessage(Role);

      end;


      //AddPersonMovie(Trim(Title), '', Role, Year, LowerCase(URL1), ctActors);
      AddPersonMovie(Trim(OrigT), '', Role, Year, LowerCase(URL1), ctActors);


(*    // Total Line
      if Lien <> '' then
        Lien := Lien + #13;
      if URL1 <> '' then
        Lien := Lien + '<link url="' + URL1 + '">';
      Lien := Lien + Name + '</link>';
      if Year <> '' then
        Lien := Lien + ' • ' + Year;
      If Note <> '' then
        Lien := Lien + ' • ' + Note;
      if Role <> '' then
        Lien := Lien + ' • ' + Role;
*)


      // Total Line
      // Entries are separated by #13; the name is written as a link only
      // when OrigT equals Title.
      if Lien <> '' then
        Lien := Lien + #13;
      if URL1 <> '' then begin
        if OrigT <> Title then
          Lien := Lien + Name
        else
          Lien := Lien + '<link url="' + URL1 + '">' + Name + '</link>';
      end;
      if Year <> '' then
        Lien := Lien + ' • ' + Year;
      if Note <> '' then
        Lien := Lien + ' • ' + Note;
      if Role <> '' then
        Lien := Lien + ' • ' + Role;

      LogMessage('LIEN :' + Lien);


      // Continue with the next link after the last position reached.
      curpos := PosFrom('<a href="', HTML, actPosEnd);
    end;


    // Store the film list, preceded by the BORN text when there is one.
    if (Lien <> '') and (Born = '') then
      AddFieldValue(pfBio, Lien);
    if (Lien <> '') and (Born <> '') then
      AddFieldValue(pfBio, Born + #13+#13 + Lien);
  end;


  //Foto
  // The picture address is the value of the first src="/actresses/id/..."
  // attribute, read up to '" width' (5 is the length of 'src="').
  curpos := Pos('src="/actresses/id/', HTML);
  if curpos > 0 then begin
    endpos := PosFrom('" width', HTML, curpos);
    PhotoURL := BASE_URL + Copy(HTML, curpos + 5, endpos - curpos - 5);
    LogMessage('URL de la photo: ' + PhotoURL);
    {PhotoURL := HTMLToText (PhotoURL);}
    AddImageURL(4, PhotoURL);
  end
  else begin
    PhotoURL := '';
  end;

end;

--- End code ---


--- Quote from: pra15 on December 06, 2011, 04:42:12 pm ---Sorry, i don't understand your modification!
--- End quote ---

--- Quote from: Ivek23 on December 06, 2011, 11:47:31 am ---I modified the part of the Bio code


--- End quote ---

Here is a complete Modified Bio code


--- Code: --- //BIO:
// BIO section (fragment of the ParsePeople body): builds one text line per
// film link found after the "Films" header, registers each film through
// AddPersonMovie and finally stores the list in the pfBio field.
// Relies on variables declared by the enclosing procedure and on Born having
// been filled in by the preceding BORN section.
curpos := Pos('<th>Films</th>', HTML);
LogMessage('Films readout');
if curpos > 0 then begin

  Lien := '';

  endpos := curpos;
  while (curpos > 0) and (curpos < PosFrom('</ul>', HTML, endpos)) do begin

    endpos := curpos; // Set last position to actual position

    // Start every film with empty values so that nothing leaks over
    // from the previous list entry when a part is missing.
    Notes := '';
    Year := '';
    Note := '';
    Role := '';

    // get url (9 is the length of '<a href="')
    UrlPosStart := PosFrom('<a href="', HTML, endpos);  // search for url start
    UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart);  // search for url end
    URL1 := BASE_URL + Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9));
    LogMessage(URL1);


(*       // get url (for example)
    UrlPosStart := PosFrom('<a href="', HTML, EndPos);  // search for url start
    UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart);  // search for url end
    URL1 := BASE_URL + Trim(Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) ));
    LogMessage(URL1);
*)


    // Get Name: the link text between '">' and '</a>'
    actPosStart := PosFrom('<a href="', HTML, endpos);   // search for url start
    actPosStart2 := PosFrom('">', HTML, actPosStart);
    actPosEnd := PosFrom('</a>', HTML, actPosStart2);    // search for url end
    Name := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2)));
    LogMessage(Name);

    // Cut the name at the first '(' if there is one.
    debug_pos1 := Pos('(', Name);
    if debug_pos1 > 0 then
      Name := Copy(Name, 0, debug_pos1 - 1);
    LogMessage(Name);


    //If Title:
    // NOTE(review): on the first pass OrigT has not been assigned yet, and on
    // later passes it still holds the value of the previous film - confirm
    // that falling back to it here is intended.
    actPosStart := actPosEnd + 5;
    actPosStart := PosFrom('">', HTML, actPosStart) + 2;
    actPosEnd := PosFrom('</', HTML, actPosStart) - 1;
    if Copy(HTML, actPosStart, 11) = 'alternative' then
      Title := Copy(HTML, (actPosStart + 22), (actPosEnd - actPosStart - 21))
    else
      Title := OrigT;


    //If Original:
    // NOTE(review): this search starts from the end position left behind by
    // the "If Title" step above, so it examines the element AFTER the one
    // tested there, not the same one - confirm this is intended.
    actPosStart := actPosEnd + 5;
    actPosStart := PosFrom('">', HTML, actPosStart) + 2;
    actPosEnd := PosFrom('</', HTML, actPosStart) - 1;
    if Copy(HTML, actPosStart, 3) = 'alt' then
      OrigT := Copy(HTML, (actPosStart + 22), (actPosEnd - actPosStart - 21))
    else
      OrigT := Title;


(* // Get Title  (for movies)
    actPosStart := PosFrom('<a href="', HTML, EndPos);   // search for url start;
    actPosStart2 := PosFrom('">', HTML, actPosStart);
    actPosEnd:=PosFrom('</a>', HTML, actPosStart2);    // search for url end
    Title := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
    LogMessage(Title);

    debug_pos1:=Pos('(',Title);
    if debug_pos1 >0 then
      Title := Copy(Title,0,debug_pos1-1);
    LogMessage(Title);
*)


    //Notes :
    // Av is the single character found 5 positions after the start of '</a>';
    // a '<' there means another tag follows whose text is read as the notes.
    actPosStart := PosFrom('<a href="', HTML, endpos);
    actPosStart2 := PosFrom('</a>', HTML, actPosStart);
    Av := Trim(Copy(HTML, (actPosStart2 + 5), 1));
    LogMessage('AV : ' + Av);
    if Av = '<' then begin
      actPosStart2 := (actPosStart2 + 5);           //Step to go after "</a>"
      actPosStart3 := PosFrom('>', HTML, actPosStart2);
      actPosEnd := PosFrom('<', HTML, actPosStart3);
      Notes := Trim(Copy(HTML, (actPosStart3 + 1), (actPosEnd - actPosStart3 - 1)));
      LogMessage('Notes :' + Notes);
    end;

    LogMessage(Notes);


    ///Get Year & Note :
    // The first two characters of the notes decide how they are split.
    tmpYear := Copy(Notes, 0, 2);
    LogMessage('tmpYear :' + tmpYear);

    case tmpYear of
      'c.' : begin
        Year := Copy(Notes, 4, 4);
        Note := '';
      end;

      '19', '20' : begin
        // A 5th character of 's' or '?' belongs to the year; test that one
        // character (the former test compared five characters with one and
        // could never match).
        if (Copy(Notes, 5, 1) = 's') or (Copy(Notes, 5, 1) = '?') then begin
          Year := Copy(Notes, 0, 5);
          Note := Copy(Notes, 7, Length(Notes) - 6);
        end
        else begin
          Year := Copy(Notes, 0, 4);
          Note := Copy(Notes, 6, Length(Notes) - 5);
        end;
      end;

    else begin
      Year := '';
      Note := Notes;
    end;
    end;

    LogMessage('Year :' + Year);
    LogMessage('Note :' + Note);


(*       // Get Year
    actPosStart := PosFrom('<a href="', HTML, EndPos);
    actPosStart2 := PosFrom('</a>', HTML, actPosStart);
    Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
    logmessage('AV : ' + Av);
    If Av = '<' then begin
      actposStart2 := (actposstart2 + 5);           //Step to go after "</a>"
      actposstart3 := PosFrom('>', HTML, actposStart2);
      actposend := PosFrom('<', HTML, actposstart3);
      Year := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
      logmessage(Year);
    end;

    debug_pos1:=Pos('(',Year);
    if debug_pos1 >0 then
      Year:= Copy(Year,0,debug_pos1-1);
    LogMessage(Year);
*)


(* // Get Role (Now defined)
    actPosStart4 := PosFrom('<i>', HTML, (actposend-1)) + 4;
    actPosEnd:=PosFrom('</i>', HTML, actPosStart4) - 1;
    Role := Trim(Copy(HTML, actposStart4, (actPosEnd - actPosStart4)));
    LogMessage('Role: ' + Role);

    debug_pos1:=Pos('(',Role);
    if debug_pos1 >0 then
      Role:= Copy(Role,0,debug_pos1-1);
    LogMessage(Role);
*)


    // Get Role (Now defined)
    // Only accept an '<i>' that starts less than 200 characters after the
    // current end position; a missing '<i>' makes difpos non-positive.
    difpos := (PosFrom('<i>', HTML, (actPosEnd - 1)) + 4) - actPosEnd;
    LogMessage('DIFFERENCE : ' + IntToStr(difpos));

    if (difpos > 0) and (difpos < 200) then begin

      actPosStart := PosFrom('<i>', HTML, (actPosEnd - 1)) + 4;
      {actPosStart := PosFrom('> <i>', HTML, actPosStart) + 6;}
      actPosEnd := PosFrom('</i></li>', HTML, actPosStart) - 1;
      Role := Trim(Copy(HTML, actPosStart, (actPosEnd - actPosStart)));
      LogMessage('Role: ' + Role);

      // Cut the role at the first '(' if there is one.
      debug_pos1 := Pos('(', Role);
      if debug_pos1 > 0 then
        Role := Copy(Role, 0, debug_pos1 - 1);
      LogMessage(Role);

    end;


    //AddPersonMovie(Trim(Title), '', Role, Year, LowerCase(URL1), ctActors);
    AddPersonMovie(Trim(OrigT), '', Role, Year, LowerCase(URL1), ctActors);


(*    // Total Line
    if Lien <> '' then
      Lien := Lien + #13;
    if URL1 <> '' then
      Lien := Lien + '<link url="' + URL1 + '">';
    Lien := Lien + Name + '</link>';
    if Year <> '' then
      Lien := Lien + ' • ' + Year;
    If Note <> '' then
      Lien := Lien + ' • ' + Note;
    if Role <> '' then
      Lien := Lien + ' • ' + Role;
*)


    // Total Line
    // Entries are separated by #13; the name is written as a link only when
    // Title equals OrigT. (The former nested "OrigT <> Title" test repeated
    // the same comparison and could never take its first branch.)
    if Lien <> '' then
      Lien := Lien + #13;
    if URL1 <> '' then begin
      if Title <> OrigT then
        Lien := Lien + Name
      else
        Lien := Lien + '<link url="' + URL1 + '">' + Name + '</link>';
    end;
    if Year <> '' then
      Lien := Lien + ' • ' + Year;
    if Note <> '' then
      Lien := Lien + ' • ' + Note;
    if Role <> '' then
      Lien := Lien + ' • ' + Role;

    LogMessage('LIEN :' + Lien);


    // Continue with the next link after the last position reached.
    curpos := PosFrom('<a href="', HTML, actPosEnd);
  end;


  // Store the film list, preceded by the BORN text when there is one.
  if (Lien <> '') and (Born = '') then
    AddFieldValue(pfBio, Lien);
  if (Lien <> '') and (Born <> '') then
    AddFieldValue(pfBio, Born + #13 + #13 + Lien);
end;

--- End code ---


--- Quote from: Ivek23 on December 06, 2011, 11:47:31 am ---and now, in addition to  Original Title also that Title, which are not  Alternative Title.
Not the best.
In Bio movie list is not as transparent as yours, I like it, Of course, if that is what change is also OK.

--- End quote ---

Your version of this code is probably still better; perhaps there is something in it that could be fixed.

pra15:
Thanks, but I still don't understand!

The result is the same as the version before my modification.

The //If Title and //If Original sections contain the same code (and have the same effect)!

The same goes for the Lien part.


I can't find an actress page with birth info on egafd; do you have any links?

Ivek23:

--- Quote from: pra15 on December 08, 2011, 01:52:20 pm ---Thanks, but i always don't understand!

The result is the same as the version before my modification.

//If title and //If Original have the same code (the same effect)!

idem in part of Lien.

--- End quote ---
OK. That is no longer a big deal, so let's leave it as in your modification unless a better solution for this change comes up. I would rather we try to find a solution for this:
--- Code: --- //BORN:
// BORN section (fragment of the ParsePeople body): walks the list cells that
// follow the "Notes" header until the next '</tr>' and stores every non-empty
// cell text in the pfBirthplace field.
curpos := Pos('<th>Notes</th>', HTML);
endpos := curpos;

while (curpos > 0) and (curpos < PosFrom('</tr>', HTML, endpos)) do begin
  endpos := curpos;
  PosStart := PosFrom('<td><ul class="list"><li>', HTML, endpos);
  // A missing marker gives 0; stop instead of copying from bogus offsets
  // and restarting the search from the beginning of the page.
  if PosStart = 0 then
    Break;
  PosEnd := PosFrom('</li></ul></td>', HTML, PosStart);
  if PosEnd = 0 then
    Break;
  // 25 is the length of the opening marker '<td><ul class="list"><li>'.
  Born := Trim(Copy(HTML, (PosStart + 25), (PosEnd - PosStart - 25)));
  //LogMessage('BORN :' + Born);
  curpos := PosFrom('<td><ul class="list"><li>', HTML, PosEnd);
  if Born <> '' then
    AddFieldValue(pfBirthplace, Born);
  LogMessage('BORN :' + Born);
end;
--- End code ---

--- Quote from: Ivek23 on December 06, 2011, 11:47:31 am ---Could BORN keep working as it does now, but with the Birthplace field arranged so that it shows data like this, for example:
Czech, b. 1985
Hungarian. b. 1978

--- End quote ---
if possible.

Navigation

[0] Message Index

[#] Next page

[*] Previous page

Go to full version