2021
Development / Re: Script for egafd.com
« on: December 08, 2011, 09:47:18 am »
I'm back.
Here is a complete Modified Bio code
Is it still your version of this code is better, perhaps it could be something to fix.
Quote
Certainly i made mistakes in modifying my code!Here is a complete Procedure Parse people code
could you post your total code of Parse people?
Code: [Select]
//--------------------Procedure parse people-----------------
procedure ParsePeople(URL : String; HTML : String);
var curpos, endpos, PosStart, PosEnd, debug_Pos1, difpos : Integer;
actPosstart, actposstart2, actPosStart3, actPosStart4, actposend, UrlposStart, UrlposEnd : Integer;
Pseudo, Born, Lien, URL1, Name, OrigT, Title, Year, Av, Role, Notes, Note, tmpYear : String;
begin
//URL
Addfieldvalue(pfURL,URL);
//BORN:
curpos := Pos('<th>Notes</th>', HTML);
endpos := curpos;
While (curpos > 0) AND (curpos < Posfrom('</tr>', HTML, EndPos)) do begin
endpos := curpos;
PosStart := PosFrom('<td><ul class="list"><li>', HTML, endpos);
PosEnd := PosFrom('</li></ul></td>', HTML, PosStart);
Born := Trim(Copy(HTML, (PosStart + 25), (PosEnd - PosStart - 25)));
//LogMessage('BORN :' + Born);
curpos := posfrom('<td><ul class="list"><li>', HTML, posend);
if Born <> '' then
//AddFieldValue(pfBirthplace, Born);
LogMessage('BORN :' + Born);
end;
//AKA:
curpos := Pos('<th>Pseudonyms</th>', HTML);
endpos := curpos;
While (curpos > 0) AND (curpos < Posfrom('<th>Films</th>', HTML, EndPos)) do begin
endpos := curpos;
PosStart := PosFrom('class="acta">', HTML, endpos);
PosEnd := PosFrom('</span>', HTML, PosStart);
Pseudo := Trim(Copy(HTML, (PosStart + 13), (PosEnd - PosStart - 13)));
LogMessage('AKA :' + Pseudo);
curpos := posfrom('class="acta">', HTML, posend);
if Pseudo <> '' then
AddFieldValue(pfAltnames, Pseudo);
end;
//BIO:
curpos := Pos('<th>Films</th>', HTML);
LogMessage('Films readout');
if curPos > 0 then begin
Lien := '';
EndPos := curPos;
while (curPos > 0) AND (curPos < PosFrom('</ul>', HTML, EndPos)) do begin
EndPos := curPos; // Set last position to actual position
// get url
UrlPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start
UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart); // search for url end
URL1 := BASE_URL + Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) );
LogMessage(URL1);
(* // get url (for example)
UrlPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start
UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart); // search for url end
URL1 := BASE_URL + Trim(Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) ));
LogMessage(URL1);
*)
// Get Name
actPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start;
actPosStart2 := PosFrom('">', HTML, actPosStart)
actPosEnd:=PosFrom('</a>', HTML, actPosStart2); // search for url end
Name := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
LogMessage(Name);
debug_pos1:=Pos('(',Name);
if debug_pos1 >0 then
Name := Copy(Name,0,debug_pos1-1);
LogMessage(Name);
//If Original:
actposstart := actposEnd + 5;
actposstart := PosFrom('">', HTML, actposstart) + 2;
actPosEnd := PosFrom('</', HTML, actPosstart) - 1;
If copy(HTML, actposstart, 3) = 'alt' then
OrigT := Copy(HTML, (actPosstart + 22),(actPosEnd-actPosStart-21))
else
OrigT := Title;
(* // Get Title (for movies)
actPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start;
actPosStart2 := PosFrom('">', HTML, actPosStart)
actPosEnd:=PosFrom('</a>', HTML, actPosStart2); // search for url end
Title := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
LogMessage(Title);
debug_pos1:=Pos('(',Title);
if debug_pos1 >0 then
Title := Copy(Title,0,debug_pos1-1);
LogMessage(Title);
*)
//Notes :
actPosStart := PosFrom('<a href="', HTML, EndPos);
actPosStart2 := PosFrom('</a>', HTML, actPosStart);
Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
logmessage('AV : ' + Av);
If Av = '<' then begin
actposStart2 := (actposstart2 + 5); //Step to go after "</a>"
actposstart3 := PosFrom('>', HTML, actposStart2);
actposend := PosFrom('<', HTML, actposstart3);
Notes := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
logmessage('Notes :' + Notes);
end;
debug_pos1:=Pos('(',Notes);
if debug_pos1 >0 then
Year:= Copy(Notes,0,debug_pos1-1);
LogMessage(Notes);
///Get Year & Note :
tmpYear := Copy(Notes, 0, 2);
logMessage('tmpYear :' + tmpYear);
Case tmpYear of
'c.' : Begin
Year := Copy(Notes,4,4);
Note := '';
end;
'19', '20' : Begin
If Copy(Notes,0,5) = 's' {OR Copy(Notes,0,5) = '?'} then begin
Year := Copy(Notes,0,5);
Note := Copy(Notes,7, Length(Notes)-6);
end
else begin
Year := Copy(Notes,0,4);
Note := Copy(Notes,6, Length(Notes)-5);
end;
If Copy(Notes,0,5) = '?' then begin
Year := Copy(Notes,0,5);
Note := Copy(Notes,7, Length(Notes)-6);
end
else begin
Year := Copy(Notes,0,4);
Note := Copy(Notes,6, Length(Notes)-5);
end;
end;
else begin
Year := '';
Note := Notes;
end;
end;
logmessage('Year :' + Year);
logmessage('Note :' + Note);
(* // Get Year
actPosStart := PosFrom('<a href="', HTML, EndPos);
actPosStart2 := PosFrom('</a>', HTML, actPosStart);
Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
logmessage('AV : ' + Av);
If Av = '<' then begin
actposStart2 := (actposstart2 + 5); //Step to go after "</a>"
actposstart3 := PosFrom('>', HTML, actposStart2);
actposend := PosFrom('<', HTML, actposstart3);
Year := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
logmessage(Year);
end;
debug_pos1:=Pos('(',Year);
if debug_pos1 >0 then
Year:= Copy(Year,0,debug_pos1-1);
LogMessage(Year);
*)
(* // Get Role (Now defined)
actPosStart4 := PosFrom('<i>', HTML, (actposend-1)) + 4;
actPosEnd:=PosFrom('</i>', HTML, actPosStart4) - 1;
Role := Trim(Copy(HTML, actposStart4, (actPosEnd - actPosStart4)));
LogMessage('Role: ' + Role);
debug_pos1:=Pos('(',Role);
if debug_pos1 >0 then
Role:= Copy(Role,0,debug_pos1-1);
LogMessage(Role);
*)
// Get Role (Now defined)
difpos := (PosFrom('<i>', HTML, (actposEnd-1))+4) - actposend;
logmessage('DIFFERENCE : ' + intToStr(difpos));
If difpos > 0 then begin
If difpos < 200 then begin
actPosStart := PosFrom('<i>', HTML, (actposend-1)) + 4;
{actPosStart := PosFrom('> <i>', HTML, actPosStart) + 6;}
actPosEnd:=PosFrom('</i></li>', HTML, actPosStart) - 1;
Role := Trim(Copy(HTML, actposStart, (actPosEnd - actPosStart)));
LogMessage('Role: ' + Role);
debug_pos1:=Pos('(',Role);
if debug_pos1 >0 then
Role:= Copy(Role,0,debug_pos1-1);
LogMessage(Role);
end;
end;
//AddPersonMovie(Trim(Title), '', Role, Year, LowerCase(URL1), ctActors);
AddPersonMovie(Trim(OrigT), '', Role, Year, LowerCase(URL1), ctActors);
(* // Total Line
if Lien <> '' then
Lien := Lien + #13;
if URL1 <> '' then
Lien := Lien + '<link url="' + URL1 + '">';
Lien := Lien + Name + '</link>';
if Year <> '' then
Lien := Lien + ' • ' + Year;
If Note <> '' then
Lien := Lien + ' • ' + Note;
if Role <> '' then
Lien := Lien + ' • ' + Role;
*)
// Total Line
If Lien <> '' then
Lien := Lien + #13;
If URL1 <> '' then begin
If OrigT <> Title then
Lien := Lien + Name
else
Lien := Lien + '<link url="' + URL1 + '">' + Name + '</link>';
end;
If Year <> '' then
Lien := Lien + ' • ' + Year;
If Note <> '' then
Lien := Lien + ' • ' + Note;
If Role <> '' then
Lien := Lien + ' • ' + Role;
LogMessage('LIEN :' + Lien);
curPos := PosFrom('<a href="', HTML, actPosEnd);
end;
if (Lien <> '') AND (Born = '') then
AddFieldValue(pfBio, Lien);
if (Lien <> '') AND (Born <> '') then
AddFieldValue(pfBio, Born + #13+#13 + Lien);
end;
//Foto
curPos :=Pos('src="/actresses/id/',HTML);
if curPos > 0 then begin
EndPos := PosFrom('" width', HTML, curPos);
PhotoURL := BASE_URL + Copy(HTML, curPos + 5, EndPos - curPos - 5);
LogMessage('URL de la photo: '+ PhotoURL);
{PhotoURL := HTMLToText (PhotoURL);}
AddImageURL(4, PhotoURL);
end
else begin
PhotoURL := '';
end;
end;
Sorry, i don't understand your modification!
I modified the part of the Bio code
Here is a complete Modified Bio code
Code: [Select]
//BIO:
curpos := Pos('<th>Films</th>', HTML);
LogMessage('Films readout');
if curPos > 0 then begin
Lien := '';
EndPos := curPos;
while (curPos > 0) AND (curPos < PosFrom('</ul>', HTML, EndPos)) do begin
EndPos := curPos; // Set last position to actual position
// get url
UrlPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start
UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart); // search for url end
URL1 := BASE_URL + Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) );
LogMessage(URL1);
(* // get url (for example)
UrlPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start
UrlPosEnd := PosFrom('" class="', HTML, UrlPosStart); // search for url end
URL1 := BASE_URL + Trim(Copy(HTML, UrlPosStart + 9, (UrlPosEnd - UrlPosStart - 9) ));
LogMessage(URL1);
*)
// Get Name
actPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start;
actPosStart2 := PosFrom('">', HTML, actPosStart)
actPosEnd:=PosFrom('</a>', HTML, actPosStart2); // search for url end
Name := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
LogMessage(Name);
debug_pos1:=Pos('(',Name);
if debug_pos1 >0 then
Name := Copy(Name,0,debug_pos1-1);
LogMessage(Name);
//If Title:
actposstart := actposEnd + 5;
actposstart := PosFrom('">', HTML, actposstart) + 2;
actPosEnd := PosFrom('</', HTML, actPosstart) - 1;
If copy(HTML, actposstart, 11) = 'alternative' then
Title := Copy(HTML, (actPosstart + 22),(actPosEnd-actPosStart-21))
else
Title := OrigT;
//If Original:
actposstart := actposEnd + 5;
actposstart := PosFrom('">', HTML, actposstart) + 2;
actPosEnd := PosFrom('</', HTML, actPosstart) - 1;
If copy(HTML, actposstart, 3) = 'alt' then
OrigT := Copy(HTML, (actPosstart + 22),(actPosEnd-actPosStart-21))
else
OrigT := Title;
(* // Get Title (for movies)
actPosStart := PosFrom('<a href="', HTML, EndPos); // search for url start;
actPosStart2 := PosFrom('">', HTML, actPosStart)
actPosEnd:=PosFrom('</a>', HTML, actPosStart2); // search for url end
Title := Trim(Copy(HTML, (actPosStart2 + 2), (actPosEnd - actPosStart2 - 2) ));
LogMessage(Title);
debug_pos1:=Pos('(',Title);
if debug_pos1 >0 then
Title := Copy(Title,0,debug_pos1-1);
LogMessage(Title);
*)
//Notes :
actPosStart := PosFrom('<a href="', HTML, EndPos);
actPosStart2 := PosFrom('</a>', HTML, actPosStart);
Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
logmessage('AV : ' + Av);
If Av = '<' then begin
actposStart2 := (actposstart2 + 5); //Step to go after "</a>"
actposstart3 := PosFrom('>', HTML, actposStart2);
actposend := PosFrom('<', HTML, actposstart3);
Notes := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
logmessage('Notes :' + Notes);
end;
debug_pos1:=Pos('(',Notes);
if debug_pos1 >0 then
Year:= Copy(Notes,0,debug_pos1-1);
LogMessage(Notes);
///Get Year & Note :
tmpYear := Copy(Notes, 0, 2);
logMessage('tmpYear :' + tmpYear);
Case tmpYear of
'c.' : Begin
Year := Copy(Notes,4,4);
Note := '';
end;
'19', '20' : Begin
If Copy(Notes,0,5) = 's' {OR Copy(Notes,0,5) = '?'} then begin
Year := Copy(Notes,0,5);
Note := Copy(Notes,7, Length(Notes)-6);
end
else begin
Year := Copy(Notes,0,4);
Note := Copy(Notes,6, Length(Notes)-5);
end;
If Copy(Notes,0,5) = '?' then begin
Year := Copy(Notes,0,5);
Note := Copy(Notes,7, Length(Notes)-6);
end
else begin
Year := Copy(Notes,0,4);
Note := Copy(Notes,6, Length(Notes)-5);
end;
end;
else begin
Year := '';
Note := Notes;
end;
end;
logmessage('Year :' + Year);
logmessage('Note :' + Note);
(* // Get Year
actPosStart := PosFrom('<a href="', HTML, EndPos);
actPosStart2 := PosFrom('</a>', HTML, actPosStart);
Av := Trim(Copy(HTML, (actposstart2 + 5), 1));
logmessage('AV : ' + Av);
If Av = '<' then begin
actposStart2 := (actposstart2 + 5); //Step to go after "</a>"
actposstart3 := PosFrom('>', HTML, actposStart2);
actposend := PosFrom('<', HTML, actposstart3);
Year := Trim(Copy(HTML, (actposstart3 +1), (actposend - actposStart3 - 1)));
logmessage(Year);
end;
debug_pos1:=Pos('(',Year);
if debug_pos1 >0 then
Year:= Copy(Year,0,debug_pos1-1);
LogMessage(Year);
*)
(* // Get Role (Now defined)
actPosStart4 := PosFrom('<i>', HTML, (actposend-1)) + 4;
actPosEnd:=PosFrom('</i>', HTML, actPosStart4) - 1;
Role := Trim(Copy(HTML, actposStart4, (actPosEnd - actPosStart4)));
LogMessage('Role: ' + Role);
debug_pos1:=Pos('(',Role);
if debug_pos1 >0 then
Role:= Copy(Role,0,debug_pos1-1);
LogMessage(Role);
*)
// Get Role (Now defined)
difpos := (PosFrom('<i>', HTML, (actposEnd-1))+4) - actposend;
logmessage('DIFFERENCE : ' + intToStr(difpos));
If difpos > 0 then begin
If difpos < 200 then begin
actPosStart := PosFrom('<i>', HTML, (actposend-1)) + 4;
{actPosStart := PosFrom('> <i>', HTML, actPosStart) + 6;}
actPosEnd:=PosFrom('</i></li>', HTML, actPosStart) - 1;
Role := Trim(Copy(HTML, actposStart, (actPosEnd - actPosStart)));
LogMessage('Role: ' + Role);
debug_pos1:=Pos('(',Role);
if debug_pos1 >0 then
Role:= Copy(Role,0,debug_pos1-1);
LogMessage(Role);
end;
end;
//AddPersonMovie(Trim(Title), '', Role, Year, LowerCase(URL1), ctActors);
AddPersonMovie(Trim(OrigT), '', Role, Year, LowerCase(URL1), ctActors);
(* // Total Line
if Lien <> '' then
Lien := Lien + #13;
if URL1 <> '' then
Lien := Lien + '<link url="' + URL1 + '">';
Lien := Lien + Name + '</link>';
if Year <> '' then
Lien := Lien + ' • ' + Year;
If Note <> '' then
Lien := Lien + ' • ' + Note;
if Role <> '' then
Lien := Lien + ' • ' + Role;
*)
// Total Line
If Lien <> '' then
Lien := Lien + #13;
If URL1 <> '' then begin
If Title <> OrigT then
Lien := Lien + Name
else
If OrigT <> Title then
Lien := Lien + Name
else
Lien := Lien + '<link url="' + URL1 + '">' + Name + '</link>';
end;
If Year <> '' then
Lien := Lien + ' • ' + Year;
If Note <> '' then
Lien := Lien + ' • ' + Note;
If Role <> '' then
Lien := Lien + ' • ' + Role;
LogMessage('LIEN :' + Lien);
curPos := PosFrom('<a href="', HTML, actPosEnd);
end;
if (Lien <> '') AND (Born = '') then
AddFieldValue(pfBio, Lien);
if (Lien <> '') AND (Born <> '') then
AddFieldValue(pfBio, Born + #13 + #13 + Lien);
end;
and now, in addition to Original Title also that Title, which are not Alternative Title.
Not the best.
In Bio movie list is not as transparent as yours, I like it, Of course, if that is what change is also OK.
Is it still your version of this code is better, perhaps it could be something to fix.