English > PVD Python Scripts

PVD Selenium MOD v4 IMDb Movie Script Confusion

<< < (3/15) > >>

Ivek23:
Function ParsePage_IMDBMovieBASE


--- Quote ---// Get ~User Reviews~
--- End quote ---

The following line is added to this part of the code.


--- Quote ---FullReview := StringReplace(FullReview, '; - )', '', True, False, True);
--- End quote ---

The whole code is now like this.


--- Quote ---      // Get ~User Reviews~
   curPos := Pos('<section data-testid="UserReviews"', HTML); // Start of the User Reviews section
   If 0 < curPos Then Begin
      // Loop through multiple reviews if needed
      While 0 < curPos Do Begin
         // Find the review subject start
         curPos := PosFrom('data-testid="review-summary"><a href="', HTML, curPos);
         If 0 < curPos Then Begin
            curPos := PosFrom(' class="ipc-title-link-wrapper" tabindex="0"><h3 class="ipc-title__text">', HTML, curPos) + Length(' class="ipc-title-link-wrapper" tabindex="0"><h3 class="ipc-title__text">');
            EndPos := PosFrom('<svg', HTML, curPos);
            ItemValue := Copy(HTML, curPos, EndPos - curPos);
            ItemValue := Trim(ItemValue); // Clean up leading/trailing whitespace
            curPos := EndPos;

            // Find the review content start
            curPos := PosFrom('<div class="ipc-overflowText--children"><div class="ipc-html-content ipc-html-content--base" role="presentation"><div class="ipc-html-content-inner-div" role="presentation">', HTML, curPos) + Length('<div class="ipc-overflowText--children"><div class="ipc-html-content ipc-html-content--base" role="presentation"><div class="ipc-html-content-inner-div" role="presentation">');
            EndPos := PosFrom('</div></div></div></div></div><div class="ipc-list-card__actions">', HTML, curPos);
            ReviewContent := Copy(HTML, curPos, EndPos - curPos);
            ReviewContent := Trim(ReviewContent); // Clean up leading/trailing whitespace

            // Combine review subject and content
            FullReview := ItemValue + #13#10 + '-------------------------------------------' + #13#10 + ReviewContent; // Add new line between subject and content

            // Clean up unnecessary parts of the review content
            FullReview := StringReplace(FullReview, '; - )', '', True, False, True);
            FullReview := StringReplace(FullReview, ':)', '', True, False, True);
            FullReview := StringReplace(FullReview, '=)', '', True, False, True);

            // Add the review to custom fields
            AddCustomFieldValueByName('User Reviews', FullReview);
            AddCustomFieldValueByName('User Comments', FullReview);

            // Log the review
            If FullReview <> '' Then LogMessage('Function ParsePage_IMDBMovieBASE - Get result User Reviews (CF~User Reviews~) (CF~User Comments~): <br>' + FullReview + ' | |');

            // Move to the next review
            curPos := PosFrom('<section data-testid="UserReviews"', HTML, curPos);
         End Else Break;
      End;
   End;
--- End quote ---

That should be everything for Function ParsePage_IMDBMovieBASE.

Ivek23:
Function ParsePage_IMDBMovieMPAA

Fix for this part of the code.


--- Quote ---    mpaaValue := '';
   
   // Get (CF~IMDbmpaaSummary~)
      curPos := Pos('<select id="jump-to"', HTML);
      If curPos > 0 Then
      Begin
        // Extract the relevant section for categories
        curPos := PosFrom('<option', HTML, curPos);
        endPos := PosFrom('</select>', HTML, curPos);
        mpaaSection := Copy(HTML, curPos, endPos - curPos);
        //LogMessage('Function ParsePage_IMDBMovieMPAA - Extracted Category section for (CF~IMDbmpaaSummary~): ' + mpaaSection);

        // Parse the options and category names
        curPos := 1;
        mpaaContent := '';
        While PosFrom('<option', mpaaSection, curPos) > 0 Do
        Begin
         curPos := PosFrom('<option', mpaaSection, curPos) + Length('<option');
         optionValue := TextBetween(mpaaSection, 'value="', '">', False, curPos);
         categoryName := TextBetween(mpaaSection, '">', '</option>', False, curPos);

         // Format the category link
         mpaaContent := mpaaContent + '<link url="' + MovieURL + optionValue + '">' + categoryName + '</link>    ';

         // Move to the next position
         curPos := PosFrom('</option>', mpaaSection, curPos) + Length('</option>');
        End;

        // Remove the trailing "   •   " if it exists and keep the last </link> intact
        If Copy(mpaaContent, Length(mpaaContent) - 13, 7) = '   •   ' Then
        Begin
         mpaaContent := Copy(mpaaContent, 1, Length(mpaaContent) - 14) + '</link>';
        End;

        // Combine and format the final result 
        mpaaContent := '<link url="' + MovieURL + '#contentRating' + '">Content Ratings Summary:</link>     •     ' + mpaaContent;
       
        // Store the result in the custom field
        AddCustomFieldValueByName('IMDbmpaaSummary', mpaaContent);
        LogMessage('Function ParsePage_IMDBMovieMPAA - Stored result for (CF~IMDbmpaaSummary~)');
      End
      Else
      Begin
        LogMessage('Function ParsePage_IMDBMovieMPAA - Content Rating section for (CF~IMDbmpaaSummary~) not found');
        Result := prError; // Set to error if content rating section is not found
      End;

--- End quote ---
      
      
Custom field to remove.


--- Quote ---      AddFieldValueXML('mpaa', mpaaValue);
      // Disabled: the value is no longer written to the 'IMDB_MPAA' custom field.
      //AddCustomFieldValueByName('IMDB_MPAA', mpaaValue);
      // Log the final mpaaValue.
      LogMessage('Function ParsePage_IMDBMovieMPAA - Final ~mpaa~ Result: ' + mpaaValue + ' | |');

   // Get "Certification" (CF~Certification~) info

--- End quote ---


Ivek23:
Function ParsePage_IMDBMovieBASE

Minor code correction.


--- Quote ---   If Not (ReferencePageDownloaded) Then Begin
      If Not (USE_SAVED_PVDCONFIG And (ConfigOptions[19] = '0')) Then Begin
         // Get ~budget~
         //curPos := Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Budget</span>', HTML); // WEB_SPECIFIC
         curPos := Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Budget</span>', HTML); // WEB_SPECIFIC
         If 0 < curPos Then Begin
            ItemValue := TextBetWeen(HTML, '<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Budget</span><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><li role="presentation" class="ipc-inline-list__item"><span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', '</span></li></ul>', false, curPos); // Strings which opens/closes the data. WEB_SPECIFIC
            ItemValue := StringReplace(ItemValue, ' (estimated)', '', True, True, False); // Eliminate '(estimated)' if exists
            if ItemValue <> '' then AddCustomFieldValueByName('ProductionBudget', ItemValue);
            ItemValue := StringReplace(ItemValue, 'EUR', '', True, True, False); // Eliminate 'EUR' if exists
            ItemValue := StringReplace(ItemValue, #36, '', True, True, False); // Eliminate '$' if exists
            ItemValue := StringReplace(ItemValue, '€', '', True, True, False); // Eliminate '€' if exists
            ItemValue := StringReplace(ItemValue, ',', '', True, True, False); // Eliminate ',' if exists
            AddFieldValueXML('budget', ItemValue);
            if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieBASE - ParsePage_IMDBMovieBASE - Get results ~budget~: ' + ItemValue + ' | |');
         End;
      End;
   End;   //If Not (ReferencePageDownloaded) Then Begin should end here
   
      //Get (CF~Domestic~) Gross US & Canada
      //curPos:=Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Gross US & Canada</span>',HTML);      //WEB_SPECIFIC
      curPos:=Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross US & Canada</span>',HTML);      //WEB_SPECIFIC
      If 0<curPos Then Begin       
         ItemValue:=TextBetWeen(HTML,'<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross US & Canada</span>','</li></ul>',false,curPos);      //Strings which opens/closes the data. WEB_SPECIFIC
         //LogMessage('Function ParsePage_IMDBMovieBASE -      Get results BoxOffice Gross US & Canada:'+ItemValue+'||');
         if ItemValue <> '' then AddCustomFieldValueByName('Domestic',ItemValue);      
         ItemValue:=StringReplace(ItemValue,#36,'',True,True,False);        //Eliminate '$' if exists 
         ItemValue:=StringReplace(ItemValue,'€','',True,True,False);        //Eliminate '€' if exists         
         ItemValue:=StringReplace(ItemValue,',','',True,True,False);        //Eliminate ',' if exists       
         if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieBASE -      Get results Box Office Gross US & Canada  (CF~Domestic~):'+ItemValue+'||');
      End;   
   
      If Not (USE_SAVED_PVDCONFIG And (ConfigOptions[20] = '0')) Then Begin
         // Get ~money~ +  (CF~Worldwide~) Box Office worldwide
         //curPos := Pos('<span class="ipc-metadata-list-item__label" aria-disabled="false">Gross worldwide</span>', HTML); // WEB_SPECIFIC
         curPos := Pos('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross worldwide</span>', HTML); // WEB_SPECIFIC
         If 0 < curPos Then Begin
            ItemValue := TextBetWeen(HTML, '<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">Gross worldwide</span><div class="ipc-metadata-list-item__content-container"><ul class="ipc-inline-list ipc-inline-list--show-dividers ipc-inline-list--inline ipc-metadata-list-item__list-content base" role="presentation"><li role="presentation" class="ipc-inline-list__item"><span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', '</span></li></ul>', false, curPos); // Strings which opens/closes the data. WEB_SPECIFIC
            LogMessage('Function ParsePage_IMDBMovieBASE -      Get results BoxOffice worldwide: ' + ItemValue + ' | |');

            // Process the monetary value for XML
            MoneyValue := ItemValue;
            MoneyValue := StringReplace(MoneyValue, #36, '', True, True, False); // Eliminate '$' if exists
            MoneyValue := StringReplace(MoneyValue, '€', '', True, True, False); // Eliminate '€' if exists
            MoneyValue := StringReplace(MoneyValue, ',', '', True, True, False); // Eliminate ',' if exists
            LogMessage('Function ParsePage_IMDBMovieBASE - Processed Box Office value for "money" PVD field: ' + MoneyValue);
            AddFieldValueXML('money', MoneyValue);

            // Add the Worldwide value with currency symbol
            If ItemValue <> '' Then AddCustomFieldValueByName('Worldwide', ItemValue);

            If MoneyValue <> '' Then LogMessage('Function ParsePage_IMDBMovieBASE - Get results Box Office worldwide ~money~ +  (CF~Worldwide~): ' + MoneyValue + ' | |');
         End;
      End;

--- End quote ---
      

Ivek23:
Function ParsePage_IMDBMovieAKA

Minor code correction.


--- Quote ---//(*   
    //Get ~aka~ (multiple values in a line break separated list) Be careful don't repeat #13
   // Note that "For" loop iterates through each element in the ItemArray and checks if it is already present in the ItemValue string. If it's not present, it adds it to ItemValue. only unique AKA values are stored and logged!
    //if (Pos('It looks like we don' + Chr(39) + 't have any AKAs for this title yet.', HTML)>0) then Exit;
    //Go to "Also Known As"
    curPos := Pos('<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', HTML);                      //Strings start which opens the block content data. WEB_SPECIFIC
   If 0<curPos then begin
      EndPos := curPos;
      //ItemList := HTMLValues2(HTML, '<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', '</section>', '<label class="', '</label>', '*', EndPos);
      ItemList := HTMLValues2(HTML, '<h3 class="ipc-title__text"><span id="akas">Also Known As (AKA)</span>', '</section>', '<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable"', '</span>', '*', EndPos);
      //LogMessage('Function ParsePage_IMDBMovieAKA -      Get result ItemList: ' + ItemList);
      ItemValue1 := ItemList;
      ExplodeString(ItemList, ItemArray, '*');
      ItemValue := '';
      For index := Low(ItemArray) To High(ItemArray) Do Begin        //Remember index begins at 0
         //LogMessage('Function ParsePage_IMDBMovieAKA -      Get result ItemArray[index]*: ' + ItemArray[index]);
         If (Pos(ItemArray[index], ItemValue)=0) Then ItemValue := ItemValue + '<br>' + ItemArray[index];
      End;
      //ItemValue := StringReplace(ItemValue, '', '', True, True, False);
      AddFieldValueXML('aka', ItemValue);
      AddCustomFieldValueByName('Imdb Aka', ItemValue);
      if ItemValue <> '' then LogMessage('Function ParsePage_IMDBMovieAKA -      Get results ~aka~ & (CF~Imdb Aka~) : ' + ItemValue);
   End;
//*)
//(*   
// Collect "country - AKA title" lines from the "Also Known As (AKA)" section and
// store them (one per line) in the custom field 'IMDbCountryAKA'.
// Every marker position is checked before its length is added: a 0 ("not found")
// result would otherwise become a small positive position near the start of the
// page, producing wrong text or an endless loop.

// Initialize the string to collect all entries
allItemValues := '';

// Get "Also Known As (AKA)" information with countries
curPos := Pos('>Also Known As (AKA)<', HTML);
If 0 < curPos Then Begin
    EndPos := PosFrom('</section>', HTML, curPos);

    While curPos < EndPos Do Begin
        // Find each list item
        curPos := PosFrom('<li role="presentation" class="ipc-metadata-list__item', HTML, curPos);
        If (curPos = 0) Or (curPos > EndPos) Then Break;

        // Extract the country or label
        countryStart := PosFrom('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable"', HTML, curPos);
        If (countryStart = 0) Or (countryStart > EndPos) Then Break;
        // The added length includes the aria-disabled attribute that follows the class in the tag
        countryStart := countryStart + Length('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false">');
        countryEnd := PosFrom('</span>', HTML, countryStart);
        If countryEnd = 0 Then Break;
        countryValue := Copy(HTML, countryStart, countryEnd - countryStart);

        // Extract the AKA title
        akaStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable"', HTML, countryEnd);
        If (akaStart = 0) Or (akaStart > EndPos) Then Break;
        akaStart := akaStart + Length('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">');
        akaEnd := PosFrom('</span>', HTML, akaStart);
        If akaEnd = 0 Then Break;
        akaValue := Copy(HTML, akaStart, akaEnd - akaStart);

        // Move curPos past the end of the AKA title span
        curPos := akaEnd + Length('</span>');

        // Extract any subText (like (English), (Japanese), etc.);
        // it only counts when it starts before the closing </li> of this item
        subTextStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item--subText"', HTML, akaEnd);
        If (subTextStart > 0) And (subTextStart < PosFrom('</li>', HTML, akaStart)) Then Begin
            subTextStart := subTextStart + Length('<span class="ipc-metadata-list-item__list-content-item--subText">');
            subTextEnd := PosFrom('</span>', HTML, subTextStart);
            If subTextEnd = 0 Then Break;
            subText := Copy(HTML, subTextStart, subTextEnd - subTextStart);
            akaValue := akaValue + ' ' + subText;
            // Move curPos past the end of the subText span
            curPos := subTextEnd + Length('</span>');
        End Else
            subText := '';

        // Combine country and AKA for the custom field
        ItemValue := countryValue + ' - ' + akaValue;

        // Collect the result
        allItemValues := allItemValues + ItemValue + #13#10;

        // Move to the next list item
        curPos := PosFrom('</li>', HTML, curPos);
        If curPos = 0 Then Break;
        curPos := curPos + Length('</li>');
    End;

    // Add the collected results to the custom field
    AddCustomFieldValueByName('IMDbCountryAKA', allItemValues);
    LogMessage('Function ParsePage_IMDBMovieAKA - Get result IMDbCountryAKA: ' + allItemValues);
End;
//*)
//(*
   // Get (CF~IMDbPremiereDates~) information with countries and dates
      // Initialize the string to collect all entries
      allItemValues := '';

      curPos := Pos('<h3 class="ipc-title__text"><span id="releases">Release Date</span>', HTML);
      LogMessage('Function ParsePage_IMDBMovieAKA - CurPos initial: ' + IntToStr(curPos));
      If 0 < curPos Then Begin
         EndPos := PosFrom('</section>', HTML, curPos);
         //LogMessage('Function ParsePage_IMDBMovieAKA - EndPos: ' + IntToStr(EndPos));

         While curPos < EndPos Do Begin
            // Find each list item
            curPos := PosFrom('<li role="presentation" class="ipc-metadata-list__item ipc-metadata-list', HTML, curPos);
            //LogMessage('Function ParsePage_IMDBMovieAKA - CurPos list item: ' + IntToStr(curPos));
            If (curPos = 0) Or (curPos > EndPos) Then Break;

            // Extract the country or label
            countryStart := PosFrom('aria-label="', HTML, curPos) + Length('aria-label="');
            countryEnd := PosFrom('"', HTML, countryStart);
            countryValue := Copy(HTML, countryStart, countryEnd - countryStart);
            //LogMessage('Function ParsePage_IMDBMovieAKA - CountryValue for (CF~IMDbPremiereDates~): ' + countryValue);

            // Extract the release date
            dateStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">', HTML, countryEnd) + Length('<span class="ipc-metadata-list-item__list-content-item ipc-btn--not-interactable" aria-disabled="false">');
            dateEnd := PosFrom('</span>', HTML, dateStart);
            dateValue := Copy(HTML, dateStart, dateEnd - dateStart);
            //LogMessage('Function ParsePage_IMDBMovieAKA - DateValue for (CF~IMDbPremiereDates~): ' + dateValue);

            // Move curPos past the end of the date span
            curPos := dateEnd + Length('</span>');
            //LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after date for (CF~IMDbPremiereDates~): ' + IntToStr(curPos));

            // Extract any subText (like (limited), (Fantasia International Film Festival), etc.)
            subTextStart := PosFrom('<span class="ipc-metadata-list-item__list-content-item--subText"', HTML, dateEnd);
            //LogMessage('Function ParsePage_IMDBMovieAKA - SubTextStart for (CF~IMDbPremiereDates~): ' + IntToStr(subTextStart));
            If (subTextStart > 0) And (subTextStart < PosFrom('</li>', HTML, dateStart)) Then Begin
               subTextStart := subTextStart + Length('<span class="ipc-metadata-list-item__list-content-item--subText">');
               subTextEnd := PosFrom('</span>', HTML, subTextStart);
               subText := Copy(HTML, subTextStart, subTextEnd - subTextStart);
               dateValue := dateValue + ' ' + subText;
               // Move curPos past the end of the subText span
               curPos := subTextEnd + Length('</span>');
               //LogMessage('Function ParsePage_IMDBMovieAKA - SubText for (CF~IMDbPremiereDates~): ' + subText);
               //LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after subText for (CF~IMDbPremiereDates~): ' + IntToStr(curPos));
            End Else
               subText := '';

            // Combine country, date, and subText for the custom field
            ItemValue := countryValue + ' - ' + dateValue;
            //LogMessage('Function ParsePage_IMDBMovieAKA - ItemValue for (CF~IMDbPremiereDates~): ' + ItemValue);

            // Collect the result
            allItemValues := allItemValues + ItemValue + #13#10;

            // Move to the next list item
            curPos := PosFrom('</li>', HTML, curPos) + Length('</li>');
            //LogMessage('Function ParsePage_IMDBMovieAKA - CurPos after </li>: ' + IntToStr(curPos));
         End;

         // Add the collected results to the custom field
         AddCustomFieldValueByName('IMDbPremiereDates', allItemValues);
         LogMessage('Function ParsePage_IMDBMovieAKA - Get result (CF~IMDbPremiereDates~): ' + allItemValues);
      End;
//*)

    LogMessage('Function ParsePage_IMDBMovieAKA END=====================| |');
   //LogMessage('Function ParsePage_IMDBMovieAKA: Ending processing.')
  End; //BlockClose

--- End quote ---

Ivek23:
Function ParsePage_IMDBMovieMPAA

Minor code correction.


--- Quote ---//(*
   // Get "Certification" (CF~Certification~) info
      If Pos('<span id="certificates">Certifications</span>', HTML) > 0 Then
      Begin
        curPos := Pos('<span id="certificates">Certifications</span>', HTML);
        EndPos := PosFrom('</section>', HTML, curPos);

        // Extract the relevant section
        Section := Copy(HTML, curPos, EndPos - curPos);
        //LogMessage('Function ParsePage_IMDBMovieMPAA - Extract the relevant Section: ' + Section + ' | |');

        curPos := 1;
        ItemValue1 := '';

        // Parse the certification items
        While PosFrom('"certificates-item">', Section, curPos) > 0 Do
        Begin
         // Move to the label
         curPos := PosFrom('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false"', Section, curPos) + Length('<span class="ipc-metadata-list-item__label ipc-btn--not-interactable" aria-disabled="false"');
         categoryName := TextBetween(Section, '>', '</span>', False, curPos);
         //LogMessage('Function ParsePage_IMDBMovieMPAA - categoryName after TextBetween: ' + categoryName + ' | |');

         // Move to the value
         curPos := PosFrom('href="', Section, curPos) + Length('href="');
         categoryValue := TextBetween(Section, '">', '</a>', False, curPos);
         //LogMessage('Function ParsePage_IMDBMovieMPAA - categoryValue after TextBetween: ' + categoryValue + ' | |');

         // Format the category link with proper spaces
         ItemValue1 := ItemValue1 + '   •   ' + categoryName + ': ' + categoryValue + '<br>';

         // Move to the next item
         curPos := PosFrom('</li>', Section, curPos) + Length('</li>');
        End;

        // Remove the trailing "   •   " if it exists and keep the last </br> intact
        If Copy(ItemValue1, Length(ItemValue1) - 14, 14) = '<br>   •   ' Then
        Begin
         ItemValue1 := Copy(ItemValue1, 1, Length(ItemValue1) - 14);
        End;

        AddCustomFieldValueByName('Certification', 'Certification<br>' + ItemValue1);
        If ItemValue1 <> '' Then
         LogMessage('Function ParsePage_IMDBMovieMPAA - Certification:<br>' + ItemValue1 + ' | |');
      End
      Else
      Begin
        LogMessage('Function ParsePage_IMDBMovieMPAA - Certification section not found');
        Result := prError; // Set to error if Certification section is not found
      End;
//*)
    LogMessage('Function ParsePage_IMDBMovieMPAA END=====================| |');
  End; //BlockClose

--- End quote ---

Navigation

[0] Message Index

[#] Next page

[*] Previous page

Go to full version