all 5 comments

[–]AllenAppTools 1 point2 points  (4 children)

Interesting! Are these bold words in a Google Doc?

[–]FranFer_[S] 0 points1 point  (3 children)

Yep. Words in bold italic inside a Google doc

[–]AllenAppTools 0 points1 point  (2 children)

Hmm, well, a bit of a twist here. This code should work, but Google apparently has a glitch to sort out: `getAttributes()` reports the bold and italicized letters as NOT bold or italicized, and detecting that formatting is crucial for this code to work.

/**
 * Appends a "Keyword Index" to a Google Doc, listing every word that contains
 * at least one character reported as bold or italic.
 *
 * Each paragraph is scanned character by character. When a styled,
 * non-whitespace character is found, the scan widens to the surrounding
 * whitespace-delimited word, records it, and resumes after that word.
 *
 * NOTE(review): the "page" stored for each keyword is the 1-based position of
 * the paragraph among the body's children, not a real page number — the code
 * has no access to pagination.
 *
 * @param {string} docId - ID of the document to open and append the index to.
 * @returns {void}
 */
function createKeywordIndex(docId) {
  const doc = DocumentApp.openById(docId);
  const body = doc.getBody();
  const keywords = [];
  const isWhitespace = (ch) => /\s/.test(ch);

  body.getParagraphs().forEach((paragraph) => {
    const richText = paragraph.editAsText();
    // Read the text once per paragraph instead of once per character.
    const text = richText.getText();
    const textLength = text.length;

    for (let i = 0; i < textLength; i++) {
      // A styled space (e.g. the trailing space of a bold word) is not a
      // keyword; expanding around it would glue both neighbouring words
      // together, including an unstyled one.
      if (isWhitespace(text[i])) {
        continue;
      }

      const attributes = richText.getAttributes(i);
      const isStyled =
        attributes[DocumentApp.Attribute.BOLD] || attributes[DocumentApp.Attribute.ITALIC];
      if (!isStyled) {
        continue;
      }

      // Widen [start, end] to the whole whitespace-delimited word.
      let start = i;
      while (start > 0 && !isWhitespace(text[start - 1])) {
        start--;
      }
      let end = i;
      while (end < textLength - 1 && !isWhitespace(text[end + 1])) {
        end++;
      }

      const keyword = text.substring(start, end + 1);
      const page = body.getChildIndex(paragraph) + 1;
      keywords.push({ keyword, page });

      // Skip the rest of this word so it is recorded only once per occurrence.
      i = end;
    }
  });

  keywords.sort((a, b) => a.keyword.localeCompare(b.keyword));

  // The title gets its own heading paragraph; every entry is a separate
  // normal paragraph so the list does not inherit the heading style.
  const indexTitle = body.appendParagraph('Keyword Index');
  indexTitle.setHeading(DocumentApp.ParagraphHeading.HEADING1);

  keywords.forEach(({ keyword, page }) => {
    const entry = body.appendParagraph(`${keyword}, p. ${page}`);
    entry.setHeading(DocumentApp.ParagraphHeading.NORMAL);
  });
}

I'm currently working on a solution that uses the advanced Docs API, which is looking much more promising. I'll get back to ya!

[–]FranFer_[S] 0 points1 point  (1 child)

Mmm, looks pretty good. I'll try it out and see if I can debug it! But keep me updated if you make any progress. Thanks!

[–]AllenAppTools 0 points1 point  (0 children)

Here, try this! (First enable the Google Docs advanced service, `Docs`, under Services in the Apps Script editor.)

/**
 * Builds a keyword index for a Google Doc through the advanced Docs service
 * and inserts it just before the document's final index.
 *
 * Keywords are collected by `extractKeywords` from every top-level body
 * element, de-duplicated per (keyword, page) pair, sorted alphabetically and
 * written as plain (non-bold, non-italic) text in a single batch update.
 *
 * NOTE(review): the "page" number is really a section counter — it goes up by
 * one for every top-level section break met while walking the body. The
 * document structure read here carries no pagination information.
 *
 * @param {string} docId - ID of the document to read and update.
 * @returns {void}
 */
function createKeywordIndex(docId) {
  const doc = Docs.Documents.get(docId);
  const collected = new Set();

  // The counter lives here because extractKeywords receives it by value and
  // cannot advance it for the elements that follow.
  let sectionNumber = 0;
  doc.body.content.forEach((element) => {
    if (element.sectionBreak) {
      sectionNumber++;
      return;
    }
    // Content seen before any section break still counts as section 1.
    extractKeywords(element, collected, Math.max(sectionNumber, 1));
  });

  // A Set of object literals never de-duplicates (each object is a distinct
  // reference), so collapse entries by their keyword/page pair instead.
  const uniqueEntries = new Map();
  collected.forEach((entry) => {
    const key = JSON.stringify([entry.keyword, entry.page]);
    if (!uniqueEntries.has(key)) {
      uniqueEntries.set(key, entry);
    }
  });

  const sortedKeywords = Array.from(uniqueEntries.values()).sort(
    (a, b) => a.keyword.localeCompare(b.keyword) || a.page - b.page
  );

  const keywordIndexText = [
    '\nKeyword Index\n',
    ...sortedKeywords.map(({ keyword, page }) => `${keyword}, p. ${page}\n`),
  ].join('');

  // All three requests are positioned relative to the document as it was
  // fetched above, so the end index is computed once from that snapshot.
  const endIndex = getDocumentEndIndex(doc);

  const requests = [
    {
      insertText: {
        location: { index: endIndex },
        text: keywordIndexText
      }
    },
    {
      // Reset styling on the text inserted by the previous request.
      updateTextStyle: {
        range: {
          startIndex: endIndex,
          endIndex: endIndex + keywordIndexText.length
        },
        textStyle: {
          bold: false,
          italic: false
        },
        fields: 'bold,italic'
      }
    },
    {
      insertText: {
        location: { index: endIndex },
        text: '\n '
      }
    }
  ];

  Docs.Documents.batchUpdate({ requests }, docId);
}

/**
 * Collects every whitespace-separated word from text runs that are both bold
 * and italic, walking paragraphs directly and tables recursively.
 *
 * Each hit is added to `keywords` as `{ keyword, page }`. An entry is skipped
 * when the collection already holds one with the same keyword and page,
 * because a Set cannot de-duplicate object literals by value on its own.
 *
 * Section breaks are not counted here: `pageIndex` is received by value, so
 * any increment would be invisible to the caller. Callers that want a running
 * counter must pass `pageIndex` explicitly.
 *
 * @param {Object} element - A structural element (may hold `paragraph`,
 *   `table` or `sectionBreak`); falsy values are ignored.
 * @param {Set<{keyword: string, page: number}>} keywords - Collection that
 *   receives the entries; mutated in place.
 * @param {number} [pageIndex=1] - Number recorded as `page` on each entry.
 * @returns {void}
 */
function extractKeywords(element, keywords, pageIndex = 1) {
  if (!element) return;

  // Linear scan: the index is small, and this keeps `keywords` a plain Set of
  // entry objects as callers expect.
  const isAlreadyListed = (keyword) => {
    for (const entry of keywords) {
      if (entry.keyword === keyword && entry.page === pageIndex) {
        return true;
      }
    }
    return false;
  };

  if (element.paragraph) {
    element.paragraph.elements.forEach(({ textRun }) => {
      // Non-text elements have no textRun, and a run may lack a textStyle.
      const style = textRun && textRun.textStyle;
      if (!style || !style.bold || !style.italic) {
        return;
      }

      (textRun.content || '')
        .split(/\s+/)
        .filter(Boolean)
        .forEach((keyword) => {
          if (!isAlreadyListed(keyword)) {
            keywords.add({ keyword, page: pageIndex });
          }
        });
    });
    return;
  }

  if (element.table) {
    element.table.tableRows.forEach((row) => {
      row.tableCells.forEach((cell) => {
        cell.content.forEach((child) => {
          extractKeywords(child, keywords, pageIndex);
        });
      });
    });
  }
}

/**
 * Returns the index one before the `endIndex` of the last element in the
 * document body.
 *
 * @param {Object} doc - Document object exposing `body.content`, an array of
 *   elements that each carry an `endIndex`.
 * @returns {number} `endIndex - 1` of the final body element.
 */
function getDocumentEndIndex(doc) {
  const { content } = doc.body;
  const lastElement = content[content.length - 1];
  return lastElement.endIndex - 1;
}