Tuesday, December 24, 2024

VBA Word - Extract questions from document and consolidate them at one place

The following code extracts questions from word document based on the format of questions. Each question has specific font style, color, size etc. Based on this information, each question is stored in Dictionary object: question number is key and question is value. Look at the code below which extracts questions from document and place them at top of the document to consolidate them at one place:
Sub ExtractQuestionsAndMoveToTopPerfectUsingDictionary()
    Dim doc As Document
    Dim para As Paragraph
    Dim questionsDict As Object
    Dim questionRangesDict As Object
    Dim k As Integer
    Dim questionText As String
    Dim rngQStart As Range
    Dim topRng As Range
    Dim key As Variant
    Dim result As Variant
    
    result = MsgBox("Continue?", vbOKCancel, "Chat GPT Document Maker")
    If result = vbOK Then
        ' Initialize the document and dictionaries
        Set doc = ActiveDocument
        Set questionsDict = CreateObject("Scripting.Dictionary")
        Set questionRangesDict = CreateObject("Scripting.Dictionary")
        k = 1
        questionText = ""
    
        ' Loop through each paragraph in the document
        For Each para In doc.Paragraphs
            ' Check if the paragraph has the required format
            With para.Range.Font
                If .Name = "Georgia" And .Size = 10 And .Bold = True And .Color = RGB(0, 0, 255) Then
                    ' Store the starting range of the question
                    If questionText = "" Then
                        Set rngQStart = para.Range.Duplicate
                    End If
                    ' Append the plain text of the paragraph to the question text
                    questionText = questionText & para.Range.Text
                ElseIf Len(questionText) > 0 Then
                    ' Store the collected question text in the dictionary
                    questionsDict.Add k, questionText
                    questionRangesDict.Add k, rngQStart
                    k = k + 1
                    questionText = ""
                End If
            End With
        Next para
    
        ' If there's any remaining question text, add it to the dictionaries
        If Len(questionText) > 0 Then
            questionsDict.Add k, questionText
            questionRangesDict.Add k, rngQStart
        End If
    
        ' Set the range to the start of the document to insert questions
        Set topRng = doc.Range(0, 0)
    
        ' Insert questions and hyperlinks at the top of the document in reverse order
        For i = questionsDict.Count To 1 Step -1
            ' Insert question with key
            topRng.InsertAfter questionsDict(i)
            
            ' Move the range to the end of the newly added text
            Set topRng = doc.Range(0, 0)
            topRng.Collapse wdCollapseEnd
            
            ' Add a bookmark and hyperlink
            AddHyperlink topRng, "Question" & i, questionRangesDict(i)
            topRng.InsertAfter vbCrLf
        Next i
    End If
End Sub
Now, the consolidated questions are provided hyperlinks to reach to their answers.
Sub AddHyperlink(rng As Range, displayText As String, targetRange As Range)
    ' Add a bookmark at the target range so that the hyperlink can reference it
    On Error Resume Next
    ActiveDocument.Bookmarks.Add Name:=displayText, Range:=targetRange
    On Error GoTo 0

    ' Add a hyperlink to the specified range
    ActiveDocument.Hyperlinks.Add _
        Anchor:=rng, _
        Address:="", _
        SubAddress:=displayText, _
        TextToDisplay:=displayText
End Sub

The following is new code developed in 2025 which works like before but seems to be better than before.


Sub ExtractQuestionsAndMoveToTopPerfectUsingDictionary_HyperlinkFullText()
    Dim doc As Document
    Dim para As Paragraph
    Dim questionsDict As Object ' Stores question number (Key) and full question text (Value)
    Dim questionRangesDict As Object ' Stores question number (Key) and original Range object (Value)
    Dim k As Integer
    Dim questionText As String
    Dim rngQStart As Range
    Dim topRng As Range ' This will be our dynamic insertion point for the list
    Dim result As Variant
    
    result = MsgBox("Continue?", vbOKCancel, "Chat GPT Document Maker")
    If result = vbCancel Then Exit Sub ' Exit if user cancels
    
    ' Initialize the document and dictionaries
    Set doc = ActiveDocument
    Set questionsDict = CreateObject("Scripting.Dictionary")
    Set questionRangesDict = CreateObject("Scripting.Dictionary")
    k = 1 ' Initialize question counter
    questionText = "" ' Initialize collected question text
    
    ' --- Clear existing question-related bookmarks before starting ---
    ' This prevents errors if the macro is run multiple times or if old bookmarks exist.
    Dim bm As Bookmark
    For Each bm In doc.Bookmarks
        ' Check if bookmark name starts with "Question_" or "Q" followed by a number
        If InStr(bm.Name, "Question_") = 1 Or (Left(bm.Name, 1) = "Q" And IsNumeric(Mid(bm.Name, 2, 1))) Then
            On Error Resume Next ' In case a bookmark cannot be deleted for some reason
            bm.Delete
            On Error GoTo 0
        End If
    Next bm

    ' Loop through each paragraph in the document to identify and collect questions
    For Each para In doc.Paragraphs
        ' Check if the paragraph has the required formatting (Times New Roman, 12pt, Bold, Blue)
        With para.Range.Font
            If .Name = "Times New Roman" And .Size = 12 And .Bold = True And .Color = RGB(0, 0, 255) Then
                ' This paragraph is part of a question
                If questionText = "" Then
                    ' If this is the very first part of a new question, store its starting range
                    Set rngQStart = para.Range.Duplicate
                End If
                ' Append the plain text of the paragraph to the current question text.
                ' Trim and replace vbCr to ensure clean text without extra line breaks within the question.
                questionText = questionText & Trim(Replace(para.Range.Text, vbCr, ""))
            ElseIf Len(questionText) > 0 Then
                ' This paragraph does NOT match the question format, and we have collected a question.
                ' This signifies the end of the current question. Store it.
                questionsDict.Add k, questionText
                questionRangesDict.Add k, rngQStart.Duplicate ' Store a duplicate range to preserve its reference
                k = k + 1 ' Increment for the next question
                questionText = "" ' Reset collected question text
                Set rngQStart = Nothing ' Clear the range reference
            End If
        End With
    Next para
    
    ' After the loop, check if there's any remaining question text that wasn't stored
    ' (e.g., if the document ends with a question)
    If Len(questionText) > 0 Then
        questionsDict.Add k, questionText
        questionRangesDict.Add k, rngQStart.Duplicate
    End If
    
    ' --- Prepare the document for inserting the consolidated list ---
    ' Ensure the list starts on a fresh page at the very beginning of the document.
    ' This only inserts a page break if there's existing content at the document's start.
    If doc.Content.Start <> 0 Then
        doc.Range(0, 0).InsertBreak wdPageBreak
    End If

    ' Set the initial insertion point for the list at the very beginning of the document (page 1, position 0)
    Set topRng = doc.Range(0, 0)
    
    ' Add a title for the list
    topRng.InsertAfter "List of Consolidated Questions:" & vbCrLf & vbCrLf
    ' Move the insertion point to the end of the title, so subsequent content is added after it.
    topRng.Collapse Direction:=wdCollapseEnd
    
    ' --- Insert questions and hyperlinks at the top of the document ---
    ' Your original loop for correct order: Iterate from the last question collected to the first.
    For k = questionsDict.Count To 1 Step -1
        Dim currentQuestionFullText As String
        Dim targetBookmarkName As String
        Dim targetQuestionRange As Range
        
        currentQuestionFullText = questionsDict(k) ' Get the full question text from the dictionary
        Set targetQuestionRange = questionRangesDict(k) ' Get the original range for the bookmark
        
        ' Define the bookmark name for the original question's location.
        ' Using "Question_" prefix for clarity and uniqueness.
        targetBookmarkName = "Question_" & k
        
        ' Add a bookmark to the original question's location in the document.
        ' This is the destination for the hyperlink.
        On Error Resume Next ' Resume on error if bookmark somehow already exists
        doc.Bookmarks.Add Name:=targetBookmarkName, Range:=targetQuestionRange
        On Error GoTo 0
        
        ' Prepare the full text that will be displayed as the hyperlink.
        ' This includes "Question N: " prefix + the full question text.
        Dim displayTextForHyperlink As String
        displayTextForHyperlink = "Question " & k & ": " & currentQuestionFullText
        
        ' --- THIS IS THE KEY MODIFICATION ---
        ' Insert the line break *before* the hyperlink text, at the current topRng position.
        ' Your original code used topRng.InsertBefore vbCrLf which works with your loop.
        ' We'll now combine the insertion of the line break and the hyperlink text into one operation.
        
        ' Create a temporary range for inserting the hyperlink and line break at the current
        ' topRng's start. This ensures the correct stacking order.
        Dim insertPoint As Range
        Set insertPoint = doc.Range(topRng.Start, topRng.Start) ' Start of the current topRng
        
        ' Hyperlinks.Add method is used to create a new hyperlink object
        ' and add it to the Hyperlinks collection of a document.
        ' Anchor: required parameter that specifies where in the document the hyperlink will be inserted. It expects a Range object
        ' Address: optional parameter specifies the path and file name of the linked document or the URL for a web page.
        ' SubAddress: optional parameter specifies a sub-location within the linked document.
        ' TextToDisplay: optional parameter specifies the actual text that will be displayed in the document for the hyperlink. This is what the user will see and click on.
        doc.Hyperlinks.Add _
            Anchor:=insertPoint, _
            Address:="", _
            SubAddress:=targetBookmarkName, _
            TextToDisplay:=displayTextForHyperlink

        ' Insert a line break after the hyperlink you just inserted
        insertPoint.InsertAfter vbCrLf
        
        ' Now, crucially, update topRng to encompass the newly inserted content.
        ' This ensures the next iteration's 'insertPoint' is correctly positioned
        ' *before* this content.
        Set topRng = doc.Range(topRng.Start, topRng.End + Len(displayTextForHyperlink) + 2) ' +2 for vbCrLf
        ' This specific recalculation of topRng.End is vital when inserting at the start.
        ' It makes topRng expand to cover what was just added.
        
    Next k
    
    ' Optional: Go to the top of the document to view the list
    doc.GoTo What:=wdGoToPage, Which:=wdGoToFirst
    
    MsgBox "Questions extracted and consolidated with full-text hyperlinks.", vbInformation, "Process Complete"
End Sub

Two way Hyperlinks in Word Document


Sub ExtractQuestionsAndMoveToTopPerfectUsingDictionary_TwoWayHyperlinks()
    Dim doc As Document
    Dim para As Paragraph
    Dim questionsDict As Object ' Stores question number (Key) and full question text (Value)
    Dim questionRangesDict As Object ' Stores question number (Key) and original Range object (Value)
    Dim k As Integer
    Dim questionText As String
    Dim rngQStart As Range
    Dim topRng As Range ' This will be our dynamic insertion point for the list
    Dim result As Variant
    Dim consolidatedListBookmarkName As String
    
    ' Define a unique bookmark name for the top consolidated list
    consolidatedListBookmarkName = "ConsolidatedQuestionsListTop"

    result = MsgBox("Continue?", vbOKCancel, "Chat GPT Document Maker")
    If result = vbCancel Then Exit Sub ' Exit if user cancels
    
    ' Initialize the document and dictionaries
    Set doc = ActiveDocument
    Set questionsDict = CreateObject("Scripting.Dictionary")
    Set questionRangesDict = CreateObject("Scripting.Dictionary")
    k = 1 ' Initialize question counter
    questionText = "" ' Initialize collected question text
    
    ' --- Clear existing question-related bookmarks before starting ---
    ' This prevents errors if the macro is run multiple times or if old bookmarks exist.
    Dim bm As Bookmark
    For Each bm In doc.Bookmarks
        ' Check if bookmark name starts with "Question_" (our question bookmarks)
        ' or the consolidated list bookmark name
        If InStr(bm.Name, "Question_") = 1 Or bm.Name = consolidatedListBookmarkName Then
            On Error Resume Next ' In case a bookmark cannot be deleted for some reason
            bm.Delete
            On Error GoTo 0
        End If
    Next bm

    ' Loop through each paragraph in the document to identify and collect questions
    For Each para In doc.Paragraphs
        ' Check if the paragraph has the required formatting (Times New Roman, 12pt, Bold, Blue)
        With para.Range.Font
            If .Name = "Times New Roman" And .Size = 12 And .Bold = True And .Color = RGB(0, 0, 255) Then
                ' This paragraph is part of a question
                If questionText = "" Then
                    ' If this is the very first part of a new question, store its starting range
                    Set rngQStart = para.Range.Duplicate
                End If
                ' Append the plain text of the paragraph to the current question text.
                ' Trim and replace vbCr to ensure clean text without extra line breaks within the question.
                questionText = questionText & Trim(Replace(para.Range.Text, vbCr, ""))
            ElseIf Len(questionText) > 0 Then
                ' This paragraph does NOT match the question format, and we have collected a question.
                ' This signifies the end of the current question. Store it.
                questionsDict.Add k, questionText
                questionRangesDict.Add k, rngQStart.Duplicate ' Store a duplicate range to preserve its reference
                k = k + 1 ' Increment for the next question
                questionText = "" ' Reset collected question text
                Set rngQStart = Nothing ' Clear the range reference
            End If
        End With
    Next para
    
    ' After the loop, check if there's any remaining question text that wasn't stored
    ' (e.g., if the document ends with a question)
    If Len(questionText) > 0 Then
        questionsDict.Add k, questionText
        questionRangesDict.Add k, rngQStart.Duplicate
    End If
    
    ' --- Prepare the document for inserting the consolidated list ---
    ' Ensure the list starts on a fresh page at the very beginning of the document.
    ' This only inserts a page break if there's existing content at the document's start.
    If doc.Content.Start <> 0 Then
        doc.Range(0, 0).InsertBreak wdPageBreak
    End If

    ' Set the initial insertion point for the list at the very beginning of the document (page 1, position 0)
    Set topRng = doc.Range(0, 0)
    
    ' Add a bookmark at the very top of the document for the consolidated list.
    ' This is the target for hyperlinks from individual questions.
    On Error Resume Next
    doc.Bookmarks.Add Name:=consolidatedListBookmarkName, Range:=doc.Range(0, 0)
    On Error GoTo 0
    
    ' Add a title for the list
    topRng.InsertAfter "List of Consolidated Questions:" & vbCrLf & vbCrLf
    ' Move the insertion point to the end of the title, so subsequent content is added after it.
    topRng.Collapse Direction:=wdCollapseEnd
    
    ' --- Insert questions and hyperlinks at the top of the document ---
    ' Iterate from the last question collected to the first for correct ascending display order.
    For k = questionsDict.Count To 1 Step -1
        Dim currentQuestionFullText As String
        Dim targetBookmarkName As String
        Dim targetQuestionRange As Range
        
        currentQuestionFullText = questionsDict(k) ' Get the full question text from the dictionary
        Set targetQuestionRange = questionRangesDict(k) ' Get the original range for the bookmark
        
        ' Define the bookmark name for the original question's location.
        targetBookmarkName = "Question_" & k
        
        ' Add a bookmark to the original question's location in the document.
        ' This is the destination for the hyperlink from the consolidated list.
        On Error Resume Next
        doc.Bookmarks.Add Name:=targetBookmarkName, Range:=targetQuestionRange
        On Error GoTo 0
        
        ' Prepare the full text that will be displayed as the hyperlink in the consolidated list.
        Dim displayTextForHyperlink As String
        displayTextForHyperlink = "Question " & k & ": " & currentQuestionFullText
        
        ' Create a temporary range for inserting the hyperlink and line break at the current topRng's start.
        Dim insertPoint As Range
        Set insertPoint = doc.Range(topRng.Start, topRng.Start)
        
        ' Insert the hyperlink (with the full question text)
        doc.Hyperlinks.Add _
            Anchor:=insertPoint, _
            Address:="", _
            SubAddress:=targetBookmarkName, _
            TextToDisplay:=displayTextForHyperlink

        ' Insert a line break after the hyperlink you just inserted
        insertPoint.InsertAfter vbCrLf
        
        ' Update topRng to encompass the newly inserted content.
        Set topRng = doc.Range(topRng.Start, topRng.End + Len(displayTextForHyperlink) + 2) ' +2 for vbCrLf
        
    Next k

    ' Add a line break after the last question in the consolidated list for proper spacing.
    If questionsDict.Count > 0 Then
        topRng.InsertAfter vbCrLf
    End If
    
    ' --- NOW, ADD HYPERLINKS TO THE ORIGINAL QUESTIONS ---
    ' Iterate through the stored original question ranges
    For k = 1 To questionRangesDict.Count
        Set rngQStart = questionRangesDict(k) ' Get the original question's range

        ' Ensure the range is valid and exists before adding a hyperlink
        ' (e.g., if content was cut/deleted after initial collection)
        If rngQStart.StoryType = wdMainTextStory Then ' Check if it's still in the main body
            ' Add a hyperlink to the original question.
            ' The anchor is the original question's text.
            ' The subaddress is the bookmark at the top of the consolidated list.
            ' The text displayed is the original question's text.
            On Error Resume Next ' Handle potential errors if range is somehow problematic
            doc.Hyperlinks.Add _
                Anchor:=rngQStart, _
                Address:="", _
                SubAddress:=consolidatedListBookmarkName, _
                TextToDisplay:=questionsDict(k) ' Use the stored full question text as display text
            On Error GoTo 0
        End If
    Next k
    
    ' Optional: Go to the top of the document to view the list
    doc.GoTo What:=wdGoToPage, Which:=wdGoToFirst
    
    MsgBox "Questions extracted and consolidated with full-text hyperlinks, and original questions are now hyperlinked to the top list.", vbInformation, "Process Complete"
End Sub

No comments:

Post a Comment

Hot Topics