所有元素都可以通过标签名(tag)或 class 找到:
# Print one question, its answer choices and the correct answer from an
# already-parsed document (`soup`). The comment lines that follow each
# statement show what that statement prints for the sample HTML.

# find() returns the first matching element; the question text is in
# <div class="qtext">.
print(soup.find('div', class_='qtext').text.strip())
# HOW DO I PRINT THIS QUESTION?

# Every answer choice is the text of a <label> element.
for item in soup.find_all('label'):
    print(item.text.strip())
# a. I WANT TO PRINT THIS
# b. I WANT TO PRINT THIS TOO
# c. I WANT TO PRINT THIS ALSO
# d. I WANT TO PRINT THIS AS WELL

# The correct answer is in <div class="rightanswer">.
print(soup.find('div', class_='rightanswer').text.strip())
# THE CORRECT ANSWER IS: I WANT TO PRINT THIS
你也可以使用 `.get_text(strip=True)` 代替 `.text.strip()`。(注意:`strip=True` 会分别去除每个文本片段首尾的空白,当元素内部含有嵌套标签时,结果可能与 `.text.strip()` 略有不同。)
完整代码:
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS

# Parse the sample HTML held in `data` with the standard-library backed parser.
soup = BS(data, 'html.parser')

# Question text: first <div class="qtext"> in the document.
print(soup.find('div', class_='qtext').text.strip())

# Answer choices: one <label> per choice, printed in document order.
for item in soup.find_all('label'):
    print(item.text.strip())

# Correct answer: first <div class="rightanswer"> in the document.
print(soup.find('div', class_='rightanswer').text.strip())
编辑:如果 HTML 中包含多个问题,可以先找到包裹单个问题(连同其选项和正确答案)的标签,例如 <div class="que multichoice deferredfeedback correct" id="q7">;用 find_all 找出所有这样的标签,然后在每个标签内部分别搜索。
# Each <div class="... multichoice ..."> wraps one question together with its
# choices and its correct answer, so every search below is scoped to that
# element (`questions`) instead of the whole document.
for questions in soup.find_all('div', class_='multichoice'):
    print(questions.find('div', class_='qtext').text.strip())

    # Only the <label> elements inside the current question.
    for item in questions.find_all('label'):
        print(item.text.strip())

    # Printed once per question, after all of its choices.
    print(questions.find('div', class_='rightanswer').text.strip())
完整代码 - 我复制了相同的 HTML 来模拟两个问题:
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS

# Parse the sample HTML held in `data`.
soup = BS(data, 'html.parser')

# Each <div class="... multichoice ..."> wraps one question together with its
# choices and its correct answer, so every search below is scoped to that
# element (`questions`) instead of the whole document.
for questions in soup.find_all('div', class_='multichoice'):
    print(questions.find('div', class_='qtext').text.strip())

    # Only the <label> elements inside the current question.
    for item in questions.find_all('label'):
        print(item.text.strip())

    # Printed once per question, after all of its choices.
    print(questions.find('div', class_='rightanswer').text.strip())

    # Separator between consecutive questions.
    print('---')
或者,你也可以先分别找出文档中所有的问题、选项和答案,再用 for 循环按位置分组(这里假设每个问题恰好有 4 个选项):
from bs4 import BeautifulSoup as BS

# Number of answer choices per question in the sample HTML. The flat list of
# <label> elements is cut into consecutive slices of this size, so this
# approach only works when every question has the same number of choices.
CHOICES_PER_QUESTION = 4

# Parse the sample HTML held in `data`.
soup = BS(data, 'html.parser')

# Collect each kind of element across the whole document, in document order.
all_questions = soup.find_all('div', class_='qtext')
all_choices = soup.find_all('label')
all_answers = soup.find_all('div', class_='rightanswer')

for x, question in enumerate(all_questions):
    print(question.text.strip())

    # Choices of question x occupy positions [y, y + CHOICES_PER_QUESTION).
    y = x * CHOICES_PER_QUESTION
    for item in all_choices[y:y + CHOICES_PER_QUESTION]:
        print(item.text.strip())

    # The x-th answer belongs to the x-th question.
    print(all_answers[x].text.strip())

    # Separator between consecutive questions.
    print('---')
完整代码:
data = '''
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
<div class="que multichoice deferredfeedback correct" id="q7">
<div class="info">
<h3 class="no">
Question
<span class="qno">
7
</span>
</h3>
<div class="state">
Correct
</div>
<div class="grade">
Mark 1.00 out of 1.00
</div>
</div>
<div class="content">
<div class="formulation">
<h4 class="accesshide">
Question text
</h4>
<input name="q7391425:7_:sequencecheck" type="hidden" value="3"/>
<div class="qtext">
HOW DO I PRINT THIS QUESTION?
</div>
<div class="ablock">
<div class="prompt">
Select one:
</div>
<div class="answer">
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer0" name="q7391425:7_answer" type="radio" value="0"/>
<label for="q7391425:7_answer0">
a. I WANT TO PRINT THIS
</label>
</div>
<div class="r1 correct">
<input checked="checked" disabled="disabled" id="q7391425:7_answer1" name="q7391425:7_answer" type="radio" value="1"/>
<label for="q7391425:7_answer1">
b. I WANT TO PRINT THIS TOO
</label>
</div>
<div class="r0">
<input disabled="disabled" id="q7391425:7_answer2" name="q7391425:7_answer" type="radio" value="2"/>
<label for="q7391425:7_answer2">
c. I WANT TO PRINT THIS ALSO
</label>
</div>
<div class="r1">
<input disabled="disabled" id="q7391425:7_answer3" name="q7391425:7_answer" type="radio" value="3"/>
<label for="q7391425:7_answer3">
d. I WANT TO PRINT THIS AS WELL
</label>
</div>
</div>
</div>
</div>
<div class="outcome">
<h4 class="accesshide">
Feedback
</h4>
<div class="feedback">
<div class="rightanswer">
THE CORRECT ANSWER IS: I WANT TO PRINT THIS
</div>
</div>
</div>
</div>
</div>
'''
from bs4 import BeautifulSoup as BS

# Number of answer choices per question in the sample HTML. The flat list of
# <label> elements is cut into consecutive slices of this size, so this
# approach only works when every question has the same number of choices.
CHOICES_PER_QUESTION = 4

# Parse the sample HTML held in `data`.
soup = BS(data, 'html.parser')

# Collect each kind of element across the whole document, in document order.
all_questions = soup.find_all('div', class_='qtext')
all_choices = soup.find_all('label')
all_answers = soup.find_all('div', class_='rightanswer')

for x, question in enumerate(all_questions):
    print(question.text.strip())

    # Choices of question x occupy positions [y, y + CHOICES_PER_QUESTION).
    y = x * CHOICES_PER_QUESTION
    for item in all_choices[y:y + CHOICES_PER_QUESTION]:
        print(item.text.strip())

    # The x-th answer belongs to the x-th question.
    print(all_answers[x].text.strip())

    # Separator between consecutive questions.
    print('---')