It would seem that I have far too much time on my hands. After the post about a Star Trek “test”, I started wondering if there could be any data to back it up and… well here we go:
Those Old Scientists
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| KIRK | 8257 | 32.89 |
| SPOCK | 3985 | 15.87 |
| MCCOY | 2334 | 9.3 |
| SCOTT | 912 | 3.63 |
| SULU | 634 | 2.53 |
| UHURA | 575 | 2.29 |
| CHEKOV | 417 | 1.66 |
The Next Generation
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| PICARD | 11175 | 20.16 |
| RIKER | 6453 | 11.64 |
| DATA | 5599 | 10.1 |
| LAFORGE | 3843 | 6.93 |
| WORF | 3402 | 6.14 |
| TROI | 2992 | 5.4 |
| CRUSHER | 2833 | 5.11 |
| WESLEY | 1285 | 2.32 |
Deep Space Nine
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| SISKO | 8073 | 13.0 |
| KIRA | 5112 | 8.23 |
| BASHIR | 4836 | 7.79 |
| O’BRIEN | 4540 | 7.31 |
| ODO | 4509 | 7.26 |
| QUARK | 4331 | 6.98 |
| DAX | 3559 | 5.73 |
| WORF | 1976 | 3.18 |
| JAKE | 1434 | 2.31 |
| GARAK | 1420 | 2.29 |
| NOG | 1247 | 2.01 |
| ROM | 1172 | 1.89 |
| DUKAT | 1091 | 1.76 |
| EZRI | 953 | 1.53 |
Voyager
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| JANEWAY | 10238 | 17.7 |
| CHAKOTAY | 5066 | 8.76 |
| EMH | 4823 | 8.34 |
| PARIS | 4416 | 7.63 |
| TUVOK | 3993 | 6.9 |
| KIM | 3801 | 6.57 |
| TORRES | 3733 | 6.45 |
| SEVEN | 3527 | 6.1 |
| NEELIX | 2887 | 4.99 |
| KES | 1189 | 2.06 |
Enterprise
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| ARCHER | 6959 | 24.52 |
| T’POL | 3715 | 13.09 |
| TUCKER | 3610 | 12.72 |
| REED | 2083 | 7.34 |
| PHLOX | 1621 | 5.71 |
| HOSHI | 1313 | 4.63 |
| TRAVIS | 1087 | 3.83 |
| SHRAN | 358 | 1.26 |
Discovery
Important Note: As the source material is incomplete for Discovery, the following table only includes line counts from seasons 1 and 4 along with a single episode of season 2.
| Name | Total Lines | Percentage of Lines |
| --- | :---: | ---: |
| BURNHAM | 2162 | 22.92 |
| SARU | 773 | 8.2 |
| BOOK | 586 | 6.21 |
| STAMETS | 513 | 5.44 |
| TILLY | 488 | 5.17 |
| LORCA | 471 | 4.99 |
| TARKA | 313 | 3.32 |
| TYLER | 300 | 3.18 |
| GEORGIOU | 279 | 2.96 |
| CULBER | 267 | 2.83 |
| RILLAK | 205 | 2.17 |
| DETMER | 186 | 1.97 |
| OWOSEKUN | 169 | 1.79 |
| ADIRA | 154 | 1.63 |
| COMPUTER | 152 | 1.61 |
| ZORA | 151 | 1.6 |
| VANCE | 101 | 1.07 |
| CORNWELL | 101 | 1.07 |
| SAREK | 100 | 1.06 |
| T’RINA | 96 | 1.02 |
If anyone is interested, here’s the (rather hurried, don’t judge me) Python used:
import re
from collections import defaultdict
from pathlib import Path
# File names that count as episodes: digits only, ending in ".htm" or ".html".
EPISODE_REGEX = re.compile(r"^\d+\.html?$")

# A speaker line starts with one or more capital letters / apostrophes,
# followed by a colon and a space; the speaker is captured as "name".
LINE_REGEX = re.compile(r"^(?P<name>[A-Z']+): ")

# Relative directory under which each series has its own sub-directory.
EPISODES = Path("www.chakoteya.net")

TOS = EPISODES.joinpath("StarTrek")
TNG = EPISODES.joinpath("NextGen")
DS9 = EPISODES.joinpath("DS9")
VOY = EPISODES.joinpath("Voyager")
ENT = EPISODES.joinpath("Enterprise")
DISCO = EPISODES.joinpath("STDisco17")

# Series directory name -> title used as the heading of that series' table.
NAMES = dict(
    (directory.name, title)
    for directory, title in (
        (TOS, "Those Old Scientists"),
        (TNG, "The Next Generation"),
        (DS9, "Deep Space Nine"),
        (VOY, "Voyager"),
        (ENT, "Enterprise"),
        (DISCO, "Discovery"),
    )
)
class CharacterLines:
    """Tally how many transcript lines each speaker has in one series.

    ``path`` is the directory containing the series' episode files.
    ``collect`` scans those files and fills ``line_count``;
    ``as_tablular_data`` and ``render`` report on whatever has been
    collected so far.
    """

    def __init__(self, path: Path) -> None:
        self.path = path
        # Speaker name -> number of lines counted so far.
        self.line_count: defaultdict[str, int] = defaultdict(int)

    def collect(self) -> None:
        """Count speaker lines in the episode files directly inside ``path``.

        Only files whose name matches ``EPISODE_REGEX`` are read.  Counts are
        added to ``line_count``, so calling this again on the same instance
        counts every file a second time.
        """
        for episode in self.path.glob("*.htm*"):
            if not EPISODE_REGEX.match(episode.name):
                continue
            # Decode explicitly rather than with the platform default, and
            # replace undecodable bytes instead of raising.  LINE_REGEX only
            # matches ASCII characters, so replacements cannot alter counts.
            text = episode.read_text(encoding="utf-8", errors="replace")
            for line in text.split("\n"):
                if m := LINE_REGEX.match(line):
                    self.line_count[m.group("name")] += 1

    @property
    def as_tablular_data(self) -> tuple[tuple[str, int, float], ...]:
        """Return ``(name, line_count, percentage)`` rows, busiest first.

        The percentage is the speaker's share of all counted lines, rounded
        to two decimals; speakers whose rounded share is not above 1 are left
        out.  Rows are ordered by line count (descending) and then by name,
        so the order does not depend on the rounded percentage or on the
        order in which files were read.  Returns an empty tuple when nothing
        has been counted.

        (The misspelled property name is kept so existing callers still work.)
        """
        total = sum(self.line_count.values())
        if not total:
            # Nothing counted: avoid dividing by zero.
            return ()
        rows = [
            (str(name), count, percentage)
            for name, count in self.line_count.items()
            if (percentage := round(count * 100 / total, 2)) > 1
        ]
        rows.sort(key=lambda row: (-row[1], row[0]))
        return tuple(rows)

    def render(self) -> None:
        """Print the series heading followed by a Markdown table of the rows."""
        # Fall back to the directory name when the series is not in NAMES.
        title = NAMES.get(self.path.name, self.path.name)
        print(f"\n\n# {title}\n")
        print("| Name | Total Lines | Percentage of Lines |")
        print("| ---------------- | :---------: | ------------------: |")
        for character, total, pct in self.as_tablular_data:
            print(f"| {character:16} | {total:11} | {pct:19} |")
if __name__ == "__main__":
    # Handle each series directory in turn: tally its speaker lines, then
    # print that series' table before moving on to the next one.
    for series_dir in (TOS, TNG, DS9, VOY, ENT, DISCO):
        tally = CharacterLines(series_dir)
        tally.collect()
        tally.render()
Thanks for the update.
Poor Chekov has almost no lines, but Koenig was great as Bester on B5.